BinaryStringTree

This project has taught me how trees can be used to efficiently store and manipulate data which plays a big role in solving real world problems that involve text analysis. the text is read and each word is stored in the tree in it’s lexicographical order which allows for binary search with logarithmic time. A tweak in the program was made prior to running to achieve a balanced tree and this really important because the height of the tree is calculated to be 24 which is significantly higher than a perfectly balanced tree which equals to around 11.

Here is the source code for the program:

package edu.ics211.h11;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Scanner;
import java.util.Set;

/**
 * @author Phuong Pham
 *
 */

public class BinaryStringTree {

	private static class BinaryNode {
		private String key;
		private long value;
		private BinaryNode left;
		private BinaryNode right;


		/** 
		 * constructor to build a node with no subtrees
		 * @param the value to be stored by this node
		 */
		private BinaryNode(String key, long value) {
			this.key = key;
			this.value = value;
			left = null;
			right = null;
		}


		/** 
		 * constructor to build a node with a specified (perhaps null) subtrees
		 * @param the value to be stored by this node
		 * @param the left subtree for this node
		 * @param the right subtree for this node
		 */
		private BinaryNode(String key, long value, BinaryNode l, BinaryNode r) {
			this.key = key;
			this.value = value;
			left = l;
			right = r;
		}
	}


	// Class variable
	private BinaryNode root;


	// Empty constructor
	public BinaryStringTree() { }


	// Constructor that reads file
	public BinaryStringTree(String fileName) throws FileNotFoundException {

		File file = new File(fileName); // Create a File object with the filename
		Scanner reader = new Scanner(file);  // Scanner to read our file

		while(reader.hasNext()) {
			StringBuilder sb = new StringBuilder();
			String word = reader.next();  // Get the next word from the scanner
			// Check each character in the word, only append alphabetic characters

			for ( int i = 0; i < word.length(); i++) {
				char c = word.charAt(i);  // Pull each character individually
				// If the character is alphabetic, append to the StringBuilder
				if(Character.isJavaIdentifierStart(c)) {
					sb.append(c);
				}
			}

			// If the word we built is not empty, add it to the tree
			if( sb.length() > 0) {
				add(sb.toString());
			}

		}
	}


	/**
	 * 
	 * @param key
	 * @return the number of occurrences of the given String, or
	 *         0 if the String is not found
	 */
	public long occurrences(String key) {
		//TODO: call a private recursive helper method that you design
		return occurrences(key, root);
	}


	// occurences() helper method, using recursion
	private long occurrences(String key, BinaryNode node) {
		// Base case - reach end of tree, key is not found
		if(node == null) {
			return 0;
		}

		// Base case - Found our target key
		if(node.key.equals(key)) {
			return node.value;
		}

		// Compare the search key to the current node's key to determine traversal
		int comparison = key.compareTo(node.key);
		// Search the right subtree
		if(comparison > 0) {
			return occurrences (key, node.right);
		}
		// Search the left subtree
		else {
			return occurrences(key, node.left);
		}
	}


	public java.util.Set keys() {
		java.util.Set<String> result = new java.util.HashSet<>();

		return keys(result, root);
	}


	// keys() helper method, using recursion 
	private java.util.Set keys(java.util.Set set, BinaryNode node) {

		if (node == null) {
			return set;
		}
		else {
			set.add(node.key);
			keys(set, node.left);
			keys(set, node.right);
		}
		return set;
	}


	public void add(String key) {
		root = add(key, root);
	}

	// add() helper method, using recursion
	protected BinaryNode add(String key, BinaryNode node) {
		// If key doesn't exist, add new node with the key, w/ initial count of 1
		if (node == null) {
			return new BinaryNode(key, 1);
		}

		// If the key already exist in the tree, increase it's count by 1
		if(key.compareTo(node.key) == 0) {
			node.value++;
		}
		// If we haven't found a matching key, continue our traversal
		else { 
			if (key.compareTo(node.key) < 0) { // add to left subtree
				node.left = add(key, node.left);
			}
			else {  // add to right subtree
				node.right = add(key, node.right);
			}
		}
		return node;
	}


	/**
	 * Removes one occurrence of the given key from the tree, usually
	 * by decrementing the value associated with the key.
	 * However, if the key has a value of 1, deletes the node
	 * If the key is not in the tree, does nothing.
	 * @param key
	 */
	public void removeOne(String key) {
		//TODO: search the tree for the target key.
		// If found, decrease the count by 1.
		// If the resulting count (after decreasing it) is 0, remove that node

		removeOne(key, root);


	}

	protected BinaryNode removeOne(String key, BinaryNode node) {
		if (node == null) {	// key not in tree
			return null;
		}
		if (key.compareTo(node.key) == 0) { // remove this node or decrement
			if (node.value == 1) { // If the vlaue is one then delete node
				if (node.left == null) { // replace this node with right child
					return node.right;
				} else if (node.right == null) { // replace with left child
					return node.left;
				} else {
					// replace the value in this  node with value in the
					// rightmost node of the left subtree
					BinaryNode replacement = getRightmost(node.left);

					// now remove the rightmost node in the left subtree,
					// by calling ourselves recursively
					BinaryNode newNode = new BinaryNode(replacement.key, replacement.value, remove(replacement.key, node.left), node.right);
					return newNode;
				}
			}
			else { // if the value is not 1 then just decrement value
				node.value--;
			}

		} else {		// remove from left or right subtree
			if (key.compareTo(node.key) < 0) {
				// remove from left subtree
				node.left = removeOne(key, node.left);
			} else {		// remove from right subtree
				node.right = removeOne(key, node.right);
			}
		}
		return node;
	}


	/**
	 * Removes the given String from the tree by deleting the node
	 * @param key
	 */
	public void remove(String key) {

		root = remove(key, root);
	}


	// remove() recursive helper method
	protected BinaryNode remove(String key, BinaryNode node) {
		if (node == null) {	// key not in tree
			return null;
		}
		if (key.compareTo(node.key) == 0) { // remove this node
			if (node.left == null) { // replace this node with right child
				return node.right;
			} else if (node.right == null) { // replace with left child
				return node.left;
			} else {
				// replace the value in this  node with value in the
				// rightmost node of the left subtree
				BinaryNode replacement = getRightmost(node.left);

				// now remove the rightmost node in the left subtree,
				// by calling ourselves recursively
				BinaryNode newNode = new BinaryNode(replacement.key, replacement.value, remove(replacement.key, node.left), node.right);
				return newNode;
			}
		} else {		// remove from left or right subtree
			if (key.compareTo(node.key) < 0) {
				// remove from left subtree
				node.left = remove(key, node.left);
			} else {		// remove from right subtree
				node.right = remove(key, node.right);
			}
		}
		return node;
	}


	// return the right-most node in the subtree
	protected BinaryNode getRightmost(BinaryNode node) {
		assert(node != null);
		BinaryNode right = node.right;
		if (right == null) {
			return node;
		} else {
			return getRightmost(right);
		}
	}

	
	/**
	 * 
	 * @return number of nodes in the tree == the number of unique Strings
	 */
	public int size() {

		return size(root);
	}

	// size() helper method
	private int size(BinaryNode node) {
		// Base case
		if(node == null) return 0;
		return 1 + size(node.left) + size(node.right);
	}
	

	/**
	 * 
	 * @return the height of the tree.  An empty tree has height 0,
	 *         a tree with one node has height 1
	 */
	public int height() {
		
		return height(root);
	}
	
	// height() helper method
	private int height(BinaryNode node) {

		if (node == null) {
			return 0;
		}
		else {
			int rightSubtreeDepth = height(node.right);
			int leftSubtreeDepth = height(node.left);

			if (leftSubtreeDepth > rightSubtreeDepth) {
				return (leftSubtreeDepth + 1);
			}
			else {
				return(rightSubtreeDepth + 1);
			}
		}

	}
	

	private class BSTComparator implements Comparator<String>{
		@ Override
		public int compare(String o1, String o2) {
			return (int) (occurrences(o2) - occurrences(o1));
		}
	}

	
	public void printTop10() {

		Set<String> set = keys();

		String[] keys = new String[set.size()];
		int index = 0;

		for(String key : set) {
			keys[index++] = key;
		}
		
		Arrays.sort(keys, new BSTComparator());

		for(int i = 0; i < 10; i++) {
			System.out.println(keys[i] + ": " + occurrences(keys[i]));
		}
	}

	
	public static void main(String[] args) throws FileNotFoundException{

		BinaryStringTree constitution = new BinaryStringTree("constitution.txt");
		
		constitution.printTop10();
		System.out.println(constitution.height());
		System.out.println(constitution.size());
	}
	
	// ~Analysis~
	/*
	 * the height of my tree is 24 and comparing to the minimum possible height of a 
	 * perfectly balanced tree which is 11, I would say my tree is not balanced nor
	 * is it too unbalanced. if the tree is really unbalanced then it would have the 
	 * maximum height of 1265. I can guarantee that all the values in the tree are at 
	 * least 1 because my implementation for the add, remove and removeOne method are 
	 * correct. for the add method every single time a node is added, if it's a node 
	 * that's not yet in the tree then a new node is created with a value of 1 and if 
	 * the node is found in the tree then the value is incremented. this guarantees 
	 * all new node created will have value of at least one. for my remove method, 
	 * if the node we want to remove is not in the tree then nothing happens, if it
	 * is in the tree then that node will be removed, if the node has two children then
	 * the right-most node of the left subtree will take place of the node that's being 
	 * removed. Same for removeOne method, but if the node we want to remove is in the 
	 * tree then we will just decrement the value, if the value of node we want to remove 
	 * is 1 then we will delete that node entirely making sure no node in tree have value 
	 * less than 1.
	 */

}