Tags: java, string, hashtable, binary-search

Java Dictionary Searcher


I am trying to implement a program that will take a user's input, split that string into tokens, and then search a dictionary for the words in that string. My goal for the parsed string is to have every single token be an English word.

For Example:

Input:
       aman

Split Method:
      a man
      a m an
      a m a n
      am an
      am a n
      ama n

Desired Output:
      a man

I currently have this code which does everything up until the desired output part:

    import java.util.Scanner;
import java.io.*;

/**
 * Enumerates every way of cutting an input string into space-separated
 * pieces and, for each such segmentation, reports how many dictionary
 * matches its pieces produce.
 */
public class Words {

    /**
     * Dictionary storage, one entry per slot. Slots that have not been filled
     * are {@code null}; {@link #split} tolerates them because
     * {@code equalsIgnoreCase(null)} is {@code false}.
     */
    public static String[] dic = new String[80368];

    /**
     * Prints the segmentation {@code head + " " + in} followed by a tab and
     * the number of dictionary matches, then recurses on every way of cutting
     * a non-empty prefix off {@code in}.
     *
     * <p>A piece is counted once for every entry of {@link #dic} it equals
     * (ignoring case), so duplicate dictionary entries are counted repeatedly.
     *
     * @param head the pieces already split off, each preceded by a space
     * @param in   the part of the input that has not been split yet
     */
    public static void split(String head, String in) {

        // head + " " + in is one complete segmentation
        String segment = head + " " + in;

        // count dictionary matches over all whitespace-separated pieces
        int count = 0;
        try (Scanner phraseScan = new Scanner(segment)) {
            while (phraseScan.hasNext()) {
                String word = phraseScan.next();
                for (String entry : dic) {
                    if (word.equalsIgnoreCase(entry)) {
                        count++;
                    }
                }
            }
        }

        System.out.println(segment + "\t" + count + " English words");

        // move each proper, non-empty prefix of `in` onto `head` and recurse
        for (int i = 1; i < in.length(); i++) {
            split(head + " " + in.substring(0, i), in.substring(i));
        }
    }

    /**
     * Reads one token from standard input, loads the dictionary file into
     * {@link #dic}, and prints every segmentation of the token.
     *
     * @param args unused
     * @throws IOException if the dictionary file cannot be opened
     */
    public static void main(String[] args) throws IOException {
        // Deliberately not closed: closing this Scanner would close System.in.
        Scanner scan = new Scanner(System.in);
        System.out.print("Enter a string: ");
        String input = scan.next();
        System.out.println();

        int wc = 0;
        // try-with-resources releases the file handle even if reading fails
        try (Scanner filescan = new Scanner(new File("src:\\dictionary.txt"))) {
            while (filescan.hasNext()) {
                if (wc == dic.length) {
                    // The file holds more lines than the array: grow it
                    // instead of failing with ArrayIndexOutOfBoundsException.
                    String[] larger = new String[Math.max(1, dic.length * 2)];
                    System.arraycopy(dic, 0, larger, 0, wc);
                    dic = larger;
                }
                dic[wc] = filescan.nextLine();
                wc++;
            }
        }

        System.out.println(wc + " words stored");

        split("", input);

    }
}

I know there are better ways to store the dictionary (such as a binary search tree or a hash table), but I don't know how to implement those anyway.

I am stuck on how to implement a method that would check the split string to see if every segment was a word in the dictionary.

Any help would be great. Thank you.


Solution

  • Splitting the input string every possible way is not going to finish in a reasonable amount of time if you want to support 20 or more characters. Here's a more efficient approach, comments inline:

    /**
     * Loads the dictionary file into a set, reads one token from standard
     * input, and prints every segmentation of that token into dictionary
     * words, followed by the elapsed search time in milliseconds.
     *
     * @param args unused
     * @throws IOException if the dictionary file cannot be opened
     */
    public static void main(String[] args) throws IOException {
        // load the dictionary into a set for fast lookups
        Set<String> dictionary = new HashSet<String>();
        // try-with-resources releases the file handle even if reading fails
        try (Scanner filescan = new Scanner(new File("dictionary.txt"))) {
            while (filescan.hasNext()) {
                dictionary.add(filescan.nextLine().toLowerCase());
            }
        }
    
        // scan for input; this Scanner is deliberately left open because
        // closing it would also close System.in
        Scanner scan = new Scanner(System.in);
        System.out.print("Enter a string: ");
        String input = scan.next().toLowerCase();
        System.out.println();
    
        // place to store list of results, each result is a list of strings
        List<List<String>> results = new ArrayList<>();
    
        long time = System.currentTimeMillis();
    
        // start the search, pass empty stack to represent words found so far
        search(input, dictionary, new Stack<String>(), results);
    
        time = System.currentTimeMillis() - time;
    
        // list the results found, one segmentation per line
        for (List<String> result : results) {
            for (String word : result) {
                System.out.print(word + " ");
            }
            System.out.println("(" + result.size() + " words)");
        }
        System.out.println();
        System.out.println("Took " + time + "ms");
    }
    
    /**
     * Recursively collects every way of cutting {@code input} into a sequence
     * of dictionary words.
     *
     * <p>Each prefix of {@code input} that is in {@code dictionary} is pushed
     * onto {@code words}. If the prefix is the whole input, a copy of the
     * stack is appended to {@code results}; otherwise the rest of the input is
     * searched the same way. The prefix is popped again before the next,
     * longer prefix is tried, so {@code words} holds the same elements on
     * return as on entry.
     *
     * @param input      the text still to be segmented
     * @param dictionary the set of accepted words
     * @param words      the words matched so far, oldest at the bottom
     * @param results    receives one list per complete segmentation
     */
    public static void search(String input, Set<String> dictionary,
            Stack<String> words, List<List<String>> results) {

        final int length = input.length();

        // `end` is the exclusive end index of the candidate prefix
        for (int end = 1; end <= length; end++) {
            String prefix = input.substring(0, end);
            if (!dictionary.contains(prefix)) {
                continue;
            }

            words.push(prefix);
            if (end == length) {
                // the prefix consumed all of the input: record a snapshot
                results.add(new ArrayList<String>(words));
            } else {
                // segment whatever follows the prefix
                search(input.substring(end), dictionary, words, results);
            }
            // undo the push so the next, longer prefix starts clean
            words.pop();
        }
    }
    

    Example output:

    Enter a string: aman
    
    a man (2 words)
    am an (2 words)
    
    Took 0ms
    

    Here's a much longer input:

    Enter a string: thequickbrownfoxjumpedoverthelazydog
    
    the quick brown fox jump ed over the lazy dog (10 words)
    the quick brown fox jump ed overt he lazy dog (10 words)
    the quick brown fox jumped over the lazy dog (9 words)
    the quick brown fox jumped overt he lazy dog (9 words)
    
    Took 1ms