javaalgorithmboyer-moore

Modifying Boyer Moore for multiple pattern search


I have used Boyer Moore algorithm according to this site. This implements pattern search in the text for only once and the program exits. Can anyone help me to modify this code in order to find the pattern multiple times with their starting and ending index?

    public class BoyerMoore {
        private final int R;     // the radix
        private int[] right;     // the bad-character skip array
        private String pat;      // or as a string

        // pattern provided as a string
        public BoyerMoore(String pat) {
            this.R = 256;
            this.pat = pat;

            // position of rightmost occurrence of c in the pattern
            right = new int[R];
            for (int c = 0; c < R; c++)
                right[c] = -1;
            for (int j = 0; j < pat.length(); j++)
                right[pat.charAt(j)] = j;
        }

        // return offset of first match; N if no match
        public ArrayList<Integer> search(String txt) {
            int M = pat.length();
            int N = txt.length();
            ArrayList<Integer> newArrayInt = new ArrayList<Integer>();
            int skip;
            for (int i = 0; i <= N - M; i += skip) {
                skip = 0;
                for (int j = M-1; j >= 0; j--) {
                    if (pat.charAt(j) != txt.charAt(i+j)) {
                        skip = Math.max(1, j - right[txt.charAt(i+j)]);
                        break;
                    }
                }
                if (skip == 0) 
                    newArrayInt.add(i);    // found
            }
            return newArrayInt;                       // not found
        }

        // test client
        public static void main(String[] args) {
            String pat = "abc";
            String txt = "asdf ghjk klll abc qwerty abc and poaslf abc";

            BoyerMoore boyermoore1 = new BoyerMoore(pat);

            ArrayList<Integer> offset = boyermoore1.search(txt);

            // print results
            System.out.println("Offset: "+ offset);


 }
}

Solution

  • I got it. The skip was always 0 when it found the pattern in the text.

    public class BoyerMoore {
        private final int R;     // the radix
        private int[] right;     // the bad-character skip array
        private String pat;      // or as a string
    
        // pattern provided as a string
        public BoyerMoore(String pat) {
            this.R = 256;
            this.pat = pat;
    
            // position of rightmost occurrence of c in the pattern
            right = new int[R];
            for (int c = 0; c < R; c++)
                right[c] = -1;
            for (int j = 0; j < pat.length(); j++)
                right[pat.charAt(j)] = j;
        }
    
        // return offset of first match; N if no match
        public ArrayList<Integer> search(String txt) {
            int M = pat.length();
            int N = txt.length();
            ArrayList<Integer> newArrayInt = new ArrayList<Integer>();
            int skip;
            for (int i = 0; i <= N - M; i += skip) {
                skip = 0;
                for (int j = M-1; j >= 0; j--) {
                    if (pat.charAt(j) != txt.charAt(i+j)) {
                        skip = Math.max(1, j - right[txt.charAt(i+j)]);
                        break;
                    }
                }
                if (skip == 0) 
                {
                    newArrayInt.add(i);    // found
                    skip++;
                }
            }
            return newArrayInt;                       // not found
        }
    
        // test client
        public static void main(String[] args) {
            String pat = "abc";
            String txt = "asdf ghjk klll abc qwerty abc and poaslf abc";
    
            BoyerMoore boyermoore1 = new BoyerMoore(pat);
    
            ArrayList<Integer> offset = boyermoore1.search(txt);
    
            // print results
            System.out.println("Offset: "+ offset);
        }
    }