cocoa nsstring cfstring

How to get an array of sentences using CFStringTokenizer?


I've created a string tokenizer like this:

// Create a tokenizer over the full length of `str`, using sentences as the
// tokenization unit.
stringTokenizer = CFStringTokenizerCreate(
    NULL,                            // allocator argument
    (CFStringRef)str,                // string to tokenize
    CFRangeMake(0, [str length]),    // range: the entire string
    kCFStringTokenizerUnitSentence,  // tokenization unit: sentences
    userLocale);                     // locale argument

But how do I now obtain those sentences from the tokenizer? The CF String Programming Guide doesn't mention CFStringTokenizer or tokens (I did a full-text search of the PDF).


Solution

  • Here is an example of CFStringTokenizer usage:

    CFStringRef string; // Get string from somewhere
    CFLocaleRef locale = CFLocaleCopyCurrent();
    
    CFStringTokenizerRef tokenizer = 
        CFStringTokenizerCreate(
            kCFAllocatorDefault
            , string
            , CFRangeMake(0, CFStringGetLength(string))
            , kCFStringTokenizerUnitSentence
            , locale);
    
    CFStringTokenizerTokenType tokenType = kCFStringTokenizerTokenNone;
    unsigned tokensFound = 0;
    
    while(kCFStringTokenizerTokenNone !=
        (tokenType = CFStringTokenizerAdvanceToNextToken(tokenizer))) {
        CFRange tokenRange = CFStringTokenizerGetCurrentTokenRange(tokenizer);
        CFStringRef tokenValue =
            CFStringCreateWithSubstring(
                kCFAllocatorDefault
                , string
                , tokenRange);
    
      // Do something with the token
      CFShow(tokenValue);
      CFRelease(tokenValue);
      ++tokensFound;
    }
    
    // Clean up
    CFRelease(tokenizer);
    CFRelease(locale);