I am trying to turn a paragraph into an object that lists each word together with its frequency.
/*
 * Word-frequency counter.
 *
 * Splits `string` into words using `pattern`, tallies how often each word
 * occurs in `counts`, converts that tally into `dict` (an array of
 * { text, size } entries, in order of first appearance) and logs `dict`.
 *
 * NOTE: with the flags used here, `\w` only matches ASCII letters, digits and
 * "_", so this pattern finds no words at all in the Farsi paragraph below.
 */
var pattern = /\w+/g;
// the farsi paragraph
var string = "من امروز در مورد مهر خروج مشمولین اطلاعات جدیدی از سفارت ایران در مالزی گرفتم";
/* String.prototype.match returns null (not an empty array) when a global
   pattern matches nothing. Fall back to [] so the rest of the script yields
   an empty result instead of throwing on `null.reduce`. */
var matchedWords = string.match( pattern ) || [];
/* The Array.prototype.reduce method assists us in producing a single value from an
   array. In this case, we're going to use it to output an object with results. */
var counts = matchedWords.reduce(function ( stats, word ) {
    /* `stats` is the object that we'll be building up over time.
       `word` is each individual entry in the `matchedWords` array.
       A word seen for the first time reads as undefined, so start from 0. */
    stats[ word ] = ( stats[ word ] || 0 ) + 1;
    /* Because we are building up `stats` over numerous iterations,
       we need to return it for the next pass to modify it. */
    return stats;
}, Object.create( null ));
/* `counts` is created without a prototype, so words such as "hasOwnProperty"
   or "__proto__" are counted like any other key. */
// build the dictionary: one { text, size } entry per distinct word
var dict = Object.keys( counts ).map(function ( word ) {
    return { 'text': word, "size": counts[ word ] };
});
/* lets print and see if you can solve your problem */
console.log( dict );
The code originally worked for an English paragraph. However, I need to use it for a Farsi one. I know that something other than "/\w+/g" should be used in:
var pattern = /\w+/g,
but I don't know what.
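In your regex, use the character class for "any character but whitespace", which is \S. The original \w only matches ASCII letters, digits and the underscore, so it never matches Farsi letters.
Edit: (whitespace here includes newlines, tabs and spaces)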
/*
 * Word-frequency counter for whitespace-separated text.
 *
 * Splits `string` into runs of non-whitespace characters using `pattern`,
 * tallies how often each word occurs in `counts`, converts that tally into
 * `dict` (an array of { text, size } entries, in order of first appearance)
 * and logs `dict`.
 */
var pattern = /\S+/g;
// the farsi paragraph
var string = "من امروز در مورد مهر خروج مشمولین اطلاعات جدیدی از سفارت ایران در مالزی گرفتم";
/* String.prototype.match returns null (not an empty array) when a global
   pattern matches nothing, e.g. for an empty or whitespace-only string.
   Fall back to [] so the reduce below never runs on null. */
var matchedWords = string.match( pattern ) || [];
/* The Array.prototype.reduce method assists us in producing a single value from an
   array. In this case, we're going to use it to output an object with results. */
var counts = matchedWords.reduce(function ( stats, word ) {
    /* `stats` is the object that we'll be building up over time.
       `word` is each individual entry in the `matchedWords` array.
       A word seen for the first time reads as undefined, so start from 0. */
    stats[ word ] = ( stats[ word ] || 0 ) + 1;
    /* Because we are building up `stats` over numerous iterations,
       we need to return it for the next pass to modify it. */
    return stats;
}, Object.create( null ));
/* `counts` is created without a prototype, so words such as "hasOwnProperty"
   or "__proto__" are counted like any other key. */
// build the dictionary: one { text, size } entry per distinct word
var dict = Object.keys( counts ).map(function ( word ) {
    return { 'text': word, "size": counts[ word ] };
});
/* lets print and see if you can solve your problem */
console.log( dict );