tfIdfProbability function
Word probability calculation using Term Frequency - Inverse Document Frequency (TF-IDF).
This function calculates, for each word, the probability that it is the main topic of a document (its importance score).
Returns the probability of each word in each document.
Implementation
/// Computes a TF-IDF score for every word of every document in [tokenOut].
///
/// The result holds one map per entry of `tokenOut.documentTotalWord`, in the
/// same order. Each map is keyed by the words found in the matching
/// `tokenOut.documentBOW` entry and stores `tf * idf`, where:
///
/// * `tf` is the word's value in `documentBOW[i]` divided by
///   `documentTotalWord[i]`;
/// * `idf` is `log10(documentCount / wordInDocumentOccurrence[word])`.
///
/// A word present in a document's bag of words but missing from
/// `wordInDocumentOccurrence` triggers a null-assertion failure.
List<Map<String, double>> tfIdfProbability(TokenizationOutput tokenOut) {
  final int totalDocuments = tokenOut.documentTotalWord.length;

  // IDF depends only on the corpus, so it is computed once up front and then
  // shared by every document below.
  final Map<String, double> idfByWord = {};
  for (final occurrence in tokenOut.wordInDocumentOccurrence.entries) {
    // dart:math's log() is the natural logarithm; dividing by ln10 converts
    // the result to base 10.
    idfByWord[occurrence.key] = log(totalDocuments / occurrence.value) / ln10;
  }

  final List<Map<String, double>> scoresPerDocument = [];
  for (var docIndex = 0; docIndex < totalDocuments; docIndex++) {
    final wordsInDocument = tokenOut.documentTotalWord[docIndex];
    final Map<String, double> scores = {};

    // Score each word that appears in this document.
    for (final wordCount in tokenOut.documentBOW[docIndex].entries) {
      final termFrequency = wordCount.value / wordsInDocument;
      final inverseDocumentFrequency = idfByWord[wordCount.key]!;
      scores[wordCount.key] = termFrequency * inverseDocumentFrequency;
    }

    // Each document keeps its own, independent word-score map.
    scoresPerDocument.add(scores);
  }
  return scoresPerDocument;
}