// Tokenizer: creates and returns the list of tokens parsed from the
// given String input.
// Implementation:
/// Converts [input] into a flat list of [Token]s.
///
/// Strings and comments are extracted first and replaced in [input] by a
/// null-char placeholder, so the main tokenizer regex cannot produce
/// ambiguous matches inside them. Context-dependent token properties
/// (method vs. function, constructor vs. class, static member access,
/// private identifier) are resolved by sliding a three-token window
/// (previous / current / next) over the token stream. When the
/// `lineNumbers` flag is set, line-number tokens are interleaved at every
/// newline, including newlines embedded inside multi-line tokens.
List<Token> tokenizer(String input) {
  List<Token> tokens = [];
  List<String> stringCommentList = [];
  // Strings and comments are pulled out up front to avoid ambiguous
  // matches later; each occurrence becomes a single placeholder char.
  // (allMatches iterates the original immutable string, so reassigning
  // `input` inside the loop is safe.)
  RegexCollection.regExpStringOrComment.allMatches(input).forEach((e) {
    stringCommentList.add(e.group(0)!);
    input = input.replaceFirst(e.group(0)!, RegexCollection.nullChar);
  });
  // Some properties of a token depend on the trailing and upcoming
  // tokens; previousToken/currentToken/nextToken track that window.
  Token? previousToken, currentToken, nextToken;
  final tokenList = RegexCollection.regExpTokenizer
      .allMatches(input)
      .map((e) => e.group(0)!)
      .toList();
  final listLength = tokenList.length;
  int p = 0, // read cursor into stringCommentList
      currentLineNumber = 1,
      // Width of the widest line-number label, used for padding.
      // NOTE(review): derived from the `text` field, not from `input` —
      // presumably `text` holds the original source; confirm intentional.
      maxLength = '${text.split('\n').length + 1}'.length;

  // Returns the token at tokenList[index]; a null-char placeholder is
  // resolved to the next extracted string/comment (consuming `p`).
  Token resolveToken(int index) {
    if (RegexCollection.isNullChar(tokenList[index])) {
      return getTokenByString(stringCommentList[p++]);
    }
    return getTokenByString(tokenList[index]);
  }

  if (listLength != 0) {
    currentToken = resolveToken(0);
  }
  if (listLength > 1) {
    nextToken = resolveToken(1);
  }
  if (lineNumbers) {
    // Label for the very first line, emitted before any code token.
    tokens.add(Token(_getLineValue(currentLineNumber++, maxLength),
        TokenTypes.lineNumber, false));
  }
  for (int i = 0; i < listLength; ++i) {
    // `currentToken` is non-null for every in-range index; bind a
    // promoted local so the nullable type cannot be dereferenced
    // unchecked below (the original read `currentToken.type` after the
    // branch, where no null promotion applies).
    final token = currentToken!;
    if (token.type == TokenTypes.identifier) {
      if (previousToken != null &&
          previousToken.value.endsWith('.') &&
          previousToken.isClassContext) {
        // Accessed through a class context: static member.
        token.type = TokenTypes.staticType;
        token.isClassContext = true;
      } else if (nextToken != null &&
          nextToken.value.trim().startsWith('(')) {
        // Followed by '(' — a call: method if preceded by '.', else a
        // free function.
        token.isClassContext = false;
        token.type =
            (previousToken != null && previousToken.value.endsWith('.'))
                ? TokenTypes.method
                : TokenTypes.function;
      } else if (RegexCollection.isPrivate(token.value)) {
        // Plain identifier matching the private-name pattern.
        token.isClassContext = false;
        token.type = TokenTypes.private;
      } else {
        // Plain identifier.
        token.isClassContext = false;
        token.type = TokenTypes.identifier;
      }
    } else if (token.type == TokenTypes.classType) {
      if (nextToken != null && nextToken.value.trim().startsWith('(')) {
        // Class name immediately followed by '(' — constructor call.
        token.type = TokenTypes.constructor;
        token.isClassContext = false;
      } else {
        // Bare class reference; keeps the class context alive.
        token.type = TokenTypes.classType;
        token.isClassContext = true;
      }
    } else if ((token.type == TokenTypes.operator &&
            token.value.endsWith('.')) ||
        token.type == TokenTypes.separator) {
      // '.' operators and separators propagate the class-context flag so
      // a chain like `Foo.bar` keeps its context across the dot.
      token.isClassContext =
          previousToken == null ? false : previousToken.isClassContext;
    } else {
      token.isClassContext = false;
    }
    if (lineNumbers && token.value.contains('\n')) {
      // Multi-line token (e.g. block comment / multi-line string):
      // split it and inject a line-number token before each new line.
      final splits = token.value.split('\n');
      tokens.add(Token(splits[0], token.type, token.isClassContext));
      for (int j = 1; j < splits.length; ++j) {
        tokens.add(Token('\n${_getLineValue(currentLineNumber++, maxLength)}',
            TokenTypes.lineNumber, false));
        tokens.add(Token(splits[j], token.type, token.isClassContext));
      }
    } else {
      tokens.add(token);
    }
    // Separators carry no context of their own: remember the last
    // meaningful token instead.
    if (token.type != TokenTypes.separator) {
      previousToken = token;
    }
    // Slide the three-token window forward.
    currentToken = nextToken;
    nextToken = (i < listLength - 2) ? resolveToken(i + 2) : null;
  }
  return tokens;
}