detectCorruptionPatterns static method
Detects potential image corruption patterns in model responses
Implementation
/// Patterns whose presence marks a response as corrupted.
///
/// Compiled once and reused instead of being rebuilt on every call to
/// [detectCorruptionPatterns].
static final List<RegExp> _corruptionPatterns = [
  // Runaway "describe." repetition.
  RegExp(r'describe\.describe\.describe\.+'),
  // Response that starts with ten or more rupee symbols.
  RegExp(r'^[₹]{10,}'),
  // Stand-alone "ph" at least three times on the same line.
  RegExp(r'\bph\b.*\bph\b.*\bph\b'),
  // Response that starts with one character repeated ten or more times
  // (the captured character plus at least nine repeats of it).
  RegExp(r'^(.)\1{9,}'),
  // The same word three times in a row joined by dots ("foo.foo.foo.").
  // The back-reference keeps ordinary dotted text such as "U.S.A." or
  // "www.example.com." from being reported as corruption.
  RegExp(r'\b(\w+)\.\1\.\1\.+'),
  // The same one- or two-letter token three times in a row ("ab ab ab").
  // The back-reference keeps ordinary phrases such as "it is a" from being
  // reported as corruption.
  RegExp(r'\b([a-zA-Z]{1,2})\s+\1\s+\1\b'),
];

/// Separator used to break a response into whitespace-delimited tokens.
static final RegExp _whitespaceRun = RegExp(r'\s+');

/// Reports whether [response] shows signs of image corruption.
///
/// Returns `true` when [response] matches one of the known corruption
/// patterns, or when it contains more than ten whitespace-separated tokens
/// and a single token of at most three characters makes up more than 30% of
/// them. Returns `false` for an empty response.
///
/// Never throws: if detection itself fails, the error is logged with
/// `debugPrint` and `false` is returned.
static bool detectCorruptionPatterns(String response) {
  try {
    if (response.isEmpty) return false;

    for (final pattern in _corruptionPatterns) {
      if (pattern.hasMatch(response)) {
        debugPrint('ImageTokenizer: Detected corruption pattern - ${pattern.pattern}');
        return true;
      }
    }

    // Trim first so leading/trailing whitespace does not produce empty
    // tokens that would inflate the total and be tallied as "short words".
    final words = response.trim().split(_whitespaceRun);
    if (words.length > 10) {
      // Only short tokens are tallied; they are the ones that might be
      // corrupted data rather than real text.
      final wordCounts = <String, int>{};
      for (final word in words) {
        if (word.length <= 3) {
          wordCounts.update(word, (count) => count + 1, ifAbsent: () => 1);
        }
      }

      // A single short token making up more than 30% of all tokens is
      // treated as corruption.
      for (final entry in wordCounts.entries) {
        if (entry.value > words.length * 0.3) {
          debugPrint('ImageTokenizer: Detected excessive repetition of "${entry.key}" (${entry.value} times)');
          return true;
        }
      }
    }

    return false;
  } catch (e) {
    // Best-effort check: a failure here must not break the caller.
    debugPrint('ImageTokenizer: Error detecting corruption patterns - $e');
    return false;
  }
}