{
"current_model": {
"name": "Granite-107M-Multilingual",
"repo": "ibm-granite/granite-embedding-107m-multilingual",
"params": "107M",
"pros": [
"Already integrated and working",
"Fast (107M parameters)",
"Proven in production tests",
"Correctly deduplicated Gemma-3 (47.8% dupes)",
"0% false positives with Qwen2.5 1.5B"
],
"cons": [
"Smaller model (107M vs 500M+)",
"May miss nuanced similarities"
],
"test_results": {
"qwen2.5_1.5b_extraction": {
"duplicate_rate": "0%",
"deduplication_accuracy": "100%",
"note": "Extracted text was already unique per window, so there were no duplicates to remove"
},
"gemma3_1b_extraction": {
"duplicate_rate": "47.8%",
"deduplication_accuracy": "100%",
"note": "Correctly identified all duplicates"
}
}
},
"alternatives": {
"bge_m3": {
"name": "BGE-M3",
"repo": "BAAI/bge-m3",
"gguf_repo": "lm-kit/bge-m3-gguf",
"params": "568M",
"pros": [
"SOTA on MTEB Chinese benchmarks",
"Larger model (568M vs 107M)",
"Better semantic understanding"
],
"cons": [
"5x larger (slower)",
"Officially distributed for sentence-transformers; GGUF is a third-party conversion",
"Unverified whether the GGUF conversion works with llama-cpp"
],
"recommendation": "Worth testing if accuracy issues arise"
},
"multilingual_e5": {
"name": "Multilingual-E5-Large",
"repo": "intfloat/multilingual-e5-large",
"params": "560M",
"pros": [
"Microsoft-backed, widely tested",
"Excellent for multilingual",
"Good for Chinese text"
],
"cons": [
"5x larger than Granite-107M",
"Requires sentence-transformers",
"No GGUF version readily available"
],
"recommendation": "Consider if the project switches to sentence-transformers"
}
},
"recommendation": {
"current_status": "KEEP Granite-107M",
"rationale": [
"Working correctly in production",
"Fast enough for real-time use",
"Zero false positives in tests",
"Simple GGUF integration"
],
"when_to_upgrade": [
"If false positives/negatives appear in production",
"If better semantic matching is needed (beyond exact-duplicate detection)",
"If very long texts are processed (better context understanding is needed)"
],
"suggested_thresholds": {
"strict": 0.9,
"default": 0.85,
"lenient": 0.8
}
}
}
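
A minimal sketch of how the thresholds above could be applied, assuming they are cosine-similarity cutoffs and that the Granite model is loaded from a GGUF file through llama-cpp-python. The model path and the embed/deduplicate helpers are illustrative assumptions, not the actual tiny-scribe implementation.

import numpy as np
from llama_cpp import Llama

# Cosine-similarity cutoffs mirroring "suggested_thresholds" above.
THRESHOLDS = {"strict": 0.9, "default": 0.85, "lenient": 0.8}

# Hypothetical local path to a GGUF build of granite-embedding-107m-multilingual.
embedder = Llama(
    model_path="models/granite-embedding-107m-multilingual-Q8_0.gguf",
    embedding=True,
    verbose=False,
)

def embed(text: str) -> np.ndarray:
    """Return an L2-normalized embedding so a dot product equals cosine similarity."""
    vec = np.asarray(embedder.embed(text), dtype=np.float32)
    if vec.ndim > 1:  # some llama-cpp-python builds return per-token vectors
        vec = vec.mean(axis=0)
    return vec / np.linalg.norm(vec)

def deduplicate(windows: list[str], mode: str = "default") -> list[str]:
    """Greedily keep each text window unless it is too similar to one already kept."""
    cutoff = THRESHOLDS[mode]
    kept: list[str] = []
    kept_vecs: list[np.ndarray] = []
    for text in windows:
        vec = embed(text)
        if any(float(vec @ prev) >= cutoff for prev in kept_vecs):
            continue  # near-duplicate of an earlier window; drop it
        kept.append(text)
        kept_vecs.append(vec)
    return kept

if __name__ == "__main__":
    windows = [
        "The quarterly budget review moved to next Tuesday.",
        "Quarterly budget review has been moved to next Tuesday.",  # near-duplicate
        "Lunch order: two vegetarian, one gluten-free.",
    ]
    unique = deduplicate(windows, mode="default")
    print(f"kept {len(unique)} of {len(windows)} windows")

The greedy keep-first pass matches the behavior described in test_results: a window is dropped only when its similarity to an already-kept window reaches the chosen cutoff, so fully unique extractions (the Qwen2.5 1.5B case) pass through untouched.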