{ "current_model": { "name": "Granite-107M-Multilingual", "repo": "ibm-granite/granite-embedding-107m-multilingual", "params": "107M", "pros": [ "Already integrated and working", "Fast (107M parameters)", "Proven in production tests", "Correctly deduplicated Gemma-3 extractions (47.8% duplicates)", "0% false positives with Qwen2.5 1.5B" ], "cons": [ "Smaller model (107M vs 500M+)", "May miss nuanced similarities" ], "test_results": { "qwen2.5_1.5b_extraction": { "duplicate_rate": "0%", "deduplication_accuracy": "100%", "note": "Extraction already unique per window" }, "gemma3_1b_extraction": { "duplicate_rate": "47.8%", "deduplication_accuracy": "100%", "note": "Correctly identified all duplicates" } } }, "alternatives": { "bge_m3": { "name": "BGE-M3", "repo": "BAAI/bge-m3", "gguf_repo": "lm-kit/bge-m3-gguf", "params": "568M", "pros": [ "SOTA on MTEB Chinese benchmarks", "Larger model (568M vs 107M)", "Better semantic understanding" ], "cons": [ "5x larger (slower)", "Upstream repo (BAAI/bge-m3) requires sentence-transformers (not GGUF)", "Unknown if the GGUF conversion (lm-kit/bge-m3-gguf) works with llama-cpp" ], "recommendation": "Worth testing if accuracy issues arise" }, "multilingual_e5": { "name": "Multilingual-E5-Large", "repo": "intfloat/multilingual-e5-large", "params": "560M", "pros": [ "Microsoft-backed, widely tested", "Excellent for multilingual text", "Good for Chinese text" ], "cons": [ "5x larger than Granite-107M", "Requires sentence-transformers", "No GGUF version readily available" ], "recommendation": "Consider if switching to sentence-transformers" } }, "recommendation": { "current_status": "KEEP Granite-107M", "rationale": [ "Working correctly in production", "Fast enough for real-time use", "Zero false positives in tests", "Simple GGUF integration" ], "when_to_upgrade": [ "If false positives/negatives appear in production", "If better semantic matching is needed (not just exact duplicates)", "If processing very long texts (which need better context understanding)" ], "suggested_thresholds": { "strict": 0.9, "default": 0.85, 
"lenient": 0.8 } } }