Spaces:
Running
Running
| { | |
| "current_model": { | |
| "name": "Granite-107M-Multilingual", | |
| "repo": "ibm-granite/granite-embedding-107m-multilingual", | |
| "params": "107M", | |
| "pros": [ | |
| "Already integrated and working", | |
| "Fast (107M parameters)", | |
| "Proven in production tests", | |
| "Correctly deduplicated Gemma-3 (47.8% dupes)", | |
| "0% false positives with Qwen2.5 1.5B" | |
| ], | |
| "cons": [ | |
| "Smaller model (107M vs 500M+)", | |
| "May miss nuanced similarities" | |
| ], | |
| "test_results": { | |
| "qwen2.5_1.5b_extraction": { | |
| "duplicate_rate": "0%", | |
| "deduplication_accuracy": "100%", | |
| "note": "Extraction already unique per window" | |
| }, | |
| "gemma3_1b_extraction": { | |
| "duplicate_rate": "47.8%", | |
| "deduplication_accuracy": "100%", | |
| "note": "Correctly identified all duplicates" | |
| } | |
| } | |
| }, | |
| "alternatives": { | |
| "bge_m3": { | |
| "name": "BGE-M3", | |
| "repo": "BAAI/bge-m3", | |
| "gguf_repo": "lm-kit/bge-m3-gguf", | |
| "params": "568M", | |
| "pros": [ | |
| "SOTA on MTEB Chinese benchmarks", | |
| "Larger model (568M vs 107M)", | |
| "Better semantic understanding" | |
| ], | |
| "cons": [ | |
| "5x larger (slower)", | |
| "Original repo requires sentence-transformers (not GGUF)", | |
| "Unknown if the GGUF version works with llama-cpp" | |
| ], | |
| "recommendation": "Worth testing if accuracy issues arise" | |
| }, | |
| "multilingual_e5": { | |
| "name": "Multilingual-E5-Large", | |
| "repo": "intfloat/multilingual-e5-large", | |
| "params": "560M", | |
| "pros": [ | |
| "Microsoft-backed, widely tested", | |
| "Excellent for multilingual", | |
| "Good for Chinese text" | |
| ], | |
| "cons": [ | |
| "5x larger than Granite-107M", | |
| "Requires sentence-transformers", | |
| "No GGUF version readily available" | |
| ], | |
| "recommendation": "Consider if switching to sentence-transformers" | |
| } | |
| }, | |
| "recommendation": { | |
| "current_status": "KEEP Granite-107M", | |
| "rationale": [ | |
| "Working correctly in production", | |
| "Fast enough for real-time use", | |
| "Zero false positives in tests", | |
| "Simple GGUF integration" | |
| ], | |
| "when_to_upgrade": [ | |
| "If false positives/negatives appear in production", | |
| "If better semantic matching is needed (not just exact duplicates)", | |
| "If processing very long texts (which need better context understanding)" | |
| ], | |
| "suggested_thresholds": { | |
| "strict": 0.9, | |
| "default": 0.85, | |
| "lenient": 0.8 | |
| } | |
| } | |
| } |