File size: 2,704 Bytes
f29ea6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""
Embedding Module for RAG System
Uses FREE sentence-transformers (no API costs).
Gemini is ONLY used for final SQL generation.
"""

from sentence_transformers import SentenceTransformer
import os

# =============================================================================
# FREE LOCAL EMBEDDING MODEL
# =============================================================================

# Checkpoint used for every embedding produced by this module.
# all-MiniLM-L6-v2: fast, good quality, 384 dimensions.
MODEL_NAME = "all-MiniLM-L6-v2"

# Process-wide model cache; stays None until get_model() is first called.
_model = None


def get_model():
    """Return the shared embedding model, loading it on first use.

    The first call prints a loading notice and constructs a
    ``SentenceTransformer`` for ``MODEL_NAME``; the instance is stored in the
    module-level ``_model`` and returned as-is by every later call.
    """
    global _model
    # Fast path: the model has already been constructed.
    if _model is not None:
        return _model
    print(f"  Loading embedding model: {MODEL_NAME}")
    _model = SentenceTransformer(MODEL_NAME)
    return _model

# =============================================================================
# EMBEDDING FUNCTIONS
# =============================================================================

def get_embedding(text):
    """Encode one text and return its embedding as a plain Python list.

    Best-effort: any exception raised while loading the model, encoding, or
    converting the result is printed and reported to the caller as ``None``.
    """
    try:
        vector = get_model().encode(text, convert_to_numpy=True).tolist()
    except Exception as err:
        print(f"Error getting embedding: {err}")
        return None
    return vector

def get_embeddings_batch(texts):
    """Get embeddings for multiple texts at once (efficient).

    Args:
        texts: An iterable of strings. A single bare string is treated as a
            one-element batch, and ``None`` as an empty batch.

    Returns:
        A list with one entry per input text, in input order. Each entry is
        the embedding as a list of floats. If loading the model or encoding
        fails, the error is printed and every entry is ``None`` instead.
        Empty input yields ``[]`` without loading the model.
    """
    if texts is None:
        texts = []
    elif isinstance(texts, str):
        # A bare string would be encoded as ONE sentence, and the loop below
        # would then iterate over the individual floats of its vector.
        texts = [texts]
    else:
        # Materialize generators/iterators so len() in the fallback is safe
        # and the input is not half-consumed when encoding fails.
        texts = list(texts)

    # Nothing to encode: skip the (potentially slow) model load entirely.
    if not texts:
        return []

    try:
        vectors = get_model().encode(
            texts, convert_to_numpy=True, show_progress_bar=False
        )
        return [vec.tolist() for vec in vectors]
    except Exception as e:
        print(f"Error in batch embedding: {e}")
        # Keep the result aligned with the input so callers can zip() them.
        return [None] * len(texts)

# =============================================================================
# TEST
# =============================================================================

def test_embedding():
    """Smoke-test single and batch embedding and report the outcome.

    Prints a short report to stdout. A check counts as passed only when the
    corresponding helper actually returned embeddings (the helpers signal
    failure with ``None`` values instead of raising).

    Returns:
        True if both the single and the batch check passed, False otherwise.
    """
    print("=" * 50)
    print("TESTING EMBEDDINGS (FREE - No API)")
    print("=" * 50)

    test_texts = [
        "Find all employees with salary greater than 50000",
        "Show customers who ordered last month",
        "Count products by category",
    ]

    print(f"\nModel: {MODEL_NAME}")
    print(f"Testing with {len(test_texts)} texts...\n")

    all_ok = True

    # Single embedding
    emb = get_embedding(test_texts[0])
    if emb:
        print("✓ Single embedding works")
        print(f"  Dimension: {len(emb)}")
    else:
        all_ok = False
        print("✗ Single embedding failed")

    # Batch embedding: require one non-empty embedding per input text, not
    # just a usable first element.
    embs = get_embeddings_batch(test_texts)
    if embs and len(embs) == len(test_texts) and all(embs):
        print("✓ Batch embedding works")
        print(f"  Got {len(embs)} embeddings")
    else:
        all_ok = False
        print("✗ Batch embedding failed")

    # Only claim success when every check above actually succeeded.
    if all_ok:
        print("\n✓ All tests passed (FREE - No Gemini used)")
    else:
        print("\n✗ Some tests failed")
    return all_ok

if __name__ == "__main__":
    # Surface the self-test result as the process exit status (0 = success,
    # 1 = failure) so shells and CI can detect a broken embedding setup.
    raise SystemExit(0 if test_embedding() else 1)