File size: 1,547 Bytes
a03bf1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re

def clean_text(text: str) -> str:
    """Strip LaTeX "boxed" wrappers, currency signs and backslashes from text.

    Two passes are applied in order: first every boxed{...} wrapper is
    replaced by whatever sits between its braces (up to the first closing
    brace), then all dollar, euro, pound and backslash characters are deleted.
    """
    # Pass 1: unwrap \boxed{...}, keeping only the inner content.
    unboxed = re.sub(r"\\boxed{([^}]*)}", r"\1", text)
    # Pass 2: delete $ € £ and any remaining backslashes.
    return re.sub(r"[$€£\\]", "", unboxed)

def extract_numbers(text: str):
    """
    Extract every integer or decimal number from ``text`` as a list of floats.

    The text is first passed through ``clean_text``. Commas are then removed
    only where they act as thousands separators (a digit before and exactly
    three digits after), so "1,234.5" is read as 1234.5 while a list such as
    "1,2,3" is read as three separate numbers.

    Args:
        text: Free-form text that may contain numbers.

    Returns:
        The numbers in order of appearance, each converted to float. An empty
        list if no number is found.
    """
    text = clean_text(text)
    # Stripping *every* comma would fuse "1,2,3" into 123; only drop a comma
    # that sits between a digit and a group of exactly three digits.
    text = re.sub(r"(?<=\d),(?=\d{3}(?!\d))", "", text)
    # Optional leading minus, integer part, optional fractional part.
    return [float(n) for n in re.findall(r"-?\d+(?:\.\d+)?", text)]

def validate_answer(ref: str, llm: str, tolerance=0.1) -> bool:
    """
    Check whether the LLM response contains the reference's final number.

    The last number found in ``ref`` is taken as the expected answer and is
    compared against every number found in ``llm``. Both sides are rounded to
    two decimals before the comparison.

    Args:
        ref: Reference text; its final number is the expected answer.
        llm: LLM response text; any of its numbers may match.
        tolerance: Largest absolute difference (inclusive) that still counts
            as a match.

    Returns:
        True if any number in ``llm`` is within ``tolerance`` of the final
        number in ``ref``. False otherwise, including when either text
        contains no numbers at all.
    """
    ref_nums = extract_numbers(ref)
    llm_nums = extract_numbers(llm)

    if not ref_nums or not llm_nums:
        return False  # nothing to compare

    ref_final = round(ref_nums[-1], 2)  # round to 2 decimals

    # Binary floats cannot represent most decimals exactly, so a difference
    # that equals `tolerance` on paper (1.1 vs 1.0 with tolerance 0.1) can
    # come out a hair larger. A tiny slack keeps the bound truly inclusive.
    limit = tolerance + 1e-9

    return any(abs(round(n, 2) - ref_final) <= limit for n in llm_nums)


def compute_similarity(sol: str, llm_resp: str) -> float:
    """Returns TF-IDF cosine similarity between solution and LLM response (0–100%).

    A TF-IDF vectorizer is fitted on just these two texts, and the cosine
    similarity of the two resulting vectors is scaled to a percentage.

    Args:
        sol: Reference solution text.
        llm_resp: LLM response text.

    Returns:
        Similarity as a percentage rounded to two decimals. 0.0 is returned
        when neither text yields a token for the vectorizer, which makes the
        comparison undefined.
    """
    try:
        tfidf = TfidfVectorizer().fit_transform([sol, llm_resp])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when no token is
        # found in either text, e.g. two empty strings or bare single-digit
        # answers. Treat that as "no measurable overlap" instead of crashing.
        return 0.0
    sim = cosine_similarity(tfidf[0], tfidf[1])[0][0]
    # Coerce the NumPy scalar to a built-in float to match the annotation.
    return round(float(sim) * 100, 2)