Spaces:
Sleeping
Sleeping
File size: 1,547 Bytes
a03bf1f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
def clean_text(text: str) -> str:
    r"""Strip LaTeX and currency decoration from *text*.

    Two substitutions are applied in order:

    1. ``\boxed{...}`` is replaced by the text between its braces
       (the match stops at the first closing brace).
    2. Every ``$``, ``€``, ``£`` and backslash character is deleted.

    Returns the cleaned string; the input is not modified.
    """
    # (pattern, replacement) pairs, applied sequentially; order matters
    # because the second pass would otherwise destroy the "\boxed" marker.
    substitutions = (
        (r"\\boxed{([^}]*)}", r"\1"),
        (r"[$€£\\]", ""),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
def extract_numbers(text: str):
    """Return every number found in *text* as a list of floats.

    The text is first passed through ``clean_text``. Numbers may carry a
    leading minus sign and an optional decimal part. Commas are accepted
    only as thousands separators inside a well-formed grouped number
    (``1,234`` or ``12,345,678.9``); any other comma is treated as a
    delimiter, so ``"1,2"`` yields ``[1.0, 2.0]`` rather than ``[12.0]``.

    Returns an empty list when no number is present.
    """
    text = clean_text(text)
    # First alternative: 1-3 leading digits followed by one or more ",ddd"
    # groups (not followed by a further digit), i.e. a thousands-grouped
    # number. Second alternative: a plain integer or decimal.
    number_pattern = r"-?\d{1,3}(?:,\d{3})+(?!\d)(?:\.\d+)?|-?\d+(?:\.\d+)?"
    tokens = re.findall(number_pattern, text)
    # Drop the grouping commas only now, per token, so that commas separating
    # distinct values never fuse neighbouring numbers together.
    return [float(token.replace(",", "")) for token in tokens]
def validate_answer(ref: str, llm: str, tolerance=0.1) -> bool:
    """Check whether the LLM response contains the reference answer.

    The last number found in *ref* is taken as the reference value and is
    compared against every number found in *llm*. All values are rounded
    to two decimals before comparison.

    Args:
        ref: Reference text; its final number is the expected answer.
        llm: LLM response text; any of its numbers may match.
        tolerance: Maximum absolute difference (inclusive) that still
            counts as a match.

    Returns:
        True if at least one number in *llm* is within *tolerance* of the
        reference value; False otherwise, including when either text
        contains no number at all.
    """
    ref_nums = extract_numbers(ref)
    llm_nums = extract_numbers(llm)
    if not ref_nums or not llm_nums:
        return False  # nothing to compare

    ref_final = round(ref_nums[-1], 2)
    # Binary floats make the inclusive bound unreliable: abs(1.1 - 1.0) is
    # 0.10000000000000009, which would be rejected for tolerance=0.1 while
    # abs(0.3 - 0.2) is accepted. A slack of 1e-9 is far below the 0.01
    # resolution of the rounded values, so it only repairs the boundary.
    limit = tolerance + 1e-9
    return any(abs(round(n, 2) - ref_final) <= limit for n in llm_nums)
def compute_similarity(sol: str, llm_resp: str) -> float:
    """Return the TF-IDF cosine similarity of two texts as a percentage.

    A TF-IDF vectorizer is fitted on the two texts together and the cosine
    similarity of the resulting vectors is scaled to 0-100 and rounded to
    two decimals.

    Args:
        sol: Reference solution text.
        llm_resp: LLM response text.

    Returns:
        Similarity in percent as a plain ``float``. Returns ``0.0`` when
        neither text yields any token (for example, both are empty), since
        there is nothing to compare.

    Raises:
        ValueError: Propagated from the vectorizer for any failure other
            than an empty vocabulary.
    """
    try:
        tfidf = TfidfVectorizer().fit_transform([sol, llm_resp])
    except ValueError as err:
        # The vectorizer refuses to fit when no document contains a token.
        # Treat that as "no similarity" instead of crashing, but let every
        # other ValueError surface unchanged.
        if "empty vocabulary" not in str(err):
            raise
        return 0.0
    sim = cosine_similarity(tfidf[0], tfidf[1])[0][0]
    # float() converts the NumPy scalar so callers get a builtin float.
    return round(float(sim) * 100, 2)
|