import re

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# A number written with thousands separators, e.g. "1,234" or "12,345,678".
_GROUPED_NUMBER = re.compile(r"(?<!\d)\d{1,3}(?:,\d{3})+(?!\d)")
# A signed integer or decimal, e.g. "42", "-3", "3.14".
_NUMBER = re.compile(r"-?\d+(?:\.\d+)?")


def clean_text(text: str) -> str:
    r"""Strip LaTeX/currency decoration that gets in the way of number parsing.

    ``\boxed{...}`` is replaced by its contents (up to the first closing
    brace), then every ``$``, ``€``, ``£`` and backslash character is removed.

    Args:
        text: Raw answer text.

    Returns:
        The text with the decoration removed.
    """
    text = re.sub(r"\\boxed{([^}]*)}", r"\1", text)
    return re.sub(r"[$€£\\]", "", text)


def extract_numbers(text: str):
    """Return every number found in *text* as a list of floats, in order.

    The text is first passed through :func:`clean_text`. Commas are dropped
    only where they act as thousands separators ("1,234" -> 1234.0); any other
    comma is treated as a delimiter, so "1,2,3" yields three numbers rather
    than being fused into 123.

    Args:
        text: Raw answer text.

    Returns:
        A list of floats; empty if the text contains no numbers.
    """
    cleaned = clean_text(text)
    # Collapse only well-formed digit groups so list-style commas survive.
    ungrouped = _GROUPED_NUMBER.sub(
        lambda match: match.group().replace(",", ""), cleaned
    )
    return [float(token) for token in _NUMBER.findall(ungrouped)]


def validate_answer(ref: str, llm: str, tolerance=0.1) -> bool:
    """Check whether the LLM response contains the reference's final number.

    The last number in *ref* is compared against every number in *llm*; both
    sides are rounded to 2 decimals before comparing.

    Args:
        ref: Reference solution text; its last number is the expected answer.
        llm: LLM response text.
        tolerance: Maximum allowed absolute difference (inclusive).

    Returns:
        True if any number in *llm* is within *tolerance* of the reference
        answer; False otherwise, including when either text has no numbers.
    """
    ref_nums = extract_numbers(ref)
    llm_nums = extract_numbers(llm)
    if not ref_nums or not llm_nums:
        return False  # nothing to compare

    target = round(ref_nums[-1], 2)
    # Both operands are 2-decimal values, so rounding the difference to 2
    # decimals only strips binary float noise (1.1 - 1.0 evaluates to
    # 0.10000000000000009), keeping the inclusive boundary reliable.
    return any(
        round(abs(round(value, 2) - target), 2) <= tolerance
        for value in llm_nums
    )


def compute_similarity(sol: str, llm_resp: str) -> float:
    """Return the TF-IDF cosine similarity of two texts as a percentage.

    Args:
        sol: Reference solution text.
        llm_resp: LLM response text.

    Returns:
        Similarity in the range 0-100, rounded to 2 decimals. Returns 0.0 when
        the vectorizer cannot build a vocabulary from the two texts.
    """
    documents = [sol, llm_resp]
    try:
        tfidf = TfidfVectorizer().fit_transform(documents)
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when neither
        # document yields a token, e.g. two empty strings. Treat that as no
        # similarity, matching the result when only one side is empty.
        # NOTE(review): this also absorbs any other ValueError raised while
        # fitting (e.g. a non-string document) — confirm that is acceptable.
        return 0.0
    similarity = cosine_similarity(tfidf[0], tfidf[1])[0][0]
    # Cast the NumPy scalar so callers get a built-in float.
    return round(float(similarity) * 100, 2)