Spaces:
Sleeping
Sleeping
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import re | |
def clean_text(text: str) -> str:
    r"""Strip LaTeX wrappers and currency markers from *text*.

    Each ``\boxed{...}`` is replaced by its inner content (up to the first
    closing brace), after which every ``$``, ``€``, ``£`` and backslash
    character is deleted.
    """
    # Unwrap \boxed{...} first, while the backslash is still present to match on.
    unboxed = re.sub(r"\\boxed{([^}]*)}", r"\1", text)
    # Then drop currency symbols and any remaining backslashes.
    return re.sub(r"[$€£\\]", "", unboxed)
def extract_numbers(text: str):
    """Return every number found in *text* as a list of floats.

    The text is first passed through ``clean_text``. Commas are dropped only
    when they act as thousands separators (a digit before and exactly three
    digits after), so ``"1,234"`` parses as ``1234.0`` while a list such as
    ``"1,2,3"`` yields three separate values instead of being fused into
    ``123``.

    Integers and decimals with an optional leading minus sign are recognised.
    Returns an empty list when no number is present.
    """
    text = clean_text(text)
    # Strip grouping commas only; removing every comma would merge
    # comma-separated lists like "1,2,3" into a single number.
    text = re.sub(r"(?<=\d),(?=\d{3}(?!\d))", "", text)
    nums = re.findall(r"-?\d+(?:\.\d+)?", text)
    return [float(n) for n in nums]
def validate_answer(ref: str, llm: str, tolerance=0.1) -> bool:
    """Check whether the LLM response contains the reference's final number.

    The last number found in *ref* is taken as the expected answer and is
    compared against every number found in *llm*. Both sides are rounded to
    two decimals before comparison.

    Args:
        ref: Reference text; its final number is the expected answer.
        llm: LLM response text; any of its numbers may match.
        tolerance: Maximum allowed absolute difference (inclusive).

    Returns:
        True if any number in *llm* is within *tolerance* of the reference's
        final number; False otherwise, including when either text contains
        no numbers.
    """
    import math

    ref_nums = extract_numbers(ref)
    llm_nums = extract_numbers(llm)
    if not ref_nums or not llm_nums:
        return False  # nothing to compare

    ref_final = round(ref_nums[-1], 2)

    def within_tolerance(value):
        diff = abs(round(value, 2) - ref_final)
        # Binary floats make e.g. abs(2.1 - 2.0) come out a hair above 0.1,
        # so a difference equal to the tolerance up to rounding error is
        # treated as on the (inclusive) boundary.
        return diff <= tolerance or math.isclose(diff, tolerance)

    return any(within_tolerance(n) for n in llm_nums)
def compute_similarity(sol: str, llm_resp: str) -> float:
    """Score how similar the LLM response is to the solution, as a percentage.

    A TF-IDF vectorizer is fitted on the two texts, both are vectorized with
    it, and the cosine similarity between the two vectors is scaled by 100
    and rounded to two decimals.

    NOTE(review): presumably the vectorizer rejects input with no usable
    tokens (e.g. two blank strings) — confirm callers never pass that.
    """
    documents = [sol, llm_resp]

    vectorizer = TfidfVectorizer()
    vectorizer.fit(documents)
    matrix = vectorizer.transform(documents)

    # Row 0 is the solution, row 1 the LLM response; the result is a 1x1 array.
    score = cosine_similarity(matrix[0], matrix[1])[0][0]
    percentage = score * 100
    return round(percentage, 2)