Engchain / utils /evaluation.py
usmansafdarktk
Initial commit for Hugging Face Space
a03bf1f
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
def clean_text(text: str) -> str:
    r"""Strip simple LaTeX and currency markup from *text*.

    Two substitutions are applied in order:

    1. ``\boxed{...}`` is replaced by its contents (everything up to the
       first closing brace).
    2. Every ``$``, ``€``, ``£`` and backslash character is removed.
    """
    substitutions = (
        # Unwrap \boxed{...}, keeping only what is inside the braces.
        (r"\\boxed{([^}]*)}", r"\1"),
        # Drop math-mode/currency signs and any leftover backslashes.
        (r"[$€£\\]", ""),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text
_NUMBER_RE = re.compile(
    r"""
    -?                      # optional leading minus sign
    (?:
        \d+(?:\.\d+)?       # integer or decimal: 12, 12.5
        |
        (?<!\d)\.\d+        # leading-decimal form: .5 (not the tail of 1.2.3)
    )
    (?:[eE][-+]?\d+)?       # optional exponent: e5, E-3, e+10
    """,
    re.VERBOSE,
)


def extract_numbers(text: str) -> list:
    """Return every number found in *text* as a list of floats, in order.

    The text is first passed through ``clean_text``, then all commas are
    removed so that thousands separators do not split a number
    (``"1,234"`` is read as ``1234.0``).  Note that this also joins
    comma-separated digits that have no space between them.

    Recognised forms are integers, decimals, leading-decimal numbers such as
    ``.5`` and scientific notation such as ``1.5e-3``.  An exponent is only
    consumed when digits follow the ``e``/``E``, so ``5eV`` still yields 5.

    Args:
        text: Free-form text that may contain numbers.

    Returns:
        The numbers in order of appearance; an empty list if none are found.
    """
    without_commas = clean_text(text).replace(",", "")
    # Without the exponent part, "1.5e-3" would be read as 1.5 and -3.
    return [float(match) for match in _NUMBER_RE.findall(without_commas)]
def validate_answer(ref: str, llm: str, tolerance: float = 0.1) -> bool:
    """Check whether the LLM response contains the reference's final number.

    The last number found in *ref* is taken as the reference answer and is
    compared against every number found in *llm*.  All values are rounded
    to two decimals before comparison.

    Args:
        ref: Reference solution text; its last number is the expected answer.
        llm: LLM response text; any of its numbers may match.
        tolerance: Maximum absolute difference (inclusive) that still counts
            as a match.

    Returns:
        True if at least one number in *llm* is within *tolerance* of the
        reference answer.  False if there is no such number, or if either
        text contains no numbers at all.
    """
    ref_nums = extract_numbers(ref)
    llm_nums = extract_numbers(llm)
    if not ref_nums or not llm_nums:
        return False  # nothing to compare

    ref_final = round(ref_nums[-1], 2)

    def _within_tolerance(value: float) -> bool:
        # The operands are quantised to two decimals, so the difference is
        # rounded the same way.  Otherwise binary float error makes the
        # inclusive bound unreliable (1.1 - 1.0 == 0.10000000000000009).
        diff = round(abs(round(value, 2) - ref_final), 2)
        return diff <= tolerance

    return any(_within_tolerance(n) for n in llm_nums)
def compute_similarity(sol: str, llm_resp: str) -> float:
    """Return the TF-IDF cosine similarity of two texts as a percentage.

    A fresh ``TfidfVectorizer`` is fitted on just the two given texts, and
    the cosine similarity of their TF-IDF vectors is scaled to 0-100 and
    rounded to two decimals.

    Args:
        sol: Reference solution text.
        llm_resp: LLM response text.

    Returns:
        Similarity as a percentage.  0.0 is returned when the vectorizer
        cannot build a vocabulary from the two texts.
    """
    try:
        # fit_transform is equivalent to fit() followed by transform() on
        # the same documents.
        tfidf = TfidfVectorizer().fit_transform([sol, llm_resp])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when neither
        # text yields a token, e.g. two empty strings.  Treat that as no
        # measurable similarity rather than crashing the evaluation.
        return 0.0

    sim = cosine_similarity(tfidf[0], tfidf[1])[0][0]
    # Convert to a builtin float so the result matches the annotation.
    return float(round(sim * 100, 2))