Spaces:

Usmansafder
/

Engchain

Sleeping

Engchain / utils /evaluation.py

usmansafdarktk

Initial commit for Hugging Face Space

a03bf1f about 2 months ago

1.55 kB

	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	import re

	def clean_text(text: str) -> str:
	    """Strip LaTeX and currency markup that gets in the way of number parsing.

	    Two passes are applied, in this order:

	    1. ``\\boxed{...}`` is replaced by its contents. The contents are matched
	       up to the first ``}``, so nested braces inside the box are not
	       unwrapped as a unit.
	    2. Every ``$``, ``€``, ``£`` and backslash character is deleted.

	    Args:
	        text: Raw answer text, possibly containing LaTeX or currency symbols.

	    Returns:
	        The text with the markup above removed; everything else is untouched.
	    """
	    # Unwrap the box first: the next pass deletes the backslash that the
	    # \boxed pattern needs in order to match.
	    unboxed = re.sub(r"\\boxed{([^}]*)}", r"\1", text)
	    # Drop math-mode / currency markers and any remaining LaTeX backslashes.
	    return re.sub(r"[$€£\\]", "", unboxed)

	def extract_numbers(text: str):
	    """Return every number found in ``text`` as a list of floats, in order.

	    The text is first passed through ``clean_text``. Parsing rules:

	    * A comma is dropped only when it sits between a digit and a group of
	      exactly three digits (``1,234,567`` -> ``1234567``). Other commas are
	      kept as separators, so ``1,2,3`` yields three numbers instead of 123.
	    * Integers and decimals may carry an exponent (``1.5e3``, ``6.02E+23``),
	      which is parsed as part of the same number.
	    * A ``-`` counts as a sign unless it directly follows a digit, so
	      ``10-20`` yields 10 and 20 while ``x = -3`` yields -3.

	    Args:
	        text: Free-form text such as a reference solution or a model reply.

	    Returns:
	        A list of floats; empty when the text contains no digits.
	    """
	    text = clean_text(text)
	    # Remove thousands separators only; a blanket replace(",", "") would
	    # fuse comma-separated lists into a single bogus number.
	    text = re.sub(r"(?<=\d),(?=\d{3}(?!\d))", "", text)
	    # The lookbehind stops a hyphen that follows a digit (range, date,
	    # subtraction) from being read as the sign of the next number.
	    nums = re.findall(r"(?<!\d)-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?", text)
	    return [float(n) for n in nums]

	def validate_answer(ref: str, llm: str, tolerance=0.1) -> bool:
	    """Check whether the LLM response contains the reference's final number.

	    The last number extracted from ``ref`` is taken as the expected answer.
	    It and every number extracted from ``llm`` are rounded to two decimals,
	    and the answer is accepted if any LLM number lies within ``tolerance``
	    (absolute difference, boundary included) of the expected one.

	    Args:
	        ref: Reference solution text; its last number is the expected answer.
	        llm: LLM response text; all of its numbers are candidates.
	        tolerance: Maximum allowed absolute difference after rounding.

	    Returns:
	        True if some LLM number matches; False if none does or if either
	        text contains no numbers at all.
	    """
	    ref_nums = extract_numbers(ref)
	    llm_nums = extract_numbers(llm)

	    if not ref_nums or not llm_nums:
	        return False  # nothing to compare

	    ref_final = round(ref_nums[-1], 2)

	    # Binary floats make e.g. abs(1.1 - 1.0) come out a hair above 0.1, so a
	    # bare `<= tolerance` rejects some values that sit exactly on the
	    # boundary. A tiny slack keeps the boundary consistently inclusive.
	    limit = tolerance + 1e-9
	    return any(abs(round(n, 2) - ref_final) <= limit for n in llm_nums)


	def compute_similarity(sol: str, llm_resp: str) -> float:
	    """Return the TF-IDF cosine similarity of two texts as a percentage.

	    Both texts are vectorized together with a default ``TfidfVectorizer``,
	    and the cosine similarity of the two resulting rows is scaled to 0-100
	    and rounded to two decimals.

	    Args:
	        sol: Reference solution text.
	        llm_resp: LLM response text.

	    Returns:
	        Similarity in percent. 0.0 is returned when the vectorizer cannot
	        build a vocabulary from the two texts.
	    """
	    try:
	        # fit_transform is the one-step equivalent of fit() + transform()
	        # on the same documents.
	        tfidf = TfidfVectorizer().fit_transform([sol, llm_resp])
	    except ValueError:
	        # Raised for an empty vocabulary, e.g. both texts are empty or hold
	        # only single-character tokens such as "5". Nothing is comparable,
	        # so report zero similarity instead of crashing the caller.
	        return 0.0
	    sim = cosine_similarity(tfidf[0], tfidf[1])[0][0]
	    return round(sim * 100, 2)