"""Extract numeric answers from step-by-step solution text."""

import re
from typing import List, Tuple, Optional

# Unsigned numeric core: integer, float, or leading-dot float, with an
# optional exponent (e.g. 123, 123.45, .5, 1.23e-4).
_NUMBER_CORE = r'\d*\.?\d+(?:[eE][-+]?\d+)?'

# First number anywhere in a string, with an optional sign.
_ANY_NUMBER_RE = re.compile(r'[-+]?' + _NUMBER_CORE)

# First number following a "**Answer:**" tag.  The lookbehind keeps digits
# that are glued to an identifier (the 0 in "F_A0") or to a preceding
# decimal point from being picked up as the answer.
_ANSWER_TAG_RE = re.compile(
    r'\*\*Answer:\*\*.*?(?<![\w.])([-+]?' + _NUMBER_CORE + r')',
    re.IGNORECASE,
)

# Number directly after an equals sign.  A sign here is always unary, so no
# leading word boundary is required (a leading \b would reject "= -5").
_AFTER_EQUALS_RE = re.compile(r'=\s*([-+]?' + _NUMBER_CORE + r')\b')

# Standalone number.  The \b pair keeps the match off digits embedded in
# identifiers such as "F_A0".  A sign is only taken when it is in unary
# position, i.e. not directly preceded by a word character, a dot or a
# closing bracket ("is -5" -> -5, but "5-10" -> 5 and 10).
_STANDALONE_RE = re.compile(
    r'(?:(?<![\w.)\]])[-+])?\b' + _NUMBER_CORE + r'\b'
)

# Step label: "Step N" or a list-style "N." that is not the start of a
# decimal number such as "7.65".
_STEP_LABEL = r'(?:Step\s*\d+|\d+\.(?!\d))'

# Start of a step at the beginning of the text or of a line, optionally
# preceded by markdown bold ("**") or heading ("#") markers.
_STEP_START_RE = re.compile(
    r'(?:^|\n)\s*(?:\*\*|#*)\s*' + _STEP_LABEL,
    re.IGNORECASE,
)

# Step prefix to strip from an already-trimmed step text.  The colon and the
# closing "**" are both optional and may come in either order, so
# "**Step 1:** x", "**Step 1**: x", "Step 1: x" and "1. x" all reduce to "x".
_STEP_PREFIX_RE = re.compile(
    r'^(?:\*\*|#*)\s*' + _STEP_LABEL
    + r'\s*(?::\s*(?:\*\*)?|\*\*\s*:?)?\s*',
    re.IGNORECASE,
)


def convert_to_float_eng(text: str) -> Optional[float]:
    """
    Converts a string with a number and optional units to a float.

    Handles integers, floats, and scientific notation (e.g., '1.23e-4').
    Commas are removed before matching, and the first number found in the
    string is the one returned; surrounding text such as units is ignored.

    Args:
        text: The input string to parse.

    Returns:
        The extracted number as a float, or None if ``text`` is not a
        string or contains no number.
    """
    if not isinstance(text, str):
        return None

    found = _ANY_NUMBER_RE.search(text.replace(',', ''))
    if found is None:
        return None

    try:
        return float(found.group(0))
    except ValueError:
        return None


def extract_final_answer_eng(text: str) -> Optional[float]:
    """
    Extracts the most likely final numerical answer from a text block.

    All whitespace runs are collapsed to single spaces first, then the
    following are tried in order:

    1. The first number after a "**Answer:**" tag.
    2. The last number that directly follows an equals sign.
    3. The last standalone number anywhere in the text.

    Args:
        text: The text block to search.

    Returns:
        The chosen number as a float, or None if ``text`` is not a string
        or no number is found.
    """
    if not isinstance(text, str):
        return None

    flat = re.sub(r'\s+', ' ', text).strip()

    # Priority 1: the number after the "**Answer:**" tag.
    tagged = _ANSWER_TAG_RE.search(flat)
    if tagged:
        return convert_to_float_eng(tagged.group(1))

    # Priority 2: the last number that directly follows an equals sign.
    after_equals = _AFTER_EQUALS_RE.findall(flat)
    if after_equals:
        return convert_to_float_eng(after_equals[-1])

    # Priority 3: last resort, the very last standalone number.
    standalone = _STANDALONE_RE.findall(flat)
    if standalone:
        return convert_to_float_eng(standalone[-1])

    return None


def extract_steps(full_text: str) -> Tuple[List[str], List[Optional[float]], Optional[float]]:
    """
    Parses a full solution text into its constituent parts.

    The text is split wherever a line starts with a step label ("Step N" or
    "N.", optionally wrapped in markdown bold/heading markers).  If no label
    is found, the whole trimmed text is treated as a single step.  Each
    step's label prefix is removed before its answer is extracted, so the
    step number itself is never mistaken for a result.

    Args:
        full_text: The complete solution text.

    Returns:
        A tuple ``(step_texts, step_answers, overall_answer)`` where
        ``step_texts`` are the cleaned step bodies, ``step_answers`` holds
        the number extracted from each step (or None), and
        ``overall_answer`` is the number extracted from the whole text,
        falling back to the last non-None step answer.
    """
    starts = [m.start() for m in _STEP_START_RE.finditer(full_text)]

    if starts:
        # Each step runs up to the start of the next one (or end of text).
        ends = starts[1:] + [len(full_text)]
        raw_steps = [full_text[a:b].strip() for a, b in zip(starts, ends)]
    else:
        raw_steps = [full_text.strip()]

    cleaned_step_texts = [
        _STEP_PREFIX_RE.sub('', raw).strip() for raw in raw_steps
    ]
    step_answers = [extract_final_answer_eng(s) for s in cleaned_step_texts]

    overall_final_answer = extract_final_answer_eng(full_text)
    if overall_final_answer is None:
        overall_final_answer = next(
            (ans for ans in reversed(step_answers) if ans is not None),
            None,
        )

    return cleaned_step_texts, step_answers, overall_final_answer


if __name__ == '__main__':
    # --- Test Case 1: Simple CSTR Problem (from before) ---
    sample_solution_cstr = """
    **Step 1:** State the CSTR design equation.
    V = (F_A0 - F_A) / (-r_A)

    **Step 2:** Substitute the given values into the equation.
    V = (1.26 mol/s - 0.99 mol/s) / (0.0353 mol/(L·s))
    V = 0.27 / 0.0353 = 7.6487 L

    **Step 3:** Calculate the final volume.
    V = 7.65 L

    **Answer:** The required reactor volume is 7.65 liters.
    """

    print("--- Test Case 1: Simple CSTR Problem ---")
    steps, step_answers, final_answer = extract_steps(sample_solution_cstr)

    print(f"\nOverall Final Answer Extracted: {final_answer}\n")
    for i, (text, answer) in enumerate(zip(steps, step_answers)):
        print(f"--- Step {i+1} ---")
        print(f"Text: {text}")
        print(f"Answer found in step: {answer}")
        print("-" * 15)

    print("\n" + "="*50 + "\n")  # Separator for clarity