# controller/pix2text_bp.py
"""Flask blueprint for image-to-LaTeX recognition (Pix2Text) and solving.

Routes:
    GET  /math            - render the upload page
    POST /math/process    - OCR an uploaded image into LaTeX
    POST /math/solve      - solve a LaTeX equation
    GET  /camera          - render the camera capture page
    POST /camera/process  - OCR a camera capture into LaTeX
    POST /camera/solve    - solve a LaTeX equation from camera input
"""
import os

import cv2
from flask import Blueprint, render_template, request, jsonify
from pix2text import Pix2Text

from utils.math_solver import solve_equation
from controller.models.camera_to_latex import camera_to_latex

# Initialize Pix2Text globally once; loading the model is slow, so it must
# not happen per request. On failure the routes fall back to a placeholder.
print("🔹 Loading Pix2Text model (mfd)...")
try:
    p2t = Pix2Text(analyzer_config=dict(model_name='mfd'))
    print("✅ Pix2Text model loaded successfully.")
except Exception as e:
    print(f"❌ Pix2Text failed to initialize: {e}")
    p2t = None

# Flask blueprint
pix2text_bp = Blueprint('pix2text_bp', __name__)

UPLOAD_FOLDER = 'static/uploads'
# NOTE(review): PROCESSED_FOLDER was referenced but never defined, so
# preprocessing always failed with NameError and silently fell back to the
# original image. The location below is an assumption - confirm.
PROCESSED_FOLDER = 'static/processed'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(PROCESSED_FOLDER, exist_ok=True)


def _safe_upload_path(filename):
    """Return a save path inside UPLOAD_FOLDER for a client-supplied name.

    Only the final path component of *filename* is kept, so values such as
    ``../../etc/passwd`` cannot escape the upload directory.

    Returns:
        The joined path, or ``None`` when *filename* is missing or reduces
        to an unusable name (empty, ``.`` or ``..``).
    """
    if not filename:
        return None
    # Some clients send Windows-style paths; normalise separators before
    # taking the basename so the directory part is stripped on any OS.
    name = os.path.basename(filename.replace('\\', '/'))
    if name in ('', '.', '..'):
        return None
    return os.path.join(UPLOAD_FOLDER, name)


def _extract_latex(result):
    """Pull the recognised text out of a Pix2Text ``recognize`` result.

    Handles the result shapes this module has to cope with: a dict with a
    ``'text'`` key, a non-empty list whose first item is such a dict, or
    anything else (stringified as-is).
    """
    if isinstance(result, dict):
        return result.get('text', '')
    if isinstance(result, list) and result and isinstance(result[0], dict):
        return result[0].get('text', '')
    return str(result)


# Optional preprocessing
def preprocess_image(image_path):
    """Preprocess an image for better OCR results.

    Converts to grayscale, applies a mild Gaussian blur and adaptive
    thresholding, then writes the result into PROCESSED_FOLDER as
    ``<stem>_processed<ext>``.

    Args:
        image_path: Path of the image to preprocess.

    Returns:
        Path of the processed image, or *image_path* unchanged when any
        step fails (preprocessing is best-effort).
    """
    try:
        # Read image
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError("Could not read image")

        # Convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Apply mild Gaussian blur to reduce noise while preserving edges
        blurred = cv2.GaussianBlur(gray, (3, 3), 0)

        # Apply adaptive thresholding with parameters better suited for text
        thresh = cv2.adaptiveThreshold(
            blurred, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            15, 2
        )

        # Save processed image. splitext touches only the real extension,
        # so names containing several dots are not mangled.
        stem, ext = os.path.splitext(os.path.basename(image_path))
        processed_path = os.path.join(
            PROCESSED_FOLDER, f"{stem}_processed{ext}"
        )
        # imwrite reports some failures via its return value, not a raise.
        if not cv2.imwrite(processed_path, thresh):
            raise ValueError("Could not write processed image")
        return processed_path
    except Exception as e:
        print(f"Preprocessing error: {e}")
        return image_path  # Return original if preprocessing fails


# -----------------------------
# Math Routes
# -----------------------------
@pix2text_bp.route("/math")
def math_page():
    """Render the math image upload page."""
    return render_template("math.html")


@pix2text_bp.route("/math/process", methods=["POST"])
def process_math_image():
    """Save an uploaded image, preprocess it and run Pix2Text on it.

    Expects a multipart form with an ``image`` file field.

    Returns:
        JSON ``{'success', 'latex', 'image_path'}`` on success,
        ``{'error': ...}`` with status 400 for a missing/unnamed file, or
        status 500 for any processing failure.
    """
    try:
        if 'image' not in request.files:
            return jsonify({'error': 'No image file provided'}), 400

        file = request.files['image']
        filepath = _safe_upload_path(file.filename)
        if filepath is None:
            return jsonify({'error': 'No file selected'}), 400
        file.save(filepath)

        # Preprocess (optional)
        processed_path = preprocess_image(filepath)

        # Run Pix2Text
        if p2t:
            latex = _extract_latex(p2t.recognize(processed_path))
        else:
            latex = "\\text{Pix2Text not initialized}"

        return jsonify({
            'success': True,
            'latex': latex,
            'image_path': filepath
        })
    except Exception as e:
        print(f"❌ Error in /math/process: {e}")
        return jsonify({'error': str(e)}), 500


@pix2text_bp.route("/math/solve", methods=["POST"])
def solve_math_equation():
    """Solve the LaTeX equation given in the JSON body's ``latex`` field.

    Returns:
        JSON ``{'success', 'solution'}`` on success, status 400 when the
        body is not a JSON object containing ``latex``, or status 500 when
        solving fails.
    """
    try:
        # silent=True: a malformed or non-JSON body yields None and is
        # reported as a 400 below instead of surfacing as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'latex' not in data:
            return jsonify({'error': 'No equation provided'}), 400

        solution = solve_equation(data['latex'])
        return jsonify({'success': True, 'solution': solution})
    except Exception as e:
        print(f"❌ Error in /math/solve: {e}")
        return jsonify({'error': str(e)}), 500


# -----------------------------
# Camera Routes
# -----------------------------
@pix2text_bp.route("/camera")
def camera_page():
    """Render the camera capture page"""
    return render_template("camera.html")


@pix2text_bp.route("/camera/solve", methods=["POST"])
def solve_camera_equation():
    """Solve a LaTeX equation from camera input.

    Returns:
        JSON ``{'success', 'solution'}`` on success, status 400 when the
        body is not a non-empty JSON object or has no ``latex`` value, or
        status 500 when solving fails.
    """
    try:
        # silent=True: see solve_math_equation.
        data = request.get_json(silent=True)
        if not data or not isinstance(data, dict):
            return jsonify({'error': 'No data provided'}), 400

        latex_equation = data.get('latex', '')
        if not latex_equation:
            return jsonify({'error': 'No equation provided'}), 400

        # Solve the equation
        solution = solve_equation(latex_equation)
        return jsonify({
            'success': True,
            'solution': solution
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500


@pix2text_bp.route("/camera/process", methods=["POST"])
def process_camera_image():
    """Process camera captured image using Pix2Text.

    The original capture is recognised first, because preprocessing might
    distort mathematical symbols; only when that yields (almost) nothing is
    a second attempt made on the preprocessed image.

    Returns:
        JSON ``{'success', 'latex', 'image_path'}`` on success,
        ``{'error': ...}`` with status 400 for a missing/unnamed file, or
        status 500 for any processing failure.
    """
    try:
        if 'image' not in request.files:
            return jsonify({'error': 'No image file provided'}), 400

        file = request.files['image']
        filepath = _safe_upload_path(file.filename)
        if filepath is None:
            return jsonify({'error': 'No image file selected'}), 400

        # Save original image
        file.save(filepath)

        # Process with Pix2Text if available
        if p2t:
            print(f"Processing image: {filepath}")
            result = p2t.recognize(filepath)
            print(f"Raw result: {result}")
            latex_code = _extract_latex(result)

            # If we get no result or very short result, try with preprocessing
            if len(latex_code.strip()) < 2:
                print("Result too short, trying with preprocessing...")
                processed_path = preprocess_image(filepath)
                result = p2t.recognize(processed_path)
                print(f"Preprocessed result: {result}")
                latex_code = _extract_latex(result)

            print(f"Final extracted LaTeX: {latex_code}")
        else:
            latex_code = "\\text{Pix2Text not available}"

        return jsonify({
            'success': True,
            'latex': latex_code,
            'image_path': filepath
        })
    except Exception as e:
        print(f"Error processing camera image: {e}")
        return jsonify({'error': str(e)}), 500