import os
import time
from typing import Optional

from PIL import Image
from huggingface_hub import InferenceClient

from image_utils import _pil_image_to_base64_jpeg
from logging_helper import _log_model_response
from common import MODELS_MAP

# Model identifier sent with every chat-completion request and used as the
# model name when logging the response.
MODEL_ID = "allenai/olmOCR-2-7B-1025-FP8"

# Base URL of the dedicated inference endpoint the client talks to.
HF_ENDPOINT_URL = "https://wsy54j97qbvg7mua.us-east-1.aws.endpoints.huggingface.cloud"


def _build_messages(image_base64: str, prompt: str):
    """Build the chat payload: one user message holding a prompt and an image.

    Args:
        image_base64: Base64 text of the image; it is embedded in a
            ``data:image/jpeg;base64,...`` URL.
        prompt: Text instruction placed before the image part.

    Returns:
        A one-element list containing a ``"user"`` message whose content is a
        text part followed by an ``image_url`` part.
    """
    return [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
                },
            ],
        }
    ]


def _run_olmocr(image: Image.Image, prompt: str) -> str:
    """Send *image* and *prompt* to the olmOCR endpoint and return the reply.

    The image is encoded with ``_pil_image_to_base64_jpeg``, wrapped in a chat
    message, and submitted through ``InferenceClient``. The request is timed
    and the outcome is reported via ``_log_model_response``.

    Args:
        image: PIL image to run through the model.
        prompt: Text instruction sent alongside the image.

    Returns:
        The content of the first completion choice as a string. An empty
        string is returned when the message has no content, rather than the
        literal text ``"None"``.
    """
    image_base64 = _pil_image_to_base64_jpeg(image)
    messages = _build_messages(image_base64, prompt)

    # None when HF_TOKEN is unset; the value is handed to the client as-is.
    hf_token: Optional[str] = os.getenv("HF_TOKEN")
    client = InferenceClient(
        base_url=HF_ENDPOINT_URL,
        token=hf_token,
    )

    # Time only the network round trip, not the image encoding above.
    start_time = time.perf_counter()
    completion = client.chat.completions.create(
        model=MODEL_ID,
        messages=messages,
        max_tokens=512,
        temperature=0.1,
    )
    duration = time.perf_counter() - start_time

    # The message content may be None; str(None) would leak the text "None"
    # into the log and the return value, so map it to an empty string.
    raw_content = completion.choices[0].message.content
    content = "" if raw_content is None else str(raw_content)

    _log_model_response(
        model_name=MODEL_ID,
        content=content,
        duration=duration,
        usage=completion.usage,
        pricing=MODELS_MAP,
    )
    return content