Spaces:
Running
Running
| import os | |
| import time | |
| from typing import Optional | |
| from PIL import Image | |
| from huggingface_hub import InferenceClient | |
| from image_utils import _pil_image_to_base64_jpeg | |
| from logging_helper import _log_model_response | |
| from common import MODELS_MAP | |
# Model identifier: sent as `model=` in the chat-completion request and passed
# as `model_name` when the response is logged.
MODEL_ID = "allenai/olmOCR-2-7B-1025-FP8"
# Endpoint URL used as `base_url` when `_run_olmocr` builds its InferenceClient.
HF_ENDPOINT_URL = "https://wsy54j97qbvg7mua.us-east-1.aws.endpoints.huggingface.cloud"
| def _build_messages(image_base64: str, prompt: str): | |
| return [ | |
| { | |
| "role": "user", | |
| "content": [ | |
| {"type": "text", "text": prompt}, | |
| { | |
| "type": "image_url", | |
| "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}, | |
| }, | |
| ], | |
| } | |
| ] | |
def _run_olmocr(
    image: Image.Image,
    prompt: str,
    *,
    max_tokens: int = 512,
    temperature: float = 0.1,
) -> str:
    """Send one image and a prompt to the olmOCR endpoint and return the reply text.

    The image is encoded with ``_pil_image_to_base64_jpeg``, wrapped into a
    chat payload by ``_build_messages``, and posted to ``HF_ENDPOINT_URL``
    through ``InferenceClient``. The elapsed time of the request, the reply
    text and the usage data are handed to ``_log_model_response``.

    Args:
        image: The PIL image to process.
        prompt: Instruction text sent alongside the image.
        max_tokens: Generation limit forwarded to the endpoint (default 512).
        temperature: Sampling temperature forwarded to the endpoint
            (default 0.1).

    Returns:
        The content of the first choice's message, or an empty string when
        the endpoint returns no content for that message.

    Raises:
        IndexError: If the completion carries no choices.
        Any exception raised by the client call propagates unchanged.
    """
    image_base64 = _pil_image_to_base64_jpeg(image)
    messages = _build_messages(image_base64, prompt)

    # os.getenv returns None when HF_TOKEN is unset; it is passed through as-is.
    hf_token: Optional[str] = os.getenv("HF_TOKEN")
    client = InferenceClient(
        base_url=HF_ENDPOINT_URL,
        token=hf_token,
    )

    # Time only the remote call, not the image encoding or the logging.
    start_time = time.perf_counter()
    completion = client.chat.completions.create(
        model=MODEL_ID,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    duration = time.perf_counter() - start_time

    # Message content may be None; str(None) would leak the literal text
    # "None" into the returned OCR result, so map it to an empty string.
    raw_content = completion.choices[0].message.content
    content = "" if raw_content is None else str(raw_content)

    _log_model_response(
        model_name=MODEL_ID,
        content=content,
        duration=duration,
        usage=completion.usage,
        pricing=MODELS_MAP,
    )
    return content