Spaces:

snake11235
/

words2csv

Running

File size: 1,554 Bytes

import os
import time
from typing import Optional
from PIL import Image
from huggingface_hub import InferenceClient
from image_utils import _pil_image_to_base64_jpeg
from logging_helper import _log_model_response
from common import MODELS_MAP


# Model identifier sent as the `model` argument of the chat-completion request
# and reported as `model_name` when the response is logged.
MODEL_ID = "allenai/olmOCR-2-7B-1025-FP8"
# Base URL handed to `InferenceClient`; all requests from `_run_olmocr` go here.
# NOTE(review): looks like a dedicated Hugging Face Inference Endpoint — confirm
# it is still provisioned before relying on it.
HF_ENDPOINT_URL = "https://wsy54j97qbvg7mua.us-east-1.aws.endpoints.huggingface.cloud"


def _build_messages(image_base64: str, prompt: str):
    return [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
                },
            ],
        }
    ]


def _run_olmocr(image: Image.Image, prompt: str) -> str:
    """Send *image* and *prompt* to the olmOCR endpoint and return the reply text.

    The image is encoded with ``_pil_image_to_base64_jpeg``, wrapped in a single
    user message by ``_build_messages``, and sent as one chat-completion request
    to ``HF_ENDPOINT_URL``. The reply text, call duration, and usage data are
    passed to ``_log_model_response`` before the text is returned.

    Args:
        image: Image to submit alongside the prompt.
        prompt: Text instruction sent in the same message as the image.

    Returns:
        The content of the first completion choice, or an empty string when
        that choice carries no content.
    """
    image_base64 = _pil_image_to_base64_jpeg(image)
    messages = _build_messages(image_base64, prompt)

    # HF_TOKEN may be unset; in that case None is passed through to the client.
    hf_token: Optional[str] = os.getenv("HF_TOKEN")

    client = InferenceClient(
        base_url=HF_ENDPOINT_URL,
        token=hf_token,
    )

    # Time only the completion call, not image encoding or client construction.
    start_time = time.perf_counter()

    completion = client.chat.completions.create(
        model=MODEL_ID,
        messages=messages,
        max_tokens=512,
        temperature=0.1,
    )

    duration = time.perf_counter() - start_time

    # The message content may be None; str(None) would yield the literal text
    # "None", which would be logged and returned as if it were model output.
    raw_content = completion.choices[0].message.content
    content = "" if raw_content is None else str(raw_content)

    _log_model_response(
        model_name=MODEL_ID,
        content=content,
        duration=duration,
        usage=completion.usage,
        pricing=MODELS_MAP,
    )

    return content