Spaces:

snake11235
/

words2csv

Running

words2csv / olm_ocr.py

feat: refactor model configuration to use unified MODELS_MAP with backend routing

9e38f34 4 days ago

1.55 kB

	import os
	import time
	from typing import Optional
	from PIL import Image
	from huggingface_hub import InferenceClient
	from image_utils import _pil_image_to_base64_jpeg
	from logging_helper import _log_model_response
	from common import MODELS_MAP


	# Model identifier sent as the `model` field of every chat-completion request
	# in this module and reported as the model name when the response is logged.
	MODEL_ID = "allenai/olmOCR-2-7B-1025-FP8"
	# Base URL handed to InferenceClient. NOTE(review): presumably a dedicated
	# Hugging Face Inference Endpoint serving MODEL_ID — confirm it is still live.
	HF_ENDPOINT_URL = "https://wsy54j97qbvg7mua.us-east-1.aws.endpoints.huggingface.cloud"


	def _build_messages(image_base64: str, prompt: str):
	return [
	{
	"role": "user",
	"content": [
	{"type": "text", "text": prompt},
	{
	"type": "image_url",
	"image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
	},
	],
	}
	]


	def _run_olmocr(image: Image.Image, prompt: str) -> str:
	    """Send ``image`` and ``prompt`` to the olmOCR endpoint and return the reply text.

	    The image is converted to a base64 JPEG, wrapped in a single user message
	    and posted to ``HF_ENDPOINT_URL`` as a chat completion. The wall-clock
	    duration of the request and the usage reported by the endpoint are passed
	    to ``_log_model_response``.

	    Args:
	        image: Image sent to the model.
	        prompt: Text instruction sent alongside the image.

	    Returns:
	        The content of the first completion choice, or an empty string when
	        the endpoint returns no content for that choice.
	    """
	    messages = _build_messages(_pil_image_to_base64_jpeg(image), prompt)

	    # When HF_TOKEN is unset this is None and is handed to the client as-is.
	    hf_token: Optional[str] = os.getenv("HF_TOKEN")
	    client = InferenceClient(base_url=HF_ENDPOINT_URL, token=hf_token)

	    # Time only the remote call, not image encoding or client construction.
	    start_time = time.perf_counter()
	    completion = client.chat.completions.create(
	        model=MODEL_ID,
	        messages=messages,
	        max_tokens=512,
	        temperature=0.1,
	    )
	    duration = time.perf_counter() - start_time

	    # The message content may be None; str(None) would leak the literal text
	    # "None" into the returned OCR result, so map a missing content to "".
	    raw_content = completion.choices[0].message.content
	    content = "" if raw_content is None else str(raw_content)

	    _log_model_response(
	        model_name=MODEL_ID,
	        content=content,
	        duration=duration,
	        usage=completion.usage,
	        pricing=MODELS_MAP,
	    )

	    return content