# Engchain/evaluation/run_inference.py
import os
import json
import time
from tqdm import tqdm
from dotenv import load_dotenv
import google.generativeai as genai
# 1. Configuration
load_dotenv()
# Configure the API client
API_KEY = os.getenv("API_KEY")
if not API_KEY:
    raise ValueError("API_KEY not found in .env file or environment variables.")
genai.configure(api_key=API_KEY)
# Define model and file paths
MODEL_NAME = os.getenv("MODEL_NAME")
if not MODEL_NAME:
    raise ValueError("MODEL_NAME not found in .env file or environment variables.")
INPUT_DIR = "../testset"
OUTPUT_FILE = f"inference_results/{MODEL_NAME}_inference_results.jsonl"
os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)  # ensure the results directory exists
# Define the prompt template for the model
PROMPT_TEMPLATE = """
You are an expert engineer. Solve the following problem by providing a detailed, structured solution. Use the exact headings and formatting provided below.
## Given
List all known variables and their values with units.
## Find
State the variable(s) to be calculated.
## Formulae
Write down all necessary governing equations before substituting any values.
## Solution
Provide a step-by-step calculation. Each step must start on a new line and be formatted exactly as '**Step X:**', where X is the step number. Show the substitution of values into the formulae clearly.
## Final Answer
State the final numerical result with its units in the format: **Answer:** [value] [units]
Problem:
{question}
"""
# 2. Data Loading Function
def load_all_problems(directory: str) -> list:
    """
    Walks through the nested directory structure, finds all .jsonl files,
    and loads all problems into a single list.
    """
    all_problems = []
    print(f"Loading problems from '{directory}'...")
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.jsonl'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    for line in f:
                        if line.strip():  # skip blank lines in the .jsonl files
                            all_problems.append(json.loads(line))
    print(f"Successfully loaded {len(all_problems)} problems.")
    return all_problems
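# Each JSONL record is assumed to look roughly like the following
# (hypothetical example values; only 'question' is strictly required,
# since 'id' and 'branch' fall back to placeholders in the main loop):
#
#   {"id": "thermo_0042", "branch": "thermodynamics",
#    "question": "A rigid tank contains 2 kg of air at 300 K..."}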
# 3. Main Inference Logic (with Smart Rate Limiter)
if __name__ == "__main__":
    # Load all problems from the testset directory
    problems = load_all_problems(INPUT_DIR)

    if not problems:
        print(f"Error: No problems found in '{INPUT_DIR}'. Please check the path.")
    else:
        # Initialize the generative model
        model = genai.GenerativeModel(MODEL_NAME)
        print(f"Initialized model: {MODEL_NAME}")
        # Open the output file in write mode to start fresh
        with open(OUTPUT_FILE, "w", encoding='utf-8') as f_out:
            print(f"Starting inference... Results will be saved to '{OUTPUT_FILE}'")

            # Initialize variables for rate limiting
            request_counter = 0
            start_time = time.time()
            REQUESTS_PER_MINUTE = 8  # conservative target, safely below the API's per-minute quota
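            # At 8 requests/minute, a full pass over N problems needs at least
            # N/8 minutes of wall-clock time (e.g., ~125 minutes for 1,000 problems).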
            progress_bar = tqdm(problems, desc="Initializing Inference")
            for problem in progress_bar:
                # Rate Limiting Logic
                # Check whether we have used up this minute's request budget
                if request_counter >= REQUESTS_PER_MINUTE:
                    elapsed_time = time.time() - start_time
                    # If the batch took less than a minute, wait out the remainder
                    if elapsed_time < 60:
                        wait_time = 60 - elapsed_time
                        tqdm.write(f"Rate limit reached. Pausing for {wait_time:.2f} seconds...")
                        time.sleep(wait_time)
                    # Reset the counter and timer for the next batch
                    request_counter = 0
                    start_time = time.time()
                # Update the progress bar's description
                branch = problem.get('branch', 'unknown_branch')
                problem_id = problem.get('id', 'unknown_id')
                progress_bar.set_description(f"Processing '{problem_id}' from '{branch}'")

                prompt = PROMPT_TEMPLATE.format(question=problem['question'])
                try:
                    # Call the Gemini API
                    response = model.generate_content(prompt)
                    problem['generation'] = response.text
                except Exception as e:
                    tqdm.write(f"\nAn error occurred for problem ID {problem_id}: {e}")
                    problem['generation'] = f"ERROR: {e}"
                    # Wait a full minute if an error occurs (e.g., server-side issues)
                    time.sleep(60)
                # Write the result to the output file immediately
                f_out.write(json.dumps(problem) + '\n')
                # Increment the request counter after each attempt, so errors are throttled too
                request_counter += 1

        print(f"\nInference complete. All {len(problems)} results saved to '{OUTPUT_FILE}'.")