Engchain / utils /data_generator.py
usmansafdarktk
fix(UI): improve dropdowns and update UI
ff9e41b
import os
import importlib
import ast
def discover_templates(root_dir="data/templates/branches"):
    """
    Scan `root_dir` for Python files that define `template_*` functions.

    Every immediate sub-directory of `root_dir` is treated as a domain and is
    walked recursively, so nested layouts such as
    branches/chemical_engineering/reaction_kinetics/... are supported.
    Files are parsed with `ast` only; nothing is imported or executed.

    Domains, sub-directories and files are all visited in sorted order, so
    the insertion order of the returned dictionaries is the same on every
    platform and filesystem.

    Args:
        root_dir: Directory whose immediate sub-directories are the domains.

    Returns:
        dict: Maps each domain name to a dict of
        {path relative to `root_dir`: sorted list of unique template names}:
            {
                "chemical_engineering": {
                    "chemical_engineering/reaction_kinetics/mole_balances.py":
                        ["template_mole_balance", ...],
                    ...
                },
                "transport_phenomena": {...},
            }
        Relative paths use the separator of the running OS. Domains without
        any template function are omitted. An empty dict is returned (and an
        error printed) when `root_dir` is not a directory.
    """
    discovered = {}
    if not os.path.isdir(root_dir):
        print(f"Error: Directory '{root_dir}' not found.")
        return discovered

    # Iterate through top-level domain directories
    for domain in sorted(os.listdir(root_dir)):
        domain_path = os.path.join(root_dir, domain)
        if not os.path.isdir(domain_path):
            continue

        domain_templates = {}
        # Recursively walk through subdirectories
        for dirpath, dirnames, filenames in os.walk(domain_path):
            # os.walk is top-down by default and honours in-place edits of
            # `dirnames`; sorting here makes the traversal order (and thus
            # the key order of the result) independent of the filesystem.
            dirnames.sort()
            for filename in sorted(filenames):
                if not filename.endswith(".py"):
                    continue
                file_path = os.path.join(dirpath, filename)
                try:
                    template_functions = _find_template_functions(file_path)
                    if template_functions:
                        # store relative path (used later for import)
                        rel_path = os.path.relpath(file_path, root_dir)
                        domain_templates[rel_path] = template_functions
                except Exception as e:
                    # Best effort: one unreadable or syntactically invalid
                    # file must not abort discovery of the others.
                    print(f"Error parsing {file_path}: {e}")

        if domain_templates:
            discovered[domain] = domain_templates

    return discovered


def _find_template_functions(file_path):
    """Return the sorted, de-duplicated `template_*` function names in a file.

    The file is read as UTF-8 and parsed with `ast`. `ast.walk` visits every
    node, so functions at any nesting level (including methods) are reported.
    Errors from reading or parsing propagate to the caller.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        tree = ast.parse(f.read(), filename=file_path)
    # A set drops repeats when the same name is defined more than once.
    names = {
        node.name
        for node in ast.walk(tree)
        if isinstance(node, ast.FunctionDef) and node.name.startswith("template_")
    }
    return sorted(names)
def generate_examples(branch, domain, filename, template_name, num_examples=10):
    """
    Dynamically import a template module and run one of its template
    functions repeatedly to build a list of {question, solution} objects.

    Args:
        branch: Branch label; only used in the error message.
        domain: Domain label; only used in the error message.
        filename: Path of the template file relative to the
            `data/templates/branches` package, e.g.
            "chemical_engineering/reaction_kinetics/mole_balances.py".
            Both "/" and "\\" are accepted as separators.
        template_name: Name of the function to look up in the module.
        num_examples: How many times the template function is called.
            Defaults to 10.

    Returns:
        list: One {"question": ..., "solution": ...} dict per call that
        returned a 2-tuple. Calls returning anything else are skipped with a
        printed warning. An empty list is returned when an argument is
        missing, or when importing, looking up or running the template
        raises (in which case examples gathered so far are discarded).
    """
    if not all([branch, domain, filename, template_name]):
        print("Missing arguments to generate_examples()")
        return []

    try:
        # Strip only the trailing extension; a blanket replace(".py", "")
        # would also eat ".py" occurring elsewhere in the path.
        module_name = filename[:-3] if filename.endswith(".py") else filename
        # Accept either separator so keys built on another OS (or written
        # with forward slashes) still map to a dotted module path.
        for separator in {os.sep, "/", "\\"}:
            module_name = module_name.replace(separator, ".")

        module_path = f"data.templates.branches.{module_name}"
        module = importlib.import_module(module_path)
        template_function = getattr(module, template_name)

        examples = []
        for _ in range(num_examples):
            result = template_function()
            if isinstance(result, tuple) and len(result) == 2:
                question, solution = result
                examples.append({"question": question, "solution": solution})
            else:
                print(f"Warning: {template_name} did not return (question, solution) tuple.")
        return examples
    except Exception as e:
        # Boundary for the caller: report the failure and return nothing
        # rather than propagate errors from arbitrary template code.
        print(f"Error running template '{template_name}' from {branch}/{domain}/{filename}: {e}")
        return []