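"""TextGrad prompt optimization on the MATH benchmark with EvoAgentX.

The script builds a one-task SequentialWorkFlowGraph that answers math questions,
carves small train/dev/test splits out of the original MATH test set, evaluates the
workflow before optimization, runs TextGradOptimizer to refine the workflow's
prompts, and evaluates again after restoring the best graph found.
"""
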
from dotenv import load_dotenv
from evoagentx.agents.agent_manager import AgentManager
from evoagentx.benchmark import MATH
from evoagentx.core.callbacks import suppress_logger_info
from evoagentx.core.logging import logger
from evoagentx.evaluators import Evaluator
from evoagentx.models import OpenAILLM, OpenAILLMConfig
from evoagentx.optimizers import TextGradOptimizer
from evoagentx.prompts import StringTemplate
from evoagentx.workflow import SequentialWorkFlowGraph
load_dotenv()
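
# NOTE: this assumes the OpenAI API key (e.g. OPENAI_API_KEY) is provided via the
# .env file loaded above; pass the key to OpenAILLMConfig explicitly if your setup
# requires it.
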
class MathSplits(MATH):
    def _load_data(self):
        # load the original test data
        super()._load_data()
        # split the data into train, dev and test
        import numpy as np
        np.random.seed(42)
        permutation = np.random.permutation(len(self._test_data))
        full_test_data = self._test_data
        # randomly select 10 samples for train, 40 for dev, and 100 for test
        self._train_data = [full_test_data[idx] for idx in permutation[:10]]
        self._dev_data = [full_test_data[idx] for idx in permutation[10:50]]
        self._test_data = [full_test_data[idx] for idx in permutation[50:150]]

def collate_func(example: dict) -> dict:
    """Map a benchmark example to the workflow's input fields."""
    return {"problem": example["problem"]}

math_graph_data = {
    "goal": r"Answer the math question. The answer should be in box format, e.g., \boxed{123}",
    "tasks": [
        {
            "name": "answer_generate",
            "description": "Answer generation for Math.",
            "inputs": [
                {"name": "problem", "type": "str", "required": True, "description": "The problem to solve."}
            ],
            "outputs": [
                {"name": "answer", "type": "str", "required": True, "description": "The generated answer."}
            ],
            "prompt_template": StringTemplate(
                instruction="Answer the math question. The answer should be in box format, e.g., \\boxed{{123}}\n"
            ),
            "parse_mode": "str"
        }
    ]
}
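
# The dictionary above defines a single-task sequential workflow ("answer_generate");
# its prompt template is what the TextGrad optimizer below is set up to tune.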

def main():
    # The executor LLM runs the workflow; the (stronger) optimizer LLM is used by
    # TextGrad to critique and rewrite the prompts.
    executor_config = OpenAILLMConfig(model="gpt-4o-mini")
    executor_llm = OpenAILLM(config=executor_config)
    optimizer_config = OpenAILLMConfig(model="gpt-4o")
    optimizer_llm = OpenAILLM(config=optimizer_config)

    benchmark = MathSplits()
    workflow_graph = SequentialWorkFlowGraph.from_dict(math_graph_data)

    agent_manager = AgentManager()
    agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)

    evaluator = Evaluator(
        llm=executor_llm,
        agent_manager=agent_manager,
        collate_func=collate_func,
        num_workers=20,
        verbose=True
    )
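
    # TextGrad setup: batches of 3 training examples per step, at most 20 steps,
    # an evaluation after every step, and rollback enabled (expected to revert an
    # update whenever it lowers the evaluation score).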
    textgrad_optimizer = TextGradOptimizer(
        graph=workflow_graph,
        optimize_mode="all",
        executor_llm=executor_llm,
        optimizer_llm=optimizer_llm,
        batch_size=3,
        max_steps=20,
        evaluator=evaluator,
        eval_every_n_steps=1,
        eval_rounds=1,
        save_interval=None,
        save_path="./",
        rollback=True,
        constraints=[]
    )
logger.info("Evaluating workflow on test set...")
with suppress_logger_info():
results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode="test")
logger.info(f"Evaluation metrics (before optimization): {results}")
logger.info("Optimizing workflow...")
textgrad_optimizer.optimize(benchmark, seed=8)
textgrad_optimizer.restore_best_graph()
logger.info("Evaluating workflow on test set...")
with suppress_logger_info():
results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode="test")
logger.info(f"Evaluation metrics (after optimization): {results}")

if __name__ == "__main__":
    main()