|
|
import os |
|
|
from dotenv import load_dotenv |
|
|
from evoagentx.agents import AgentManager |
|
|
from evoagentx.models import OpenAILLM, OpenAILLMConfig |
|
|
from evoagentx.benchmark import MATH |
|
|
from evoagentx.workflow import SequentialWorkFlowGraph |
|
|
from evoagentx.core.callbacks import suppress_logger_info |
|
|
from evoagentx.evaluators import Evaluator |
|
|
from evoagentx.core.logging import logger |
|
|
from evoagentx.prompts import MiproPromptTemplate |
|
|
from evoagentx.optimizers.mipro_optimizer import WorkFlowMiproOptimizer |
|
|
|
|
|
# Load variables from a local .env file (if present) so the OpenAI key
# can be picked up from the environment below.
load_dotenv()

# May be None if the key is not set; the LLM configs below will then fail at call time.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
|
|
|
|
class MathSplits(MATH):
    """MATH benchmark variant that carves small, fixed train/test splits out of
    the official test set.

    After the parent benchmark loads its data, the test examples are shuffled
    with a fixed seed; the first 100 shuffled examples become the train split
    and the next 100 become the (reduced) test split.
    """

    def _load_data(self):
        # Let the base benchmark populate self._test_data first.
        super()._load_data()

        import numpy as np

        # Fixed seed keeps the carve-out reproducible across runs.
        np.random.seed(42)
        shuffled = np.random.permutation(len(self._test_data))
        source = self._test_data

        # First 100 shuffled examples -> train, next 100 -> test.
        self._train_data = [source[i] for i in shuffled[:100]]
        self._test_data = [source[i] for i in shuffled[100:200]]

    def get_input_keys(self):
        """Each example enters the workflow through a single 'problem' input."""
        return ["problem"]
|
|
|
|
|
|
|
|
def collate_func(example: dict) -> dict:
    """Project a raw benchmark example down to the workflow's single input.

    Args:
        example: Benchmark example; must contain a "problem" key.

    Returns:
        A dict holding only the "problem" field.
    """
    return dict(problem=example["problem"])
|
|
|
|
|
|
|
|
# Single-task sequential workflow spec for the MATH benchmark:
# one "answer_generate" node that maps a "problem" input to a "solution" output.
math_graph_data = {
    "goal": r"Answer the math question. The answer should be in box format, e.g., \boxed{{123}}.",
    "tasks": [
        {
            "name": "answer_generate",
            "description": "Answer generation for Math.",
            "inputs": [
                {"name": "problem", "type": "str", "required": True, "description": "The problem to solve."}
            ],
            "outputs": [
                {"name": "solution", "type": "str", "required": True, "description": "The generated answer."}
            ],
            # MIPRO-optimizable prompt; this instruction is only the starting
            # point that the optimizer rewrites during search.
            "prompt_template": MiproPromptTemplate(
                instruction=r"Let's think step by step to answer the math question.",
            ),
            # Parse the LLM output by section titles.
            "parse_mode": "title"
        }
    ]
}
|
|
|
|
|
def main():
    """Optimize the math workflow with MIPRO, then evaluate it on the test split.

    Uses a cheaper model (gpt-4o-mini) to execute the workflow and a stronger
    model (gpt-4o) to drive the prompt optimization. Results are logged; the
    optimized program is saved under ``examples/output/mipro/math_mipro``.
    """
    # Executor model runs the workflow; optimizer model proposes prompt updates.
    openai_config = OpenAILLMConfig(model="gpt-4o-mini", openai_key=OPENAI_API_KEY, stream=True, output_response=False)
    executor_llm = OpenAILLM(config=openai_config)
    optimizer_config = OpenAILLMConfig(model="gpt-4o", openai_key=OPENAI_API_KEY, stream=True, output_response=False)
    optimizer_llm = OpenAILLM(config=optimizer_config)

    benchmark = MathSplits()
    workflow_graph: SequentialWorkFlowGraph = SequentialWorkFlowGraph.from_dict(math_graph_data)
    agent_manager = AgentManager()
    agent_manager.add_agents_from_workflow(workflow_graph, llm_config=openai_config)

    evaluator = Evaluator(
        llm=executor_llm,
        agent_manager=agent_manager,
        collate_func=collate_func,
        num_workers=20,
        verbose=True
    )

    optimizer = WorkFlowMiproOptimizer(
        graph=workflow_graph,
        evaluator=evaluator,
        optimizer_llm=optimizer_llm,
        max_bootstrapped_demos=4,
        max_labeled_demos=4,
        eval_rounds=1,
        auto="medium",
        save_path="examples/output/mipro/math_mipro",
    )

    logger.info("Optimizing workflow...")
    optimizer.optimize(dataset=benchmark)
    # NOTE(review): removed a leftover `from pdb import set_trace; set_trace()`
    # debugger breakpoint here — it would halt the script interactively mid-run.
    # Load the best program found during optimization before evaluating.
    optimizer.restore_best_program()

    logger.info("Evaluating program on test set...")
    with suppress_logger_info():
        results = optimizer.evaluate(dataset=benchmark, eval_mode="test")
    logger.info(f"Evaluation metrics (after optimization): {results}")
|
|
|
|
|
|
|
|
# Run the optimization pipeline only when executed as a script.
if __name__ == "__main__":
    main()
|
|
|