"""
Multi-Agent EvoPrompt Workflow Example

This script demonstrates multi-prompt evolution using ensemble voting strategies.
It optimizes multiple prompts simultaneously to improve task performance through
collaborative evolutionary optimization.
"""

import asyncio
import os
import re
from collections import Counter

from dotenv import load_dotenv
from evoagentx.core.logging import logger

from evoagentx.optimizers.evoprompt_optimizer import DEOptimizer, GAOptimizer
from evoagentx.benchmark.bigbenchhard import BIGBenchHard
from evoagentx.models import OpenAILLM, OpenAILLMConfig
from evoagentx.optimizers.engine.registry import ParamRegistry


class SarcasmClassifierProgram:
    """
    Multi-prompt ensemble classifier using a majority-voting strategy.

    This program employs three independent prompt "voters" that can evolve
    independently to achieve better collective performance through diversity.
    """

    def __init__(self, model: OpenAILLM):
        """
        Initialize the multi-prompt ensemble classifier.

        Args:
            model: The language model to use for inference
        """
        self.model = model
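
        # Three independent prompt "voters"; main() registers each one with the
        # ParamRegistry so the optimizers can evolve them separately.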
        self.prompt_direct = "As a straightforward responder, follow the task instruction exactly and provide the final answer."
        self.prompt_expert = "As an expert assistant, interpret the task instruction carefully and provide the final answer."
        self.prompt_cot = "As a thoughtful assistant, think step-by-step, then follow the task instruction and provide the final answer."
        self.task_instruction = "Respond with your final answer wrapped like this: FINAL_ANSWER(ANSWER)"

    def __call__(self, input: str) -> tuple[str, dict]:
        """
        Execute ensemble prediction using majority voting.

        Args:
            input: The input text to process

        Returns:
            Tuple of (final_answer, metadata)
        """
        answers = []
        prompts = [self.prompt_direct, self.prompt_expert, self.prompt_cot]
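        # Extract the answer from the FINAL_ANSWER(...) wrapper requested in task_instruction.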
        pattern = r"FINAL_ANSWER\((.*?)\)"
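
        # Query each voter independently and collect any answer it returns.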
        for prompt in prompts:
            full_prompt = f"{prompt}\n\n{self.task_instruction}\n\nText:\n{input}"
            response = self.model.generate(prompt=full_prompt)
            prediction = response.content.strip()

            match = re.search(pattern, prediction, re.IGNORECASE)
            if match:
                answers.append(match.group(1))

        if not answers:
            return "N/A", {"votes": []}
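
        # Majority vote across the answers extracted from the three voters.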
        vote_counts = Counter(answers)
        most_common_answer = vote_counts.most_common(1)[0][0]

        return most_common_answer, {"votes": answers}

    def save(self, path: str):
        """Save program state (placeholder for future implementation)."""
        pass

    def load(self, path: str):
        """Load program state (placeholder for future implementation)."""
        pass


async def main():
    """Main execution function for multi-agent EvoPrompt optimization."""

    load_dotenv()
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    if not OPENAI_API_KEY:
        raise ValueError("OPENAI_API_KEY not found in environment variables.")
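
    # Shared hyperparameters for the DE and GA evolutionary searches.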
    POPULATION_SIZE = 4
    ITERATIONS = 10
    CONCURRENCY_LIMIT = 100
    COMBINATION_SAMPLE_SIZE = 3
    DEV_SAMPLE_NUM = 15
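
    # LLM config used by the optimizers to evolve prompts (sampling enabled).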
    evo_llm_config = OpenAILLMConfig(
        model="gpt-4.1-nano",
        openai_key=OPENAI_API_KEY,
        stream=False,
        top_p=0.95,
        temperature=0.5
    )
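
    # Deterministic LLM used by the program itself during evaluation.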
    eval_llm_config = OpenAILLMConfig(
        model="gpt-4.1-nano",
        openai_key=OPENAI_API_KEY,
        stream=False,
        temperature=0
    )
    llm = OpenAILLM(config=eval_llm_config)
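
    # BIG-Bench Hard tasks to optimize and evaluate on.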
    tasks = [
        "snarks",
        "sports_understanding",
        "logical_deduction_three_objects",
        "dyck_languages",
        "multistep_arithmetic_two",
    ]

    for task_name in tasks:
        logger.info(f"=== Task: {task_name} ===")

        benchmark = BIGBenchHard(task_name, dev_sample_num=DEV_SAMPLE_NUM, seed=10)
        program = SarcasmClassifierProgram(model=llm)
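
        # Register the three prompt attributes as tunable nodes for the optimizers.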
        registry = ParamRegistry()
        registry.track(program, "prompt_direct", name="direct_prompt_node")
        registry.track(program, "prompt_expert", name="expert_prompt_node")
        registry.track(program, "prompt_cot", name="cot_prompt_node")
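
        # Differential Evolution (DE) over the tracked prompts.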
        optimizer_DE = DEOptimizer(
            registry=registry,
            program=program,
            population_size=POPULATION_SIZE,
            iterations=ITERATIONS,
            llm_config=evo_llm_config,
            concurrency_limit=CONCURRENCY_LIMIT,
            combination_sample_size=COMBINATION_SAMPLE_SIZE,
            enable_logging=True
        )
        logger.info("Starting DE optimization...")
        await optimizer_DE.optimize(benchmark=benchmark)
        logger.info("DE optimization completed. Starting evaluation...")
        de_metrics = await optimizer_DE.evaluate(benchmark=benchmark, eval_mode="test")
        logger.info(f"DE results for {task_name}: {de_metrics['accuracy']}")
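
        # Genetic Algorithm (GA) over the same tracked prompts, for comparison.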
        optimizer_GA = GAOptimizer(
            registry=registry,
            program=program,
            population_size=POPULATION_SIZE,
            iterations=ITERATIONS,
            llm_config=evo_llm_config,
            concurrency_limit=CONCURRENCY_LIMIT,
            combination_sample_size=COMBINATION_SAMPLE_SIZE,
            enable_logging=True
        )
        logger.info("Starting GA optimization...")
        await optimizer_GA.optimize(benchmark=benchmark)
        logger.info("GA optimization completed. Starting evaluation...")
        ga_metrics = await optimizer_GA.evaluate(benchmark=benchmark, eval_mode="test")
        logger.info(f"GA results for {task_name}: {ga_metrics['accuracy']}")


if __name__ == "__main__":
    asyncio.run(main())