"""
Single-Agent EvoPrompt Optimization Example

This script demonstrates single-prompt evolution using both GA and DE optimizers.
It optimizes a single chain-of-thought prefix prompt for better task performance.
"""

import asyncio
import os
import re

from dotenv import load_dotenv

from evoagentx.benchmark.bigbenchhard import BIGBenchHard
from evoagentx.core.logging import logger
from evoagentx.models import OpenAILLM, OpenAILLMConfig
from evoagentx.optimizers.engine.registry import ParamRegistry
from evoagentx.optimizers.evoprompt_optimizer import DEOptimizer, GAOptimizer


class SinglePromptProgram:
    """
    A program that uses a single, evolvable prompt to process tasks.
    
    This program uses few-shot learning combined with an evolvable chain-of-thought
    prefix to improve task performance through evolutionary optimization.
    """
    
    def __init__(self, model: OpenAILLM, task_name: str):
        """
        Initialize the single prompt program.
        
        Args:
            model: The language model to use for inference
            task_name: Name of the task for loading few-shot examples
        """
        self.model = model
        self.task_name = task_name
        
        # Load task-specific few-shot prompt examples
        lib_path = os.path.join(os.path.dirname(__file__), 'lib_prompt', f'{task_name}.txt')
        try:
            with open(lib_path, 'r', encoding='utf-8') as f:
                examples = [line.strip() for line in f if line.strip()]
        except FileNotFoundError:
            examples = []
        self.fewshot_prompt = '\n'.join(examples)
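        # Each retained line may embed the literal placeholder '<prompt>', which
        # __call__ replaces with the current chain-of-thought prefix. An
        # illustrative (hypothetical) example line:
        #   Q: ((2 + 2) * 3) = ? A: <prompt> 2 + 2 = 4, and 4 * 3 = 12. the answer is 12.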
        
        # Evolvable chain-of-thought prefixes - optimizer will evolve these
        self.chain_of_thought_prefix = [
            "Let's think step by step.",
            "Let's work this out in a step by step way to be sure we have the right answer.",
            "First,",
            "Let's think about this logically.",
            "Let's solve this problem by splitting it into steps.",
            "Let's be realistic and think step by step.",
            "Let's think like a detective step by step.",
            "Let's think",
            "Before we dive into the answer,",
            "The answer is after the proof.",
            "Let's break this problem down step by step.",
            "We'll tackle this math task one piece at a time.",
            "Let's approach this logically, step by step.",
            "We'll solve this by analyzing each part of the problem.",
            "Let's unravel this mathematical challenge gradually.",
            "We'll methodically work through this problem together.",
            "Let's systematically dissect this math task.",
            "We'll take this mathematical reasoning challenge one step at a time.",
            "Let's meticulously examine each aspect of this problem.",
            "We'll thoughtfully progress through this task step by step."
        ]
        self.task_prompt = "Please provide the answer in the format: 'the answer is <answer>'."
    
    def __call__(self, input: str) -> tuple[str, dict]:
        """
        Execute the program with the given input.
        
        Args:
            input: The input text to process
            
        Returns:
            Tuple of (answer, metadata)
        """
        # Select current prompt prefix (after optimization, may be string instead of list)
        prefix = (self.chain_of_thought_prefix[0] 
                 if isinstance(self.chain_of_thought_prefix, list) 
                 else self.chain_of_thought_prefix)
        
        # Build few-shot prompt
        prompt_body = []
        if self.fewshot_prompt:
            # Replace all '<prompt>' placeholders with current prefix
            prompt_body.append(self.fewshot_prompt.replace("<prompt>", prefix))
        prompt_body.append(f"Q: {input}")
        prompt_body.append(f"A: {prefix}")
        full_prompt = '\n'.join(prompt_body) + ' ' + self.task_prompt
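        # Resulting prompt shape (illustrative):
        #   <few-shot examples, with '<prompt>' replaced by the current prefix>
        #   Q: <input question>
        #   A: <prefix> <task_prompt>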
        
        # Call model and extract answer
        response = self.model.generate(prompt=full_prompt)
        text = response.content.strip()
        
        # Match 'the answer is (B)' and extract content after 'is'
        match = re.search(r"the answer is\s*(.*)", text, re.IGNORECASE)
        answer = match.group(1).strip().rstrip('.') if match else "N/A"
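        # e.g. "... the answer is (B)." -> "(B)"; anything unmatched falls back to "N/A"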
        
        return answer, {"full_prompt": full_prompt}

    def save(self, path: str):
        """Save the program state (placeholder for future implementation)."""
        pass

    def load(self, path: str):
        """Load the program state (placeholder for future implementation)."""
        pass
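

def _demo_prompt_composition():
    """Offline sanity check for SinglePromptProgram (not part of the optimization run).

    A minimal sketch using a hypothetical stub model so it runs without an API key.
    The stub only mimics the ``generate(prompt=...)`` interface used above,
    returning an object with a ``.content`` string attribute.
    """
    class _StubResponse:
        # Canned model output in the format the answer-extraction regex expects
        content = "Let's think step by step. A triangle has three sides. the answer is (A)."

    class _StubModel:
        def generate(self, prompt: str):
            return _StubResponse()

    program = SinglePromptProgram(model=_StubModel(), task_name="geometric_shapes")
    answer, meta = program("Which of the following shapes has exactly three sides?")
    print(f"Extracted answer: {answer!r}")  # expected: '(A)'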


async def main():
    """Main execution function for single-agent EvoPrompt optimization."""
    
    # Load environment variables
    load_dotenv()
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    if not OPENAI_API_KEY:
        raise ValueError("OPENAI_API_KEY not found in environment variables.")

    # Configuration parameters
    POPULATION_SIZE = 10
    ITERATIONS = 10
    CONCURRENCY_LIMIT = 7
    DEV_SAMPLE_NUM = 50

    # Configure LLM for evolution
    evo_llm_config = OpenAILLMConfig(
        model="gpt-4.1-nano",
        openai_key=OPENAI_API_KEY,
        stream=False,
        top_p=0.95,
        temperature=0.5
    )

    # Configure LLM for evaluation
    eval_llm_config = OpenAILLMConfig(
        model="gpt-4.1-nano",
        openai_key=OPENAI_API_KEY,
        stream=False,
        temperature=0
    )
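    # Note: temperature=0 keeps evaluation deterministic, while the evolution
    # config above uses temperature=0.5 / top_p=0.95, presumably to encourage
    # diverse prompt mutations.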
    llm = OpenAILLM(config=eval_llm_config)

    # Tasks to optimize
    tasks = [
        "geometric_shapes",
        "multistep_arithmetic_two"
    ]
    
    # Run optimization for each task
    for task_name in tasks:
        logger.info(f"=== Task: {task_name} ===")
        
        # Set up benchmark and program
        benchmark = BIGBenchHard(task_name, dev_sample_num=DEV_SAMPLE_NUM, seed=10)
        program = SinglePromptProgram(model=llm, task_name=task_name)
        
        # Register single prompt node for optimization
        registry = ParamRegistry()
        registry.track(program, "chain_of_thought_prefix", name="cot_prefix_node")
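        # track() exposes the attribute to the optimizer under the given node
        # name. The value starts as a list of seed prefixes; after optimization
        # the optimizer may write back a single evolved string, hence the
        # isinstance check in SinglePromptProgram.__call__.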

        # Differential Evolution optimizer
        logger.info(f"Creating DE optimizer with concurrency_limit={CONCURRENCY_LIMIT}")
        optimizer_DE = DEOptimizer(
            registry=registry,
            program=program,
            population_size=POPULATION_SIZE,
            iterations=ITERATIONS,
            llm_config=evo_llm_config,
            concurrency_limit=CONCURRENCY_LIMIT,
            enable_logging=True,
            enable_early_stopping=True,
            early_stopping_patience=10
        )
        
        logger.info("Starting DE optimization...")
        await optimizer_DE.optimize(benchmark=benchmark)
        logger.info("DE optimization completed. Starting evaluation...")
        de_metrics = await optimizer_DE.evaluate(benchmark=benchmark, eval_mode="test")
        logger.info("DE evaluation completed.")
        logger.info(f"DE results for {task_name}: {de_metrics['accuracy']}")

        # Genetic Algorithm optimizer
        logger.info(f"Creating GA optimizer with concurrency_limit={CONCURRENCY_LIMIT}")
        optimizer_GA = GAOptimizer(
            registry=registry,
            program=program,
            population_size=POPULATION_SIZE,
            iterations=ITERATIONS,
            llm_config=evo_llm_config,
            concurrency_limit=CONCURRENCY_LIMIT,
            enable_logging=True,
            enable_early_stopping=True,
            early_stopping_patience=10
        )
        
        logger.info("Starting GA optimization...")
        await optimizer_GA.optimize(benchmark=benchmark)
        logger.info("GA optimization completed. Starting evaluation...")
        ga_metrics = await optimizer_GA.evaluate(benchmark=benchmark, eval_mode="test")
        logger.info("GA evaluation completed.")
        logger.info(f"GA results for {task_name}: {ga_metrics['accuracy']}")


if __name__ == "__main__":
    asyncio.run(main())