"""
Single-Agent EvoPrompt Optimization Example

This script demonstrates single-prompt evolution using both GA and DE optimizers.
It optimizes a single chain-of-thought prefix prompt for better task performance.
"""

import asyncio
import os
import re

from dotenv import load_dotenv

from evoagentx.benchmark.bigbenchhard import BIGBenchHard
from evoagentx.core.logging import logger
from evoagentx.models import OpenAILLM, OpenAILLMConfig
from evoagentx.optimizers.engine.registry import ParamRegistry
from evoagentx.optimizers.evoprompt_optimizer import DEOptimizer, GAOptimizer


class SinglePromptProgram:
    """
    A program that uses a single, evolvable prompt to process tasks.
    
    This program uses few-shot learning combined with an evolvable chain-of-thought
    prefix to improve task performance through evolutionary optimization.
    """
    
    def __init__(self, model: OpenAILLM, task_name: str):
        """
        Initialize the single prompt program.
        
        Args:
            model: The language model to use for inference
            task_name: Name of the task for loading few-shot examples
        """
        self.model = model
        self.task_name = task_name
        
        # Load task-specific few-shot prompt examples
        lib_path = os.path.join(os.path.dirname(__file__), 'lib_prompt', f'{task_name}.txt')
        try:
            with open(lib_path, 'r', encoding='utf-8') as f:
                examples = [line.strip() for line in f if line.strip()]
        except FileNotFoundError:
            examples = []
        self.fewshot_prompt = '\n'.join(examples)
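        # Each retained line may embed the literal placeholder '<prompt>', which
        # __call__ replaces with the current chain-of-thought prefix. An
        # illustrative (hypothetical) example line:
        #   Q: ((2 + 2) * 3) = ? A: <prompt> 2 + 2 = 4, and 4 * 3 = 12. the answer is 12.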
        
        # Evolvable chain-of-thought prefixes - optimizer will evolve these
        self.chain_of_thought_prefix = [
            "Let's think step by step.",
            "Let's work this out in a step by step way to be sure we have the right answer.",
            "First,",
            "Let's think about this logically.",
            "Let's solve this problem by splitting it into steps.",
            "Let's be realistic and think step by step.",
            "Let's think like a detective step by step.",
            "Let's think",
            "Before we dive into the answer,",
            "The answer is after the proof.",
            "Let's break this problem down step by step.",
            "We'll tackle this math task one piece at a time.",
            "Let's approach this logically, step by step.",
            "We'll solve this by analyzing each part of the problem.",
            "Let's unravel this mathematical challenge gradually.",
            "We'll methodically work through this problem together.",
            "Let's systematically dissect this math task.",
            "We'll take this mathematical reasoning challenge one step at a time.",
            "Let's meticulously examine each aspect of this problem.",
            "We'll thoughtfully progress through this task step by step."
        ]
        self.task_prompt = "Please provide the answer in the format: 'the answer is <answer>'."
    
    def __call__(self, input: str) -> tuple[str, dict]:
        """
        Execute the program with the given input.
        
        Args:
            input: The input text to process
            
        Returns:
            Tuple of (answer, metadata)
        """
        # Select current prompt prefix (after optimization, may be string instead of list)
        prefix = (self.chain_of_thought_prefix[0] 
                 if isinstance(self.chain_of_thought_prefix, list) 
                 else self.chain_of_thought_prefix)
        
        # Build few-shot prompt
        prompt_body = []
        if self.fewshot_prompt:
            # Replace all '<prompt>' placeholders with current prefix
            prompt_body.append(self.fewshot_prompt.replace("<prompt>", prefix))
        prompt_body.append(f"Q: {input}")
        prompt_body.append(f"A: {prefix}")
        full_prompt = '\n'.join(prompt_body) + ' ' + self.task_prompt
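        # Resulting prompt shape (illustrative):
        #   <few-shot examples, with '<prompt>' replaced by the current prefix>
        #   Q: <input question>
        #   A: <prefix> <task_prompt>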
        
        # Call model and extract answer
        response = self.model.generate(prompt=full_prompt)
        text = response.content.strip()
        
        # Match 'the answer is (B)' and extract content after 'is'
        match = re.search(r"the answer is\s*(.*)", text, re.IGNORECASE)
        answer = match.group(1).strip().rstrip('.') if match else "N/A"
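        # e.g. "... the answer is (B)." -> "(B)"; anything unmatched falls back to "N/A"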
        
        return answer, {"full_prompt": full_prompt}

    def save(self, path: str):
        """Save the program state (placeholder for future implementation)."""
        pass

    def load(self, path: str):
        """Load the program state (placeholder for future implementation)."""
        pass
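

def _demo_prompt_composition():
    """Offline sanity check for SinglePromptProgram (not part of the optimization run).

    A minimal sketch using a hypothetical stub model so it runs without an API key.
    The stub only mimics the ``generate(prompt=...)`` interface used above,
    returning an object with a ``.content`` string attribute.
    """
    class _StubResponse:
        # Canned model output in the format the answer-extraction regex expects
        content = "Let's think step by step. A triangle has three sides. the answer is (A)."

    class _StubModel:
        def generate(self, prompt: str):
            return _StubResponse()

    program = SinglePromptProgram(model=_StubModel(), task_name="geometric_shapes")
    answer, meta = program("Which of the following shapes has exactly three sides?")
    print(f"Extracted answer: {answer!r}")  # expected: '(A)'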


async def main():
    """Main execution function for single-agent EvoPrompt optimization."""
    
    # Load environment variables
    load_dotenv()
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    if not OPENAI_API_KEY:
        raise ValueError("OPENAI_API_KEY not found in environment variables.")

    # Configuration parameters
    POPULATION_SIZE = 10
    ITERATIONS = 10
    CONCURRENCY_LIMIT = 7
    DEV_SAMPLE_NUM = 50

    # Configure LLM for evolution
    evo_llm_config = OpenAILLMConfig(
        model="gpt-4.1-nano",
        openai_key=OPENAI_API_KEY,
        stream=False,
        top_p=0.95,
        temperature=0.5
    )

    # Configure LLM for evaluation
    eval_llm_config = OpenAILLMConfig(
        model="gpt-4.1-nano",
        openai_key=OPENAI_API_KEY,
        stream=False,
        temperature=0
    )
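    # Note: temperature=0 keeps evaluation deterministic, while the evolution
    # config above uses temperature=0.5 / top_p=0.95, presumably to encourage
    # diverse prompt mutations.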
    llm = OpenAILLM(config=eval_llm_config)

    # Tasks to optimize
    tasks = [
        "geometric_shapes",
        "multistep_arithmetic_two"
    ]
    
    # Run optimization for each task
    for task_name in tasks:
        logger.info(f"=== Task: {task_name} ===")
        
        # Set up benchmark and program
        benchmark = BIGBenchHard(task_name, dev_sample_num=DEV_SAMPLE_NUM, seed=10)
        program = SinglePromptProgram(model=llm, task_name=task_name)
        
        # Register single prompt node for optimization
        registry = ParamRegistry()
        registry.track(program, "chain_of_thought_prefix", name="cot_prefix_node")
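        # track() exposes the attribute to the optimizer under the given node
        # name. The value starts as a list of seed prefixes; after optimization
        # the optimizer may write back a single evolved string, hence the
        # isinstance check in SinglePromptProgram.__call__.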

        # Differential Evolution optimizer
        logger.info(f"Creating DE optimizer with concurrency_limit={CONCURRENCY_LIMIT}")
        optimizer_DE = DEOptimizer(
            registry=registry,
            program=program,
            population_size=POPULATION_SIZE,
            iterations=ITERATIONS,
            llm_config=evo_llm_config,
            concurrency_limit=CONCURRENCY_LIMIT,
            enable_logging=True,
            enable_early_stopping=True,
            early_stopping_patience=10
        )
        
        logger.info("Starting DE optimization...")
        await optimizer_DE.optimize(benchmark=benchmark)
        logger.info("DE optimization completed. Starting evaluation...")
        de_metrics = await optimizer_DE.evaluate(benchmark=benchmark, eval_mode="test")
        logger.info("DE evaluation completed.")
        logger.info(f"DE results for {task_name}: {de_metrics['accuracy']}")

        # Genetic Algorithm optimizer
        logger.info(f"Creating GA optimizer with concurrency_limit={CONCURRENCY_LIMIT}")
        optimizer_GA = GAOptimizer(
            registry=registry,
            program=program,
            population_size=POPULATION_SIZE,
            iterations=ITERATIONS,
            llm_config=evo_llm_config,
            concurrency_limit=CONCURRENCY_LIMIT,
            enable_logging=True,
            enable_early_stopping=True,
            early_stopping_patience=10
        )
        
        logger.info("Starting GA optimization...")
        await optimizer_GA.optimize(benchmark=benchmark)
        logger.info("GA optimization completed. Starting evaluation...")
        ga_metrics = await optimizer_GA.evaluate(benchmark=benchmark, eval_mode="test")
        logger.info("GA evaluation completed.")
        logger.info(f"GA results for {task_name}: {ga_metrics['accuracy']}")


if __name__ == "__main__":
    asyncio.run(main())