import asyncio
import os
import re
import time
from dataclasses import dataclass
from typing import AsyncGenerator, List, Optional

import gradio as gr
from browser_use import Agent, Browser
from browser_use.browser.browser import BrowserContext
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from rich.console import Console
from rich.panel import Panel
from rich.text import Text

from api_clients import ElevenLabsClient, OpenRouterClient
from logger import log_async_execution_time, log_execution_time, setup_logger
|
|
# Load variables from a local .env file (if any) into the process environment
# before any API client is constructed.
load_dotenv()

# Module-wide rich console (used by parse_agent_history) and the logger shared
# by every function in this file.
console = Console()
logger = setup_logger("interface")
|
|
@dataclass
class ActionResult:
    """Record describing the outcome of a single agent action.

    Nothing in this file constructs or reads this class; the field notes below
    are inferred from the field names and should be confirmed against the
    producer of these records.
    """

    # Presumably True when this action completed the overall task.
    is_done: bool
    # Text produced by the action, or None when nothing was extracted.
    extracted_content: Optional[str]
    # Error description for a failed action, or None on success (assumed).
    error: Optional[str]
    # Presumably whether the result is kept in the agent's memory -- TODO confirm.
    include_in_memory: bool
|
|
|
|
@dataclass
class AgentHistoryList:
    """Container pairing a run's action results with the raw model outputs.

    Nothing in this file constructs this class; how the two lists relate to
    each other (for example, whether they are index-aligned) cannot be told
    from here.
    """

    # One ActionResult per recorded action.
    all_results: List[ActionResult]
    # Model outputs as plain dicts; their schema is not visible in this file.
    all_model_outputs: List[dict]
|
|
|
|
# Matches a single- or double-quoted string literal, honouring backslash escapes.
_QUOTED_VALUE_RE = re.compile(
    r"'((?:[^'\\]|\\.)*)'" r'|"((?:[^"\\]|\\.)*)"',
    re.DOTALL,
)


def _extract_content(section: str) -> str:
    """Return the ``extracted_content`` value found in *section*, or ``''``."""
    _, marker, tail = section.partition('extracted_content=')
    if not marker:
        return ''

    quoted = _QUOTED_VALUE_RE.match(tail)
    if quoted:
        # Take the whole quoted literal so commas inside the text are kept.
        single, double = quoted.groups()
        return single if single is not None else double

    # Unquoted value (e.g. None): read up to the next field or closing paren.
    bare = tail.split(',', 1)[0].split(')', 1)[0].strip()
    return '' if bare == 'None' else bare


def parse_agent_history(history_str: str) -> None:
    """Pretty-print the extracted content of every step in an agent history dump.

    The input is treated as text containing one ``ActionResult(...)`` fragment
    per step. For each fragment whose ``extracted_content`` is a non-empty
    string, a blue panel titled ``Step <n>`` is printed to the module-level
    rich console, followed by an empty line. Fragments without content are
    skipped but still count towards the step number.

    Args:
        history_str: Text containing zero or more ``ActionResult(`` fragments.
    """
    # Everything before the first "ActionResult(" is preamble, not a step.
    sections = history_str.split('ActionResult(')[1:]

    for step, section in enumerate(sections, 1):
        content = _extract_content(section)
        if not content:
            continue

        header = Text(f'Step {step}', style='bold blue')
        console.print(Panel(content, title=header, border_style='blue'))
        console.print()
|
|
|
|
async def run_browser_task(
    task: str,
    api_key: str,
    provider: str = 'openai',
    model: str = 'gpt-4-vision',
    headless: bool = True,
) -> str:
    """Run a browser-automation task with the selected LLM provider.

    The API key is exported through the provider's environment variable, an
    agent is built around the chosen chat model, and the agent is run once.
    Failures are reported as an ``'Error: ...'`` string rather than raised.

    Args:
        task: Natural-language description of what the agent should do.
        api_key: Credential for the selected provider.
        provider: ``'openai'`` or ``'anthropic'``; any other value selects the
            Google branch.
        model: Model identifier passed to the chat-model constructor.
        headless: Whether the browser context is created headless.

    Returns:
        Whatever ``agent.run()`` returns on success, ``'Please provide an API
        key'`` when the key is missing or blank, or ``'Error: <message>'`` on
        failure.
    """
    if not api_key or not api_key.strip():
        return 'Please provide an API key'
    # Surrounding whitespace (e.g. from a pasted key) is never part of a key.
    api_key = api_key.strip()

    try:
        # Model construction lives inside the try so that a missing or
        # misconfigured provider is reported through the same error string.
        # NOTE(review): ChatAnthropic and ChatGoogleGenerativeAI are not
        # imported by the visible part of this file; until they are, those
        # branches end in the 'Error: ...' return below.
        if provider == 'openai':
            os.environ['OPENAI_API_KEY'] = api_key
            llm = ChatOpenAI(model=model)
        elif provider == 'anthropic':
            os.environ['ANTHROPIC_API_KEY'] = api_key
            llm = ChatAnthropic(model=model)
        else:
            os.environ['GOOGLE_API_KEY'] = api_key
            llm = ChatGoogleGenerativeAI(model=model)

        agent = Agent(
            task=task,
            llm=llm,
            # Honour the caller's choice instead of always running headless.
            browser=Browser(BrowserContext(headless=headless)),
        )
        return await agent.run()
    except Exception as e:
        return f'Error: {e}'
|
|
|
|
@log_async_execution_time(logger)
async def scrape_content(url: str) -> str:
    """Scrape and summarize the page at *url* using a browser agent.

    Steps:
    1. Check that the URL has an http(s) scheme.
    2. Build a GPT-4 backed browser agent with a headless browser context.
    3. Run the agent with a "visit and summarize" task and return its result.

    Args:
        url: Target URL to scrape.

    Returns:
        The value produced by ``agent.run()``.

    Raises:
        ValueError: If *url* does not start with ``http://`` or ``https://``.
        Exception: Any error raised while building or running the agent is
            logged with its traceback and re-raised unchanged.
    """
    logger.info(f"Starting content scrape for URL: {url}")

    if not url.startswith(('http://', 'https://')):
        logger.error(f"Invalid URL format: {url}")
        raise ValueError("URL must start with http:// or https://")

    try:
        logger.debug("Initializing LLM and browser agent")
        llm = ChatOpenAI(model="gpt-4")
        agent = Agent(
            task=f"Visit this URL: {url} and extract the main content. Summarize it in a clear and concise way.",
            llm=llm,
            browser=Browser(BrowserContext(headless=True)),
        )

        logger.info("Executing content extraction")
        result = await agent.run()

        # NOTE(review): the two debug lines below treat the result as a string
        # (len() and slicing) -- confirm that agent.run() returns one.
        logger.debug(f"Content extraction successful. Length: {len(result)} chars")
        logger.debug(f"Content preview: {result[:200]}...")

        return result
    except Exception:
        logger.error(f"Content extraction failed for {url}", exc_info=True)
        raise
|
|
# NOTE(review): create_podcast is an async generator. Confirm that
# log_async_execution_time supports async generators -- a wrapper that simply
# awaits the wrapped call would break iteration.
@log_async_execution_time(logger)
async def create_podcast(
    url: str,
    prompt: str,
    elevenlabs_key: str,
    voice_id: str,
    openrouter_key: str,
    model_id: str,
) -> AsyncGenerator[tuple[Optional[str], str], None]:
    """Create a podcast from a web page, yielding progress for the UI.

    Pipeline:
    1. Scrape and summarize the content at *url*.
    2. Generate a script from that content through OpenRouter.
    3. Synthesize the script to audio through ElevenLabs and save it as an
       ``.mp3`` file in the current working directory.

    Args:
        url: Page to turn into a podcast.
        prompt: Instructions passed to script generation.
        elevenlabs_key: API key for the ElevenLabs client.
        voice_id: Voice passed to audio generation.
        openrouter_key: API key for the OpenRouter client.
        model_id: Model passed to script generation.

    Yields:
        ``(audio_path, status)`` tuples. ``audio_path`` is ``None`` for
        progress and error updates and the saved file path on the final,
        successful update. Errors are reported as ``"Error: <message>"``
        instead of being raised.
    """
    logger.info(f"Starting podcast creation for URL: {url}")
    logger.debug(f"Parameters - Voice: {voice_id}, Model: {model_id}")
    logger.debug(f"Prompt length: {len(prompt)} chars")

    try:
        logger.debug("Initializing API clients")
        openrouter = OpenRouterClient(openrouter_key)
        elevenlabs = ElevenLabsClient(elevenlabs_key)

        logger.info("Phase 1/3: Content scraping")
        yield None, "Scraping website content..."
        content = await scrape_content(url)
        logger.debug(f"Scraped content length: {len(content)} chars")

        logger.info("Phase 2/3: Script generation")
        yield None, "Generating podcast script..."
        script = await openrouter.generate_script(content, prompt, model_id)
        logger.debug(f"Generated script length: {len(script)} chars")

        logger.info("Phase 3/3: Audio generation")
        yield None, "Converting to audio..."
        # generate_audio is called synchronously; run it in a worker thread so
        # the event loop (and the UI) stays responsive while audio is produced.
        audio = await asyncio.to_thread(elevenlabs.generate_audio, script, voice_id)
        logger.debug(f"Generated audio size: {len(audio)} bytes")

        # Timestamped file name, relative to the current working directory.
        audio_path = f"podcast_{int(time.time())}.mp3"
        logger.debug(f"Saving audio to: {audio_path}")
        with open(audio_path, "wb") as f:
            f.write(audio)

        logger.info("Podcast creation completed successfully")
        yield audio_path, "Podcast created successfully!"

    except Exception as e:
        logger.error("Podcast creation failed", exc_info=True)
        yield None, f"Error: {e}"
|
|
def create_ui():
    """Build the PodcastCreator Gradio interface and wire up its events.

    Layout: a wide left column with the source URL, the podcast topic, the two
    API-key fields with their dependent dropdowns (voice, model) and the
    submit button; a narrow right column with the audio player and a status
    box.

    Events:
    - Changing the ElevenLabs key reloads the voice dropdown.
    - Changing the OpenRouter key reloads the model dropdown.
    - Clicking the button streams ``create_podcast`` updates into the audio
      and status components.

    Returns:
        The assembled ``gr.Blocks`` interface (not yet launched).
    """
    logger.info("Initializing Gradio interface")

    # Gradio dropdown choices are (label, value) pairs: show the hint as the
    # label and submit an empty value while no key has been entered.
    default_voices = [("Enter API key to load voices", "")]
    default_models = [("Enter API key to load models", "")]

    with gr.Blocks(title='PodcastCreator', theme=gr.themes.Soft()) as interface:
        with gr.Row():
            with gr.Column(scale=2):
                url_input = gr.Textbox(label='Source URL', placeholder='Enter the URL...')
                prompt = gr.Textbox(label='Podcast Topic', lines=3)

                with gr.Row():
                    with gr.Column():
                        elevenlabs_key = gr.Textbox(
                            label='ElevenLabs API Key',
                            type='password',
                            placeholder='Enter key...',
                        )
                        voice = gr.Dropdown(
                            label='Voice',
                            choices=default_voices,
                            value=None,
                            allow_custom_value=True,
                        )

                    with gr.Column():
                        openrouter_key = gr.Textbox(
                            label='OpenRouter API Key',
                            type='password',
                            placeholder='Enter key...',
                        )
                        model = gr.Dropdown(
                            label='AI Model',
                            choices=default_models,
                            value=None,
                            allow_custom_value=True,
                        )

                submit_btn = gr.Button('Create Podcast', variant='primary')

            with gr.Column(scale=1):
                audio_output = gr.Audio(label="Generated Podcast")
                status = gr.Textbox(label='Status', interactive=False)

        def update_voices(key):
            """Reload the voice dropdown for the given ElevenLabs key."""
            if not key:
                return gr.Dropdown(choices=default_voices, value=default_voices[0][1])
            try:
                client = ElevenLabsClient(key)
                voices = client.get_voices()
                # NOTE(review): this preselects the FIRST element of the first
                # pair. Gradio treats pairs as (label, value); verify the order
                # returned by ElevenLabsClient.get_voices().
                return gr.Dropdown(choices=voices, value=voices[0][0] if voices else None)
            except Exception as e:
                logger.error(f"Failed to load voices: {e}")
                # Surface the failure as the only (label, value) entry.
                return gr.Dropdown(choices=[(f"Error: {e}", "")], value=None)

        async def update_models(key):
            """Reload the model dropdown for the given OpenRouter key."""
            if not key:
                return gr.Dropdown(choices=default_models, value=default_models[0][1])
            try:
                client = OpenRouterClient(key)
                models = await client.get_models()
                # NOTE(review): same (label, value) ordering question as for
                # voices -- verify against OpenRouterClient.get_models().
                return gr.Dropdown(choices=models, value=models[0][0] if models else None)
            except Exception as e:
                logger.error(f"Failed to load models: {e}")
                return gr.Dropdown(choices=[(f"Error: {e}", "")], value=None)

        # Event listeners must be registered inside the Blocks context.
        try:
            elevenlabs_key.change(fn=update_voices, inputs=elevenlabs_key, outputs=voice)
            openrouter_key.change(fn=update_models, inputs=openrouter_key, outputs=model)

            submit_btn.click(
                fn=create_podcast,
                inputs=[url_input, prompt, elevenlabs_key, voice, openrouter_key, model],
                outputs=[audio_output, status],
            )
        except Exception as e:
            logger.error(f"Failed to set up event handlers: {e}")
            raise

    logger.info("Gradio interface initialized successfully")
    return interface
|
|
if __name__ == '__main__':
    # Script entry point: build the interface and launch it.
    demo = create_ui()
    demo.launch()