Smart_Confidant

Configuration error

File size: 17,904 Bytes

a44add2
 
 
 
 
9780e52
a44add2
9780e52
 
1dc1bd1
 
a44add2
 
 
24ed9c5
 
 
 
 
 
 
 
 
 
9780e52
a44add2
e301d09
a44add2
 
 
24ed9c5
1a4c576
c0b3a22
e301d09
a44add2
78f4f84
 
 
 
 
 
a44add2
9780e52
 
 
a44add2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24ed9c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a44add2
 
 
 
 
1dc1bd1
 
 
 
 
 
a44add2
4b38e3a
a44add2
1dc1bd1
 
a44add2
 
 
 
 
 
 
 
 
 
 
1dc1bd1
a44add2
 
 
 
 
 
 
1dc1bd1
a44add2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1dc1bd1
a44add2
 
 
 
 
 
1dc1bd1
a44add2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1dc1bd1
a44add2
 
 
 
 
 
1dc1bd1
a44add2
 
 
 
 
1dc1bd1
a44add2
 
 
 
 
 
 
 
 
1dc1bd1
a44add2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1dc1bd1
e301d09
9780e52
a44add2
 
 
3453dc9
9780e52
 
 
 
 
 
 
78f4f84
9780e52
a44add2
 
24ed9c5
a44add2
 
 
 
 
 
 
 
24ed9c5
a44add2
 
 
9780e52
24ed9c5
 
 
 
 
a44add2
 
 
 
9780e52
a44add2
 
 
 
 
9780e52
a44add2
 
 
24ed9c5
 
 
 
 
 
a44add2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9780e52
a44add2
 
 
9780e52
a44add2
 
 
 
 
 
 
 
 
 
9780e52
a44add2
 
 
24ed9c5
 
 
 
 
a44add2
9780e52
a44add2
24ed9c5
 
 
a44add2
 
 
 
 
24ed9c5
 
 
a44add2
 
 
 
9780e52
a44add2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9780e52
a44add2
 
 
 
 
 
 
 
 
 
 
 
24ed9c5
 
 
 
 
a44add2
24ed9c5
a44add2
24ed9c5
 
 
a44add2
 
 
 
9780e52
a44add2
24ed9c5
 
 
a44add2
 
 
 
24ed9c5
 
 
a44add2
 
 
 
 
 
24ed9c5
 
 
 
a44add2
 
 
24ed9c5
 
 
 
 
 
 
 
0c6cc3d
 
24ed9c5
0c6cc3d
58d6f74
a44add2
58d6f74
 
 
 
 
24ed9c5
58d6f74
a44add2
e872704
 
 
 
58d6f74
 
 
 
 
e872704
 
0c6cc3d
9780e52
a44add2
 
 
 
9780e52
a44add2
 
 
 
 
24ed9c5
 
 
 
 
 
a44add2
24ed9c5

"""
Smart Confidant - A Magic: The Gathering chatbot with support for local and API-based LLMs.
Supports both local transformers models and HuggingFace API models with custom theming.
"""

import gradio as gr
from gradio.themes.base import Base
from huggingface_hub import InferenceClient
import os
import base64
from pathlib import Path
import traceback
from datetime import datetime
from threading import Lock
import time
from prometheus_client import start_http_server, Counter, Summary, Gauge

# Load environment variables from .env file
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    # If python-dotenv not installed, skip (will use system env vars only)
    pass

# ============================================================================
# Configuration
# ============================================================================

# Model identifiers offered in the UI, grouped by the backend that serves them
LOCAL_MODELS = ["arnir0/Tiny-LLM"]
API_MODELS = ["meta-llama/Llama-3.2-3B-Instruct"]
# Default system prompt; also used as the initial value of the "System message" textbox
DEFAULT_SYSTEM_MESSAGE = "You are an expert assistant for Magic: The Gathering. Your name is Smart Confidant, but people tend to call you Bob."
TITLE = "🎓🧙🏻‍♂️ Smart Confidant 🧙🏻‍♂️🎓"

# Labeled model options for the radio selector. The " (local)" / " (api)"
# suffix is what the chat handler parses to choose a backend.
MODEL_OPTIONS = [f"{name} (local)" for name in LOCAL_MODELS] + [
    f"{name} (api)" for name in API_MODELS
]

# Global state for local model pipeline (cached across requests)
pipe = None
# NOTE(review): not read anywhere in the visible code - confirm whether it is still needed
stop_inference = False

# Debug logging setup with thread-safe access
debug_logs = []           # in-memory buffer of formatted log entries
debug_lock = Lock()       # guards every access to debug_logs
MAX_LOG_LINES = 100       # buffer cap; oldest entries are dropped beyond this

# ============================================================================
# Debug Logging Functions
# ============================================================================

def log_debug(message, level="INFO"):
    """Append a timestamped entry to the in-memory debug log and echo it to stdout.

    The buffer is capped at MAX_LOG_LINES entries; the oldest entries are
    dropped first.

    Args:
        message: Text to record.
        level: Severity tag embedded in the entry (e.g. "INFO", "WARN", "ERROR").

    Returns:
        str: Every buffered entry, including the new one, joined by newlines.
    """
    timestamp = datetime.now().strftime("%H:%M:%S")
    log_entry = f"[{timestamp}] [{level}] {message}"
    with debug_lock:
        debug_logs.append(log_entry)
        # Trim by the full overflow so the cap holds even if the buffer was
        # already over the limit.
        overflow = len(debug_logs) - MAX_LOG_LINES
        if overflow > 0:
            del debug_logs[:overflow]
        # Build the snapshot while the lock is still held; joining after
        # release would read the shared list while other threads may mutate it.
        snapshot = "\n".join(debug_logs)
    print(log_entry)
    return snapshot

def get_debug_logs():
    """Return the buffered debug entries as one newline-separated string."""
    debug_lock.acquire()
    try:
        joined = "\n".join(debug_logs)
    finally:
        # Always release, mirroring what a `with` block would do
        debug_lock.release()
    return joined

# ============================================================================
# Prometheus Metrics
# ============================================================================

# Request-level metrics: totals, outcome counts and latency
REQUEST_COUNTER = Counter(
    name='smart_confidant_requests_total',
    documentation='Total number of chat requests',
)
SUCCESSFUL_REQUESTS = Counter(
    name='smart_confidant_successful_requests_total',
    documentation='Total number of successful requests',
)
FAILED_REQUESTS = Counter(
    name='smart_confidant_failed_requests_total',
    documentation='Total number of failed requests',
)
REQUEST_DURATION = Summary(
    name='smart_confidant_request_duration_seconds',
    documentation='Time spent processing request',
)

# Chatbot-specific metrics: model usage, output size, conversation size, error classes
MODEL_SELECTION_COUNTER = Counter(
    name='smart_confidant_model_selections_total',
    documentation='Count of model selections',
    labelnames=['model_name', 'model_type'],
)
TOKEN_COUNT = Summary(
    name='smart_confidant_tokens_generated',
    documentation='Number of tokens generated per response',
)
CONVERSATION_LENGTH = Gauge(
    name='smart_confidant_conversation_length',
    documentation='Number of messages in current conversation',
)
ERROR_BY_TYPE = Counter(
    name='smart_confidant_errors_by_type_total',
    documentation='Count of errors by type',
    labelnames=['error_type'],
)

# ============================================================================
# Asset Loading & Theme Configuration
# ============================================================================

# Read the tiled background once at import time and embed it as a base64
# data URL so the stylesheet can reference it inline.
ASSETS_DIR = Path(__file__).parent / "assets"
BACKGROUND_IMAGE_PATH = ASSETS_DIR / "confidant_pattern.png"
try:
    _encoded_img = base64.b64encode(BACKGROUND_IMAGE_PATH.read_bytes()).decode("ascii")
    BACKGROUND_DATA_URL = "data:image/png;base64," + _encoded_img
    log_debug("Background image loaded successfully")
except Exception as e:
    # Best effort: a missing or unreadable image must not stop the app from starting
    log_debug(f"Error loading background image: {e}", "ERROR")
    BACKGROUND_DATA_URL = ""

class TransparentTheme(Base):
    """Gradio theme that pins the body fill to ``*neutral_950`` for both the
    default and the dark variant, on top of the ``Base`` theme defaults."""

    def __init__(self):
        super().__init__()
        # Same fill token for both variants so the page looks identical in either mode
        body_fill = "*neutral_950"
        super().set(
            body_background_fill=body_fill,
            body_background_fill_dark=body_fill,
        )

# Custom CSS for dark theme with tiled background image.
# Uses aggressive selectors (and !important everywhere) to override Gradio's
# default styling. This is an f-string: literal CSS braces are doubled
# ({{ }}) and the only interpolated value is BACKGROUND_DATA_URL, which is
# the empty string when the image failed to load (producing url('')).
# The resulting stylesheet is handed to gr.Blocks via its `css` argument.
fancy_css = f"""
    /* Tiled background image on page body */
    body {{
        background-image: url('{BACKGROUND_DATA_URL}') !important;
        background-repeat: repeat !important;
        background-size: auto !important;
        background-attachment: fixed !important;
        background-color: #1a1a1a !important;
    }}
    
    /* Make Gradio wrapper divs transparent to show background */
    gradio-app,
    .gradio-container,
    .gradio-container > div,
    .gradio-container > div > div,
    .main,
    .contain,
    [class*="svelte"] > div,
    div[class*="wrap"]:not(.gr-button):not([class*="input"]):not([class*="textbox"]):not([class*="bubble"]):not([class*="message"]),
    div[class*="container"]:not([class*="input"]):not([class*="button"]) {{
        background: transparent !important;
        background-color: transparent !important;
        background-image: none !important;
    }}
    
    /* Center and constrain main container */
    .gradio-container {{
        max-width: 700px !important;
        margin: 0 auto !important;
        padding: 20px !important;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1) !important;
        border-radius: 10px !important;
        font-family: 'Arial', sans-serif !important;
    }}
    
    /* Green title banner */
    #title {{
        text-align: center !important;
        font-size: 2em !important;
        margin-bottom: 20px !important;
        color: #ffffff !important;
        background-color: #4CAF50 !important;
        padding: 20px !important;
        border-radius: 10px !important;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3) !important;
    }}
    
    /* Dark grey backgrounds for chatbot and settings components */
    .block.svelte-12cmxck {{
        background-color: rgba(60, 60, 60, 0.95) !important;
        border-radius: 10px !important;
    }}
    
    div[class*="bubble-wrap"],
    div[class*="message-wrap"] {{
        background-color: rgba(60, 60, 60, 0.95) !important;
        border-radius: 10px !important;
        padding: 15px !important;
    }}
    
    .label-wrap,
    div[class*="accordion"] {{
        background-color: rgba(60, 60, 60, 0.95) !important;
        border-radius: 10px !important;
    }}
    
    /* White text for readability on dark backgrounds */
    .block.svelte-12cmxck,
    .block.svelte-12cmxck *,
    div[class*="bubble-wrap"] *,
    div[class*="message-wrap"] *,
    .label-wrap,
    .label-wrap * {{
        color: #ffffff !important;
    }}
    
    /* Green buttons with hover effect */
    .gr-button,
    button {{
        background-color: #4CAF50 !important;
        background-image: none !important;
        color: white !important;
        border: none !important;
        border-radius: 5px !important;
        padding: 10px 20px !important;
        cursor: pointer !important;
        transition: background-color 0.3s ease !important;
    }}
    .gr-button:hover,
    button:hover {{
        background-color: #45a049 !important;
    }}
    .gr-slider input {{
        color: #4CAF50 !important;
    }}
    """

# ============================================================================
# Chat Response Handler
# ============================================================================

def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    selected_model: str,
):
    """
    Handle chat responses using either local transformers models or HuggingFace API.

    The backend is chosen from the suffix of ``selected_model``. Failures are
    never raised to the caller: each error path logs the traceback, updates
    the failure metrics and yields a user-facing error string instead. The
    request duration is recorded when the generator finishes.

    Args:
        message: User's input message
        history: List of previous messages in conversation
        system_message: System prompt to guide model behavior
        max_tokens: Maximum tokens to generate
        temperature: Sampling temperature (higher = more random)
        top_p: Nucleus sampling threshold
        selected_model: Model identifier with "(local)" or "(api)" suffix

    Yields:
        str: Generated response text or error message
    """
    global pipe

    # Prometheus metrics: Track request start
    REQUEST_COUNTER.inc()
    start_time = time.perf_counter()

    try:
        log_debug(f"New message received: '{message[:50]}...'")
        log_debug(f"Selected model: {selected_model}")
        log_debug(f"Parameters - max_tokens: {max_tokens}, temp: {temperature}, top_p: {top_p}")

        # Build complete message history with system prompt
        messages = [{"role": "system", "content": system_message}]
        messages.extend(history)
        messages.append({"role": "user", "content": message})
        log_debug(f"Message history length: {len(messages)}")

        # Parse model type and name from selection. Only the trailing tag is
        # removed, so a model id that happens to contain the same text
        # elsewhere is left intact.
        is_local = selected_model.endswith("(local)")
        model_name = selected_model.removesuffix(" (local)").removesuffix(" (api)")

        # Prometheus metrics: Track model selection and conversation length
        model_type = "local" if is_local else "api"
        MODEL_SELECTION_COUNTER.labels(model_name=model_name, model_type=model_type).inc()
        CONVERSATION_LENGTH.set(len(messages))

        response = ""

        if is_local:
            # ===== LOCAL MODEL PATH =====
            log_debug(f"Using LOCAL mode with model: {model_name}")
            try:
                # Imported lazily so the API path works without these packages installed.
                from transformers import pipeline
                # torch is not referenced below; presumably imported so a missing
                # backend surfaces as ImportError here - confirm before removing.
                import torch
                log_debug("Transformers imported successfully")

                # Load or reuse cached pipeline
                if pipe is None or pipe.model.name_or_path != model_name:
                    log_debug(f"Loading model pipeline for: {model_name}")
                    pipe = pipeline("text-generation", model=model_name)
                    log_debug("Model pipeline loaded successfully")
                else:
                    log_debug("Using cached model pipeline")

                # Format conversation as plain text prompt
                prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages])
                log_debug(f"Prompt length: {len(prompt)} characters")

                # Run inference
                log_debug("Starting inference...")
                outputs = pipe(
                    prompt,
                    max_new_tokens=max_tokens,
                    do_sample=True,
                    temperature=temperature,
                    top_p=top_p,
                )
                log_debug("Inference completed")

                # Extract new tokens only (strip original prompt)
                response = outputs[0]["generated_text"][len(prompt):]
                log_debug(f"Response length: {len(response)} characters")

                # Prometheus metrics: Track success and approximate token count
                SUCCESSFUL_REQUESTS.inc()
                TOKEN_COUNT.observe(len(response.split()))  # Approximate token count using word count

                yield response.strip()

            except ImportError as e:
                # Prometheus metrics: Track error
                FAILED_REQUESTS.inc()
                ERROR_BY_TYPE.labels(error_type="import_error").inc()
                error_msg = f"Import error: {str(e)}"
                log_debug(error_msg, "ERROR")
                log_debug(traceback.format_exc(), "ERROR")
                yield f"❌ Import Error: {str(e)}\n\nPlease check log.txt for details."
            except Exception as e:
                # Prometheus metrics: Track error
                FAILED_REQUESTS.inc()
                ERROR_BY_TYPE.labels(error_type="local_model_error").inc()
                error_msg = f"Local model error: {str(e)}"
                log_debug(error_msg, "ERROR")
                log_debug(traceback.format_exc(), "ERROR")
                yield f"❌ Local Model Error: {str(e)}\n\nPlease check log.txt for details."

        else:
            # ===== API MODEL PATH =====
            log_debug(f"Using API mode with model: {model_name}")

            try:
                # Check for HuggingFace API token
                hf_token = os.environ.get("HF_TOKEN", None)
                if hf_token:
                    log_debug("HF_TOKEN found in environment")
                else:
                    log_debug("No HF_TOKEN in environment - API call will likely fail", "WARN")

                # Create HuggingFace Inference client
                log_debug("Creating InferenceClient...")
                client = InferenceClient(
                    api_key=hf_token,
                )
                log_debug("InferenceClient created successfully")

                # Call chat completion API
                log_debug("Starting chat completion...")
                completion = client.chat.completions.create(
                    model=model_name,
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                )

                response = completion.choices[0].message.content
                if response is None:
                    # Without this guard len(response) below raises a TypeError
                    # whose message tells the user nothing useful.
                    raise ValueError("API returned an empty response")
                log_debug(f"Completion received. Response length: {len(response)} characters")

                # Prometheus metrics: Track success and approximate token count
                SUCCESSFUL_REQUESTS.inc()
                TOKEN_COUNT.observe(len(response.split()))  # Approximate token count using word count

                yield response

            except Exception as e:
                # Prometheus metrics: Track error
                FAILED_REQUESTS.inc()
                ERROR_BY_TYPE.labels(error_type="api_error").inc()
                error_msg = f"API error: {str(e)}"
                log_debug(error_msg, "ERROR")
                log_debug(traceback.format_exc(), "ERROR")
                yield f"❌ API Error: {str(e)}\n\nPlease check log.txt for details."

    except Exception as e:
        # Catch-all for failures outside the backend-specific handlers
        # (e.g. while building the message list or updating metrics).
        FAILED_REQUESTS.inc()
        ERROR_BY_TYPE.labels(error_type="unexpected_error").inc()
        error_msg = f"Unexpected error in respond function: {str(e)}"
        log_debug(error_msg, "ERROR")
        log_debug(traceback.format_exc(), "ERROR")
        yield f"❌ Unexpected Error: {str(e)}\n\nPlease check log.txt for details."
    finally:
        # Prometheus metrics: Record request duration
        REQUEST_DURATION.observe(time.perf_counter() - start_time)


# ============================================================================
# Gradio UI Definition
# ============================================================================

# Register the assets directory with Gradio's static file serving (absolute path required)
ASSETS_DIR_ABSOLUTE = str(ASSETS_DIR)
gr.set_static_paths(paths=[ASSETS_DIR_ABSOLUTE])

with gr.Blocks(theme=TransparentTheme(), css=fancy_css) as demo:
    # Title banner
    gr.Markdown(f"<h1 id='title' style='text-align: center;'>{TITLE}</h1>")

    # Avatar icons for the chat window. as_posix() yields forward-slash paths,
    # which is what Gradio needs when it serves the files over HTTP (Windows
    # backslashes would not work in a URL).
    MONSTER_ICON = (ASSETS_DIR / "monster_icon.png").as_posix()
    BOT_ICON = (ASSETS_DIR / "smart_confidant_icon.png").as_posix()
    log_debug(f"Monster icon path: {MONSTER_ICON}")
    log_debug(f"Bot icon path: {BOT_ICON}")

    # Chat window: user avatar first, bot avatar second
    chatbot = gr.Chatbot(avatar_images=(MONSTER_ICON, BOT_ICON), type="messages")

    # Collapsible panel holding the generation settings and model picker
    with gr.Accordion("⚙️ Additional Settings", open=False):
        system_message = gr.Textbox(label="System message", value=DEFAULT_SYSTEM_MESSAGE)
        max_tokens = gr.Slider(
            label="Max new tokens", minimum=1, maximum=2048, step=1, value=512
        )
        temperature = gr.Slider(
            label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=0.7
        )
        top_p = gr.Slider(
            label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, step=0.05, value=0.95
        )
        selected_model = gr.Radio(
            label="Select Model", choices=MODEL_OPTIONS, value=MODEL_OPTIONS[1]
        )

    # Hook the chat UI to the response handler; the extra inputs are passed to
    # respond() after (message, history), in this order.
    gr.ChatInterface(
        fn=respond,
        type="messages",
        chatbot=chatbot,
        additional_inputs=[system_message, max_tokens, temperature, top_p, selected_model],
    )

# ============================================================================
# Application Entry Point
# ============================================================================

if __name__ == "__main__":
    # Startup banner
    _banner = "=" * 50
    _token_state = "Yes" if os.environ.get("HF_TOKEN") else "No"
    for _startup_line in (
        _banner,
        "Smart Confidant Application Starting",
        f"Available models: {MODEL_OPTIONS}",
        f"HF_TOKEN present: {_token_state}",
        _banner,
    ):
        log_debug(_startup_line)

    # Expose Prometheus metrics on their own HTTP port (8000)
    log_debug("Starting Prometheus metrics server on port 8000")
    start_http_server(8000)
    log_debug("Prometheus metrics server started - available at http://0.0.0.0:8000/metrics")

    # Bind to all interfaces for VM/container deployment and request a Gradio share link
    demo.launch(
        server_name="0.0.0.0",
        server_port=8012,
        share=True,
        allowed_paths=[ASSETS_DIR_ABSOLUTE],
    )