heffnt commited on
Commit
24ed9c5
·
1 Parent(s): 49391c1
Files changed (9) hide show
  1. .env.example +10 -0
  2. .gitignore +7 -1
  3. Dockerfile +26 -0
  4. README.md +0 -3
  5. app.py +91 -13
  6. deploy.sh +240 -150
  7. prometheus.yml +11 -0
  8. requirements.txt +11 -0
  9. run-local.bat +63 -0
.env.example CHANGED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Smart Confidant Environment Variables
2
+ # Copy this file to .env and fill in your values
3
+
4
+ # HuggingFace API Token
5
+ HF_TOKEN=your_huggingface_token_here
6
+
7
+ # Optional: Override default ports for local development
8
+ # GRADIO_PORT=8012
9
+ # METRICS_PORT=8000
10
+ # NODE_EXPORTER_PORT=9100
.gitignore CHANGED
@@ -22,4 +22,10 @@ env/
22
  uv.lock
23
 
24
  # Temporary files
25
- tmp/
 
 
 
 
 
 
 
22
  uv.lock
23
 
24
  # Temporary files
25
+ tmp/
26
+
27
+ # Gradio cache
28
+ .gradio/
29
+
30
+ # Claude Code
31
+ .claude/
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /opt/app
4
+ COPY . .
5
+ RUN pip install --no-cache-dir uv && \
6
+ uv pip install --system -r /opt/app/requirements.txt
7
+
8
+ # Install prometheus-node-exporter for system-level metrics
9
+ ENV DEBIAN_FRONTEND=noninteractive
10
+ RUN apt-get update && \
11
+ apt-get upgrade -yq ca-certificates && \
12
+ apt-get install -yq --no-install-recommends \
13
+ prometheus-node-exporter
14
+
15
+ # Expose ports:
16
+ # 8012 - Gradio web interface
17
+ # 8000 - Application Prometheus metrics
18
+ # 9100 - Node exporter system metrics
19
+ EXPOSE 8012
20
+ EXPOSE 8000
21
+ EXPOSE 9100
22
+
23
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
24
+
25
+ # Run node-exporter in background, then start the Python application
26
+ CMD bash -c "prometheus-node-exporter --web.listen-address=':9100' & python /opt/app/app.py"
README.md CHANGED
@@ -69,9 +69,6 @@ The app will be available at `http://your-server:8012`
69
 
70
  ### API Models (require HF_TOKEN)
71
  - **HuggingFaceH4/zephyr-7b-beta** (7B params) - Recommended: Best quality for chat
72
- - **google/gemma-2-2b-it** (2B params) - Instruction-tuned, good balance
73
- - **distilgpt2** (82M params) - Very small and fast (older generation)
74
- - **gpt2** (124M params) - Reliable baseline (older generation)
75
 
76
  ### Local Models (run on your device)
77
  - **arnir0/Tiny-LLM** - Very small model for testing
 
69
 
70
  ### API Models (require HF_TOKEN)
71
  - **HuggingFaceH4/zephyr-7b-beta** (7B params) - Recommended: Best quality for chat
 
 
 
72
 
73
  ### Local Models (run on your device)
74
  - **arnir0/Tiny-LLM** - Very small model for testing
app.py CHANGED
@@ -12,13 +12,23 @@ from pathlib import Path
12
  import traceback
13
  from datetime import datetime
14
  from threading import Lock
 
 
 
 
 
 
 
 
 
 
15
 
16
  # ============================================================================
17
  # Configuration
18
  # ============================================================================
19
 
20
  LOCAL_MODELS = ["arnir0/Tiny-LLM"]
21
- API_MODELS = ["google/gemma-2-2b-it", "HuggingFaceH4/zephyr-7b-beta"]
22
  DEFAULT_SYSTEM_MESSAGE = "You are an expert assistant for Magic: The Gathering. You're name is Smart Confidant, but people tend to call you Bob."
23
  TITLE = "🎓🧙🏻‍♂️ Smart Confidant 🧙🏻‍♂️🎓"
24
 
@@ -58,6 +68,26 @@ def get_debug_logs():
58
  with debug_lock:
59
  return "\n".join(debug_logs)
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # ============================================================================
62
  # Asset Loading & Theme Configuration
63
  # ============================================================================
@@ -197,7 +227,7 @@ def respond(
197
  ):
198
  """
199
  Handle chat responses using either local transformers models or HuggingFace API.
200
-
201
  Args:
202
  message: User's input message
203
  history: List of previous messages in conversation
@@ -206,12 +236,16 @@ def respond(
206
  temperature: Sampling temperature (higher = more random)
207
  top_p: Nucleus sampling threshold
208
  selected_model: Model identifier with "(local)" or "(api)" suffix
209
-
210
  Yields:
211
  str: Generated response text or error message
212
  """
213
  global pipe
214
-
 
 
 
 
215
  try:
216
  log_debug(f"New message received: '{message[:50]}...'")
217
  log_debug(f"Selected model: {selected_model}")
@@ -226,7 +260,12 @@ def respond(
226
  # Parse model type and name from selection
227
  is_local = selected_model.endswith("(local)")
228
  model_name = selected_model.replace(" (local)", "").replace(" (api)", "")
229
-
 
 
 
 
 
230
  response = ""
231
 
232
  if is_local:
@@ -263,14 +302,25 @@ def respond(
263
  # Extract new tokens only (strip original prompt)
264
  response = outputs[0]["generated_text"][len(prompt):]
265
  log_debug(f"Response length: {len(response)} characters")
 
 
 
 
 
266
  yield response.strip()
267
 
268
  except ImportError as e:
 
 
 
269
  error_msg = f"Import error: {str(e)}"
270
  log_debug(error_msg, "ERROR")
271
  log_debug(traceback.format_exc(), "ERROR")
272
  yield f"❌ Import Error: {str(e)}\n\nPlease check log.txt for details."
273
  except Exception as e:
 
 
 
274
  error_msg = f"Local model error: {str(e)}"
275
  log_debug(error_msg, "ERROR")
276
  log_debug(traceback.format_exc(), "ERROR")
@@ -291,7 +341,6 @@ def respond(
291
  # Create HuggingFace Inference client
292
  log_debug("Creating InferenceClient...")
293
  client = InferenceClient(
294
- provider="auto",
295
  api_key=hf_token,
296
  )
297
  log_debug("InferenceClient created successfully")
@@ -308,33 +357,57 @@ def respond(
308
 
309
  response = completion.choices[0].message.content
310
  log_debug(f"Completion received. Response length: {len(response)} characters")
 
 
 
 
 
311
  yield response
312
-
313
  except Exception as e:
 
 
 
314
  error_msg = f"API error: {str(e)}"
315
  log_debug(error_msg, "ERROR")
316
  log_debug(traceback.format_exc(), "ERROR")
317
  yield f"❌ API Error: {str(e)}\n\nPlease check log.txt for details."
318
 
319
  except Exception as e:
 
 
 
320
  error_msg = f"Unexpected error in respond function: {str(e)}"
321
  log_debug(error_msg, "ERROR")
322
  log_debug(traceback.format_exc(), "ERROR")
323
  yield f"❌ Unexpected Error: {str(e)}\n\nPlease check log.txt for details."
 
 
 
324
 
325
 
326
  # ============================================================================
327
  # Gradio UI Definition
328
  # ============================================================================
329
 
 
 
 
 
330
  with gr.Blocks(theme=TransparentTheme(), css=fancy_css) as demo:
331
  # Title banner
332
  gr.Markdown(f"<h1 id='title' style='text-align: center;'>{TITLE}</h1>")
333
-
334
- # Chatbot component with custom avatar icons
 
 
 
 
 
 
335
  chatbot = gr.Chatbot(
336
  type="messages",
337
- avatar_images=(str(ASSETS_DIR / "monster_icon.png"), str(ASSETS_DIR / "smart_confidant_icon.png"))
338
  )
339
 
340
  # Collapsible settings panel
@@ -343,7 +416,7 @@ with gr.Blocks(theme=TransparentTheme(), css=fancy_css) as demo:
343
  max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
344
  temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
345
  top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
346
- selected_model = gr.Radio(choices=MODEL_OPTIONS, label="Select Model", value=MODEL_OPTIONS[0])
347
 
348
  # Wire up chat interface with response handler
349
  gr.ChatInterface(
@@ -369,6 +442,11 @@ if __name__ == "__main__":
369
  log_debug(f"Available models: {MODEL_OPTIONS}")
370
  log_debug(f"HF_TOKEN present: {'Yes' if os.environ.get('HF_TOKEN') else 'No'}")
371
  log_debug("="*50)
372
-
 
 
 
 
 
373
  # Launch on all interfaces for VM/container deployment, with Gradio share link
374
- demo.launch(server_name="0.0.0.0", server_port=8012, share=True)
 
12
  import traceback
13
  from datetime import datetime
14
  from threading import Lock
15
+ import time
16
+ from prometheus_client import start_http_server, Counter, Summary, Gauge
17
+
18
+ # Load environment variables from .env file
19
+ try:
20
+ from dotenv import load_dotenv
21
+ load_dotenv()
22
+ except ImportError:
23
+ # If python-dotenv not installed, skip (will use system env vars only)
24
+ pass
25
 
26
  # ============================================================================
27
  # Configuration
28
  # ============================================================================
29
 
30
  LOCAL_MODELS = ["arnir0/Tiny-LLM"]
31
+ API_MODELS = ["meta-llama/Llama-3.2-3B-Instruct"]
32
  DEFAULT_SYSTEM_MESSAGE = "You are an expert assistant for Magic: The Gathering. You're name is Smart Confidant, but people tend to call you Bob."
33
  TITLE = "🎓🧙🏻‍♂️ Smart Confidant 🧙🏻‍♂️🎓"
34
 
 
68
  with debug_lock:
69
  return "\n".join(debug_logs)
70
 
71
+ # ============================================================================
72
+ # Prometheus Metrics
73
+ # ============================================================================
74
+
75
+ # Core request metrics
76
+ REQUEST_COUNTER = Counter('smart_confidant_requests_total', 'Total number of chat requests')
77
+ SUCCESSFUL_REQUESTS = Counter('smart_confidant_successful_requests_total', 'Total number of successful requests')
78
+ FAILED_REQUESTS = Counter('smart_confidant_failed_requests_total', 'Total number of failed requests')
79
+ REQUEST_DURATION = Summary('smart_confidant_request_duration_seconds', 'Time spent processing request')
80
+
81
+ # Enhanced chatbot metrics
82
+ MODEL_SELECTION_COUNTER = Counter('smart_confidant_model_selections_total',
83
+ 'Count of model selections',
84
+ ['model_name', 'model_type'])
85
+ TOKEN_COUNT = Summary('smart_confidant_tokens_generated', 'Number of tokens generated per response')
86
+ CONVERSATION_LENGTH = Gauge('smart_confidant_conversation_length', 'Number of messages in current conversation')
87
+ ERROR_BY_TYPE = Counter('smart_confidant_errors_by_type_total',
88
+ 'Count of errors by type',
89
+ ['error_type'])
90
+
91
  # ============================================================================
92
  # Asset Loading & Theme Configuration
93
  # ============================================================================
 
227
  ):
228
  """
229
  Handle chat responses using either local transformers models or HuggingFace API.
230
+
231
  Args:
232
  message: User's input message
233
  history: List of previous messages in conversation
 
236
  temperature: Sampling temperature (higher = more random)
237
  top_p: Nucleus sampling threshold
238
  selected_model: Model identifier with "(local)" or "(api)" suffix
239
+
240
  Yields:
241
  str: Generated response text or error message
242
  """
243
  global pipe
244
+
245
+ # Prometheus metrics: Track request start
246
+ REQUEST_COUNTER.inc()
247
+ start_time = time.perf_counter()
248
+
249
  try:
250
  log_debug(f"New message received: '{message[:50]}...'")
251
  log_debug(f"Selected model: {selected_model}")
 
260
  # Parse model type and name from selection
261
  is_local = selected_model.endswith("(local)")
262
  model_name = selected_model.replace(" (local)", "").replace(" (api)", "")
263
+
264
+ # Prometheus metrics: Track model selection and conversation length
265
+ model_type = "local" if is_local else "api"
266
+ MODEL_SELECTION_COUNTER.labels(model_name=model_name, model_type=model_type).inc()
267
+ CONVERSATION_LENGTH.set(len(messages))
268
+
269
  response = ""
270
 
271
  if is_local:
 
302
  # Extract new tokens only (strip original prompt)
303
  response = outputs[0]["generated_text"][len(prompt):]
304
  log_debug(f"Response length: {len(response)} characters")
305
+
306
+ # Prometheus metrics: Track success and approximate token count
307
+ SUCCESSFUL_REQUESTS.inc()
308
+ TOKEN_COUNT.observe(len(response.split())) # Approximate token count using word count
309
+
310
  yield response.strip()
311
 
312
  except ImportError as e:
313
+ # Prometheus metrics: Track error
314
+ FAILED_REQUESTS.inc()
315
+ ERROR_BY_TYPE.labels(error_type="import_error").inc()
316
  error_msg = f"Import error: {str(e)}"
317
  log_debug(error_msg, "ERROR")
318
  log_debug(traceback.format_exc(), "ERROR")
319
  yield f"❌ Import Error: {str(e)}\n\nPlease check log.txt for details."
320
  except Exception as e:
321
+ # Prometheus metrics: Track error
322
+ FAILED_REQUESTS.inc()
323
+ ERROR_BY_TYPE.labels(error_type="local_model_error").inc()
324
  error_msg = f"Local model error: {str(e)}"
325
  log_debug(error_msg, "ERROR")
326
  log_debug(traceback.format_exc(), "ERROR")
 
341
  # Create HuggingFace Inference client
342
  log_debug("Creating InferenceClient...")
343
  client = InferenceClient(
 
344
  api_key=hf_token,
345
  )
346
  log_debug("InferenceClient created successfully")
 
357
 
358
  response = completion.choices[0].message.content
359
  log_debug(f"Completion received. Response length: {len(response)} characters")
360
+
361
+ # Prometheus metrics: Track success and approximate token count
362
+ SUCCESSFUL_REQUESTS.inc()
363
+ TOKEN_COUNT.observe(len(response.split())) # Approximate token count using word count
364
+
365
  yield response
366
+
367
  except Exception as e:
368
+ # Prometheus metrics: Track error
369
+ FAILED_REQUESTS.inc()
370
+ ERROR_BY_TYPE.labels(error_type="api_error").inc()
371
  error_msg = f"API error: {str(e)}"
372
  log_debug(error_msg, "ERROR")
373
  log_debug(traceback.format_exc(), "ERROR")
374
  yield f"❌ API Error: {str(e)}\n\nPlease check log.txt for details."
375
 
376
  except Exception as e:
377
+ # Prometheus metrics: Track error
378
+ FAILED_REQUESTS.inc()
379
+ ERROR_BY_TYPE.labels(error_type="unexpected_error").inc()
380
  error_msg = f"Unexpected error in respond function: {str(e)}"
381
  log_debug(error_msg, "ERROR")
382
  log_debug(traceback.format_exc(), "ERROR")
383
  yield f"❌ Unexpected Error: {str(e)}\n\nPlease check log.txt for details."
384
+ finally:
385
+ # Prometheus metrics: Record request duration
386
+ REQUEST_DURATION.observe(time.perf_counter() - start_time)
387
 
388
 
389
  # ============================================================================
390
  # Gradio UI Definition
391
  # ============================================================================
392
 
393
+ # Allow Gradio to serve static files from assets directory (requires absolute path)
394
+ ASSETS_DIR_ABSOLUTE = str(Path(__file__).parent / "assets")
395
+ gr.set_static_paths(paths=[ASSETS_DIR_ABSOLUTE])
396
+
397
  with gr.Blocks(theme=TransparentTheme(), css=fancy_css) as demo:
398
  # Title banner
399
  gr.Markdown(f"<h1 id='title' style='text-align: center;'>{TITLE}</h1>")
400
+
401
+ # Chatbot component with custom avatar icons (using forward slashes for web serving)
402
+ # Gradio serves files via HTTP URLs which require forward slashes, not Windows backslashes
403
+ MONSTER_ICON = str((ASSETS_DIR / "monster_icon.png").as_posix())
404
+ BOT_ICON = str((ASSETS_DIR / "smart_confidant_icon.png").as_posix())
405
+ log_debug(f"Monster icon path: {MONSTER_ICON}")
406
+ log_debug(f"Bot icon path: {BOT_ICON}")
407
+
408
  chatbot = gr.Chatbot(
409
  type="messages",
410
+ avatar_images=(MONSTER_ICON, BOT_ICON)
411
  )
412
 
413
  # Collapsible settings panel
 
416
  max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
417
  temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
418
  top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
419
+ selected_model = gr.Radio(choices=MODEL_OPTIONS, label="Select Model", value=MODEL_OPTIONS[1])
420
 
421
  # Wire up chat interface with response handler
422
  gr.ChatInterface(
 
442
  log_debug(f"Available models: {MODEL_OPTIONS}")
443
  log_debug(f"HF_TOKEN present: {'Yes' if os.environ.get('HF_TOKEN') else 'No'}")
444
  log_debug("="*50)
445
+
446
+ # Start Prometheus metrics server on port 8000
447
+ log_debug("Starting Prometheus metrics server on port 8000")
448
+ start_http_server(8000)
449
+ log_debug("Prometheus metrics server started - available at http://0.0.0.0:8000/metrics")
450
+
451
  # Launch on all interfaces for VM/container deployment, with Gradio share link
452
+ demo.launch(server_name="0.0.0.0", server_port=8012, share=True, allowed_paths=[ASSETS_DIR_ABSOLUTE])
deploy.sh CHANGED
@@ -1,196 +1,286 @@
1
  #! /bin/bash
2
 
 
 
 
 
 
 
 
3
  # Configuration
4
- PORT=22012
5
- MACHINE=paffenroth-23.dyn.wpi.edu
6
- MY_KEY_PATH=$HOME/.ssh/mlopskey # Path to your personal SSH key
7
- STUDENT_ADMIN_KEY_PATH=$HOME/.ssh/student-admin_key # Path to student-admin fallback key
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # Load environment variables from .env file if it exists
10
  if [ -f .env ]; then
11
- echo "Loading environment variables from .env file..."
12
  export $(grep -v '^#' .env | xargs)
13
  fi
14
 
15
- # Clean up from previous runs
16
- ssh-keygen -f "$HOME/.ssh/known_hosts" -R "[$MACHINE]:$PORT" 2>/dev/null
17
- rm -rf tmp
 
 
 
 
18
 
19
- # Create a temporary directory
20
- mkdir tmp
 
 
 
 
 
21
 
22
- # Change the permissions of the directory
23
- chmod 700 tmp
 
 
 
 
 
24
 
25
- # Change to the temporary directory
26
- cd tmp
 
 
 
 
 
27
 
28
- echo "Checking if personal key works..."
29
- # Try connecting with personal key
30
- if ssh -i ${MY_KEY_PATH} -p ${PORT} -o StrictHostKeyChecking=no -o ConnectTimeout=10 student-admin@${MACHINE} "echo 'success'" > /dev/null 2>&1; then
31
- echo "✓ Personal key works! No update needed."
32
- MY_KEY=${MY_KEY_PATH}
33
  else
34
- echo " Personal key failed. Updating with student-admin key..."
35
-
36
- # Check if the keys exist
37
- if [ ! -f "${MY_KEY_PATH}.pub" ]; then
38
- echo "Error: Personal public key not found at ${MY_KEY_PATH}.pub"
39
- echo "Creating a new key pair..."
40
- ssh-keygen -f ${MY_KEY_PATH} -t ed25519 -N ""
41
- fi
42
-
43
- if [ ! -f "${STUDENT_ADMIN_KEY_PATH}" ]; then
44
- echo "Error: Student-admin key not found at ${STUDENT_ADMIN_KEY_PATH}"
45
- exit 1
46
- fi
47
-
48
- # Read the public key content
49
- MY_PUB_KEY=$(cat ${MY_KEY_PATH}.pub)
50
-
51
- # Update authorized_keys on the server using student-admin key
52
- echo "Connecting with student-admin key to update authorized_keys..."
53
- ssh -i ${STUDENT_ADMIN_KEY_PATH} -p ${PORT} -o StrictHostKeyChecking=no student-admin@${MACHINE} << EOF
54
- mkdir -p ~/.ssh
55
- chmod 700 ~/.ssh
56
- touch ~/.ssh/authorized_keys
57
- chmod 600 ~/.ssh/authorized_keys
58
- # Remove any old keys from this machine
59
- grep -v 'rcpaffenroth@paffenroth-23' ~/.ssh/authorized_keys > ~/.ssh/authorized_keys.tmp 2>/dev/null || true
60
- mv ~/.ssh/authorized_keys.tmp ~/.ssh/authorized_keys 2>/dev/null || true
61
- # Add the new key
62
- echo '${MY_PUB_KEY}' >> ~/.ssh/authorized_keys
63
- echo 'Key updated'
64
- EOF
65
-
66
- if [ $? -ne 0 ]; then
67
- echo "Failed to update key with student-admin key"
68
- exit 1
69
- fi
70
-
71
- # Verify the personal key now works
72
- echo "Verifying personal key..."
73
- sleep 2
74
-
75
- if ssh -i ${MY_KEY_PATH} -p ${PORT} -o StrictHostKeyChecking=no student-admin@${MACHINE} "echo 'success'" > /dev/null 2>&1; then
76
- echo "✓ Success! Personal key is now working."
77
- MY_KEY=${MY_KEY_PATH}
78
- else
79
- echo "✗ Personal key still not working after update"
80
- exit 1
81
- fi
82
  fi
83
 
84
- # Add the key to the ssh-agent
85
- eval "$(ssh-agent -s)"
86
- ssh-add ${MY_KEY}
87
-
88
- # Check the key file on the server
89
- echo "Checking authorized_keys on server:"
90
- ssh -i ${MY_KEY} -p ${PORT} -o StrictHostKeyChecking=no student-admin@${MACHINE} "cat ~/.ssh/authorized_keys"
91
-
92
- # Clone or copy the repo
93
- # If using git:
94
- # git clone https://github.com/yourusername/Smart_Confidant
95
- # Or just copy the local directory:
96
- echo "Copying Smart_Confidant code..."
97
- mkdir -p Smart_Confidant
98
- # Copy all files except tmp and .git directories
99
- for item in ../*; do
100
- base=$(basename "$item")
101
- if [ "$base" != "tmp" ] && [ "$base" != ".git" ]; then
102
- cp -r "$item" Smart_Confidant/
103
- fi
104
- done
105
 
106
- # Copy the files to the server
107
- echo "Uploading code to server..."
108
- scp -i ${MY_KEY} -P ${PORT} -o StrictHostKeyChecking=no -r Smart_Confidant student-admin@${MACHINE}:~/
 
 
 
 
109
 
110
- if [ $? -eq 0 ]; then
111
- echo " Code successfully uploaded to server"
 
 
 
 
 
 
 
 
 
 
112
  else
113
- echo " Failed to upload code"
 
114
  exit 1
115
  fi
116
 
117
- # Define SSH command for subsequent steps using the confirmed key
118
- COMMAND="ssh -i ${MY_KEY} -p ${PORT} -o StrictHostKeyChecking=no student-admin@${MACHINE}"
 
 
 
 
 
 
 
 
 
 
119
 
120
- # Run all setup in a single SSH session
121
- echo "Setting up environment on remote server..."
122
- # Pass HF_TOKEN to the remote session
123
- ${COMMAND} bash -s << ENDSSH
124
  set -e
125
- export HF_TOKEN='${HF_TOKEN}'
126
-
127
- # Stop old process
128
- echo " Stopping old process if running..."
129
- pkill -f 'python.*app.py' || true
130
-
131
- # Check if micromamba is installed
132
- if [ ! -f ~/bin/micromamba ]; then
133
- echo "→ Installing micromamba..."
134
- curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C ~/ bin/micromamba
135
- mkdir -p ~/micromamba
136
- export MAMBA_ROOT_PREFIX=~/micromamba
137
- echo 'export MAMBA_ROOT_PREFIX=~/micromamba' >> ~/.bashrc
138
- echo 'eval "$(~/bin/micromamba shell hook -s bash)"' >> ~/.bashrc
139
- echo "✓ Micromamba installed"
140
  else
141
- echo " Micromamba already installed"
142
- export MAMBA_ROOT_PREFIX=~/micromamba
143
  fi
144
 
145
- eval "$(~/bin/micromamba shell hook -s bash)" 2>/dev/null || true
 
 
 
 
146
 
147
- cd Smart_Confidant
 
 
 
 
 
 
 
148
 
149
- # Check if environment exists
150
- if ~/bin/micromamba env list | grep -q "smart-confidant"; then
151
- echo "→ Updating existing environment..."
152
- ~/bin/micromamba install -n smart-confidant -f environment.yml -y
153
  else
154
- echo " Creating new environment..."
155
- ~/bin/micromamba create -f environment.yml -y
156
  fi
157
 
158
- # Check if uv is installed
159
- if ! ~/bin/micromamba run -n smart-confidant which uv &>/dev/null; then
160
- echo "→ Installing uv..."
161
- ~/bin/micromamba run -n smart-confidant pip install uv
 
 
 
162
  else
163
- echo " uv already installed"
 
 
164
  fi
165
 
166
- # Install/update dependencies
167
- echo "→ Installing/updating dependencies..."
168
- ~/bin/micromamba run -n smart-confidant uv pip install -e .
 
 
 
 
169
 
170
- # Start application
171
- echo " Starting application..."
172
- # Pass HF_TOKEN if it exists
173
- if [ ! -z "$HF_TOKEN" ]; then
174
- echo "→ HF_TOKEN provided, API models will be available"
175
- nohup ~/bin/micromamba run -n smart-confidant -e HF_TOKEN="$HF_TOKEN" python -u app.py > ~/log.txt 2>&1 &
176
  else
177
- echo " HF_TOKEN not set - API models will not work"
178
- nohup ~/bin/micromamba run -n smart-confidant python -u app.py > ~/log.txt 2>&1 &
179
  fi
180
 
181
- # Wait for the app to start
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  sleep 5
183
 
184
- echo "✓ Setup complete"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  ENDSSH
186
 
187
- # Extract the Gradio share link from the remote log file
188
- SHARE_LINK=$(${COMMAND} "grep -oP 'https://[a-z0-9]+\.gradio\.live' ~/log.txt | tail -1" 2>/dev/null)
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  echo ""
191
  echo "=========================================="
192
- echo "Deployment complete!"
193
- echo "Public Gradio Share Link: ${SHARE_LINK}"
194
- echo "==========================================="
195
-
196
-
 
1
  #! /bin/bash
2
 
3
+ # ============================================================================
4
+ # Smart Confidant - Docker Deployment Script for Melnibone
5
+ # Deploys from laptop to melnibone.wpi.edu using Docker
6
+ # ============================================================================
7
+
8
+ set -e # Exit on error
9
+
10
  # Configuration
11
+ PORT=2222
12
+ MACHINE=melnibone.wpi.edu
13
+ USER=group12
14
+ MY_KEY_PATH=$HOME/.ssh/mlops # Path to your SSH key for melnibone
15
+
16
+ # Docker configuration
17
+ DOCKER_USER=heffnt
18
+ DOCKER_IMAGE=smart_confidant
19
+ DOCKER_TAG=cs3
20
+ FULL_IMAGE_NAME=${DOCKER_USER}/${DOCKER_IMAGE}:${DOCKER_TAG}
21
+
22
+ # Container configuration
23
+ CONTAINER_NAME=smart_confidant
24
+ GRADIO_PORT=2727
25
+ METRICS_PORT=2728
26
+ NODE_EXPORTER_PORT=2729
27
+
28
+ # Colors for output
29
+ RED='\033[0;31m'
30
+ GREEN='\033[0;32m'
31
+ YELLOW='\033[1;33m'
32
+ BLUE='\033[0;34m'
33
+ NC='\033[0m' # No Color
34
+
35
+ # Helper functions
36
+ log_info() {
37
+ echo -e "${BLUE}→${NC} $1"
38
+ }
39
+
40
+ log_success() {
41
+ echo -e "${GREEN}✓${NC} $1"
42
+ }
43
+
44
+ log_warning() {
45
+ echo -e "${YELLOW}⚠${NC} $1"
46
+ }
47
+
48
+ log_error() {
49
+ echo -e "${RED}✗${NC} $1"
50
+ }
51
 
52
  # Load environment variables from .env file if it exists
53
  if [ -f .env ]; then
54
+ log_info "Loading environment variables from .env file..."
55
  export $(grep -v '^#' .env | xargs)
56
  fi
57
 
58
+ # Check if HF_TOKEN is set
59
+ if [ -z "$HF_TOKEN" ]; then
60
+ log_warning "HF_TOKEN not set - API models will not work"
61
+ log_warning "Set it in .env file or export HF_TOKEN=your_token"
62
+ else
63
+ log_success "HF_TOKEN found"
64
+ fi
65
 
66
+ # ============================================================================
67
+ # Step 1: Build Docker Image Locally
68
+ # ============================================================================
69
+ echo ""
70
+ echo "========================================"
71
+ echo "Step 1: Building Docker Image"
72
+ echo "========================================"
73
 
74
+ log_info "Building Docker image: ${FULL_IMAGE_NAME}"
75
+ if docker build -t ${FULL_IMAGE_NAME} .; then
76
+ log_success "Docker image built successfully"
77
+ else
78
+ log_error "Docker build failed"
79
+ exit 1
80
+ fi
81
 
82
+ # ============================================================================
83
+ # Step 2: Push to DockerHub
84
+ # ============================================================================
85
+ echo ""
86
+ echo "========================================"
87
+ echo "Step 2: Pushing to DockerHub"
88
+ echo "========================================"
89
 
90
+ log_info "Checking Docker login status..."
91
+ if docker info 2>/dev/null | grep -q "Username: ${DOCKER_USER}"; then
92
+ log_success "Already logged in to DockerHub as ${DOCKER_USER}"
93
+ elif docker login --username ${DOCKER_USER} 2>/dev/null; then
94
+ log_success "Logged in to DockerHub"
95
  else
96
+ log_error "Not logged in to DockerHub"
97
+ log_info "Please run: docker login --username ${DOCKER_USER}"
98
+ exit 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  fi
100
 
101
+ log_info "Pushing image to DockerHub..."
102
+ if docker push ${FULL_IMAGE_NAME}; then
103
+ log_success "Image pushed to DockerHub"
104
+ else
105
+ log_error "Failed to push image to DockerHub"
106
+ exit 1
107
+ fi
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
+ # ============================================================================
110
+ # Step 3: Verify SSH Access to Melnibone
111
+ # ============================================================================
112
+ echo ""
113
+ echo "========================================"
114
+ echo "Step 3: Verifying SSH Access"
115
+ echo "========================================"
116
 
117
+ # Clean up known_hosts entry for melnibone
118
+ ssh-keygen -f "$HOME/.ssh/known_hosts" -R "[${MACHINE}]:${PORT}" 2>/dev/null || true
119
+
120
+ if [ ! -f "${MY_KEY_PATH}" ]; then
121
+ log_error "SSH key not found at ${MY_KEY_PATH}"
122
+ log_info "Please ensure your SSH key is set up correctly"
123
+ exit 1
124
+ fi
125
+
126
+ log_info "Testing SSH connection to ${USER}@${MACHINE}:${PORT}..."
127
+ if ssh -i ${MY_KEY_PATH} -p ${PORT} -o StrictHostKeyChecking=no -o ConnectTimeout=10 ${USER}@${MACHINE} "echo 'success'" > /dev/null 2>&1; then
128
+ log_success "SSH connection successful"
129
  else
130
+ log_error "SSH connection failed"
131
+ log_info "Make sure you can connect with: ssh -i ${MY_KEY_PATH} -p ${PORT} ${USER}@${MACHINE}"
132
  exit 1
133
  fi
134
 
135
+ # ============================================================================
136
+ # Step 4: Deploy to Melnibone
137
+ # ============================================================================
138
+ echo ""
139
+ echo "========================================"
140
+ echo "Step 4: Deploying to Melnibone"
141
+ echo "========================================"
142
+
143
+ # Define SSH command
144
+ SSH_CMD="ssh -i ${MY_KEY_PATH} -p ${PORT} -o StrictHostKeyChecking=no ${USER}@${MACHINE}"
145
+
146
+ log_info "Deploying to remote server..."
147
 
148
+ # Run deployment commands on remote server
149
+ ${SSH_CMD} bash -s << ENDSSH
 
 
150
  set -e
151
+
152
+ echo "→ Pulling Docker image from DockerHub..."
153
+ if docker pull ${FULL_IMAGE_NAME}; then
154
+ echo " Image pulled successfully"
 
 
 
 
 
 
 
 
 
 
 
155
  else
156
+ echo " Failed to pull image"
157
+ exit 1
158
  fi
159
 
160
+ echo " Stopping existing container if running..."
161
+ docker stop ${CONTAINER_NAME} 2>/dev/null || echo " (no container to stop)"
162
+
163
+ echo "→ Removing existing container..."
164
+ docker rm ${CONTAINER_NAME} 2>/dev/null || echo " (no container to remove)"
165
 
166
+ echo "→ Starting new container..."
167
+ docker run -d \\
168
+ --name ${CONTAINER_NAME} \\
169
+ -p ${GRADIO_PORT}:8012 \\
170
+ -p ${METRICS_PORT}:8000 \\
171
+ -p ${NODE_EXPORTER_PORT}:9100 \\
172
+ -e HF_TOKEN="${HF_TOKEN}" \\
173
+ ${FULL_IMAGE_NAME}
174
 
175
+ if [ \$? -eq 0 ]; then
176
+ echo "✓ Container started successfully"
 
 
177
  else
178
+ echo " Failed to start container"
179
+ exit 1
180
  fi
181
 
182
+ # Wait for container to be ready
183
+ echo "→ Waiting for container to start..."
184
+ sleep 5
185
+
186
+ # Verify container is running
187
+ if docker ps | grep -q ${CONTAINER_NAME}; then
188
+ echo "✓ Container is running"
189
  else
190
+ echo " Container failed to start"
191
+ docker logs ${CONTAINER_NAME}
192
+ exit 1
193
  fi
194
 
195
+ # Verify ports are accessible
196
+ echo "→ Verifying ports..."
197
+ curl -s -o /dev/null -w "%{http_code}" http://localhost:${GRADIO_PORT} | grep -q "200" && echo "✓ Gradio port ${GRADIO_PORT} is accessible"
198
+ curl -s http://localhost:${METRICS_PORT}/metrics | grep -q "smart_confidant" && echo "✓ Metrics port ${METRICS_PORT} is accessible"
199
+ curl -s http://localhost:${NODE_EXPORTER_PORT}/metrics | grep -q "node_" && echo "✓ Node exporter port ${NODE_EXPORTER_PORT} is accessible"
200
+
201
+ ENDSSH
202
 
203
+ if [ $? -eq 0 ]; then
204
+ log_success "Deployment to melnibone completed successfully"
 
 
 
 
205
  else
206
+ log_error "Deployment failed"
207
+ exit 1
208
  fi
209
 
210
+ # ============================================================================
211
+ # Step 5: Setup ngrok Tunnel
212
+ # ============================================================================
213
+ echo ""
214
+ echo "========================================"
215
+ echo "Step 5: Setting up ngrok Tunnel"
216
+ echo "========================================"
217
+
218
+ log_info "Setting up ngrok tunnel for global access..."
219
+
220
+ ${SSH_CMD} bash -s << 'ENDSSH'
221
+ set -e
222
+
223
+ # Kill any existing ngrok processes for this port
224
+ echo "→ Stopping existing ngrok tunnels for port 2727..."
225
+ pkill -f "ngrok http 2727" 2>/dev/null || echo " (no existing tunnel to stop)"
226
+ sleep 2
227
+
228
+ # Start new ngrok tunnel
229
+ echo "→ Starting ngrok tunnel..."
230
+ nohup ngrok http 2727 --log=stdout > ~/ngrok_smart_confidant.log 2>&1 &
231
+ NGROK_PID=$!
232
+
233
+ # Wait for ngrok to start
234
  sleep 5
235
 
236
+ # Check if ngrok started successfully
237
+ if ps -p $NGROK_PID > /dev/null 2>&1; then
238
+ echo "✓ ngrok started successfully (PID: $NGROK_PID)"
239
+
240
+ # Extract the ngrok URL from the log
241
+ NGROK_URL=$(grep -o "url=https://[^ ]*" ~/ngrok_smart_confidant.log | head -1 | cut -d'=' -f2)
242
+
243
+ if [ ! -z "$NGROK_URL" ]; then
244
+ echo "NGROK_URL=$NGROK_URL"
245
+ else
246
+ echo "⚠ Could not extract ngrok URL from log"
247
+ echo "Check ~/ngrok_smart_confidant.log on the server"
248
+ fi
249
+ else
250
+ echo "✗ ngrok failed to start"
251
+ cat ~/ngrok_smart_confidant.log
252
+ exit 1
253
+ fi
254
  ENDSSH
255
 
256
+ # Extract the ngrok URL from the SSH output
257
+ NGROK_URL=$(${SSH_CMD} "grep -o 'url=https://[^ ]*' ~/ngrok_smart_confidant.log | head -1 | cut -d'=' -f2")
258
 
259
+ # ============================================================================
260
+ # Deployment Summary
261
+ # ============================================================================
262
+ echo ""
263
+ echo "=========================================="
264
+ echo "🎉 DEPLOYMENT COMPLETE!"
265
+ echo "=========================================="
266
+ echo ""
267
+ echo "Docker Image: ${FULL_IMAGE_NAME}"
268
+ echo "Container Name: ${CONTAINER_NAME}"
269
+ echo ""
270
+ echo "Access URLs:"
271
+ echo " 🌐 Public URL (ngrok): ${NGROK_URL}"
272
+ echo " 🏠 Local Gradio: http://localhost:${GRADIO_PORT}"
273
+ echo " 📊 App Metrics: http://localhost:${METRICS_PORT}/metrics"
274
+ echo " 🖥️ System Metrics: http://localhost:${NODE_EXPORTER_PORT}/metrics"
275
+ echo ""
276
+ echo "Port Mappings:"
277
+ echo " ${GRADIO_PORT} → 8012 (Gradio Interface)"
278
+ echo " ${METRICS_PORT} → 8000 (Application Metrics)"
279
+ echo " ${NODE_EXPORTER_PORT} → 9100 (Node Exporter)"
280
+ echo ""
281
+ echo "Container Management:"
282
+ echo " View logs: ssh -i ${MY_KEY_PATH} -p ${PORT} ${USER}@${MACHINE} 'docker logs ${CONTAINER_NAME}'"
283
+ echo " Stop: ssh -i ${MY_KEY_PATH} -p ${PORT} ${USER}@${MACHINE} 'docker stop ${CONTAINER_NAME}'"
284
+ echo " Restart: ssh -i ${MY_KEY_PATH} -p ${PORT} ${USER}@${MACHINE} 'docker restart ${CONTAINER_NAME}'"
285
  echo ""
286
  echo "=========================================="
 
 
 
 
 
prometheus.yml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prometheus scrape configuration for Smart Confidant.
# Targets match the ports exposed by the Dockerfile (8000 app metrics,
# 9100 node-exporter system metrics).
global:
  scrape_interval: 15s  # how often Prometheus polls every target below

scrape_configs:
  # Application metrics served by the Python app (prometheus_client).
  - job_name: 'smart_confidant_app'
    static_configs:
      - targets: ['localhost:8000']

  # Host/system metrics served by prometheus-node-exporter.
  - job_name: 'node_exporter'
    static_configs:
      - targets: ['localhost:9100']
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Web UI
gradio>=4.43.0
# Model access: HF Inference API client + local inference stack
huggingface-hub>=0.27.0
transformers>=4.43.0
torch>=2.2
accelerate>=0.33.0
# Data validation / settings models
pydantic>=2.6.0
# Process/system stats (used alongside the metrics endpoint)
psutil>=5.9.0
# Tokenizer backends required by some HF model checkpoints
sentencepiece>=0.1.99
protobuf>=3.20.0
# /metrics exporter on port 8000
prometheus_client>=0.20
# Reads .env (e.g. HF_TOKEN) in local development
python-dotenv>=1.0.0
run-local.bat ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off
REM ============================================================================
REM Smart Confidant - Local Development Runner (using uv)
REM Run the app directly on Windows for fast iteration
REM ============================================================================
REM Requires uv on PATH. Reads HF_TOKEN from .env if present; without it the
REM API-backed models are unavailable (local models still work).

echo ========================================
echo  Smart Confidant - Local Development
echo ========================================
echo.

REM Abort early if uv is missing ("where" sets errorlevel 1 when not found);
REM both stdout and stderr are discarded to keep the console clean.
where uv >nul 2>nul
if errorlevel 1 (
    echo ERROR: uv is not installed
    echo Install it with: pip install uv
    echo Or visit: https://docs.astral.sh/uv/
    pause
    exit /b 1
)

echo [1/2] Installing dependencies with uv...
uv pip install -r requirements.txt
if errorlevel 1 (
    echo ERROR: Failed to install dependencies
    echo Make sure requirements.txt is present
    pause
    exit /b 1
)

echo.
echo [2/2] Starting Smart Confidant application...
echo.
echo ========================================
echo  Application Starting
echo ========================================
echo.
echo Access the app at: http://localhost:8012
echo Metrics available at: http://localhost:8000/metrics
echo.
echo Press Ctrl+C to stop the application
echo ========================================
echo.

REM Warn (but do not abort) when .env is missing - only API models need HF_TOKEN.
if not exist ".env" (
    echo WARNING: .env file not found
    echo API models will not work without HF_TOKEN
    echo Copy .env.example to .env and add your token
    echo.
)

REM --no-project: run in the current environment instead of resolving a
REM pyproject-based project env (dependencies were installed above via uv pip).
uv run --no-project python app.py

REM Keep window open if there's an error
if errorlevel 1 (
    echo.
    echo ========================================
    echo  Application stopped with error
    echo ========================================
    pause
)