heffnt commited on
Commit
24ed9c5
·
1 Parent(s): 49391c1
Files changed (9) hide show
  1. .env.example +10 -0
  2. .gitignore +7 -1
  3. Dockerfile +26 -0
  4. README.md +0 -3
  5. app.py +91 -13
  6. deploy.sh +240 -150
  7. prometheus.yml +11 -0
  8. requirements.txt +11 -0
  9. run-local.bat +63 -0
.env.example CHANGED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Smart Confidant Environment Variables
2
+ # Copy this file to .env and fill in your values
3
+
4
+ # HuggingFace API Token
5
+ HF_TOKEN=your_huggingface_token_here
6
+
7
+ # Optional: Override default ports for local development
8
+ # GRADIO_PORT=8012
9
+ # METRICS_PORT=8000
10
+ # NODE_EXPORTER_PORT=9100
.gitignore CHANGED
@@ -22,4 +22,10 @@ env/
22
  uv.lock
23
 
24
  # Temporary files
25
- tmp/
 
 
 
 
 
 
 
22
  uv.lock
23
 
24
  # Temporary files
25
+ tmp/
26
+
27
+ # Gradio cache
28
+ .gradio/
29
+
30
+ # Claude Code
31
+ .claude/
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /opt/app
4
+ COPY . .
5
+ RUN pip install --no-cache-dir uv && \
6
+ uv pip install --system -r /opt/app/requirements.txt
7
+
8
+ # Install prometheus-node-exporter for system-level metrics
9
+ ENV DEBIAN_FRONTEND=noninteractive
10
+ RUN apt-get update && \
11
+ apt-get upgrade -yq ca-certificates && \
12
+ apt-get install -yq --no-install-recommends \
13
+ prometheus-node-exporter
14
+
15
+ # Expose ports:
16
+ # 8012 - Gradio web interface
17
+ # 8000 - Application Prometheus metrics
18
+ # 9100 - Node exporter system metrics
19
+ EXPOSE 8012
20
+ EXPOSE 8000
21
+ EXPOSE 9100
22
+
23
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
24
+
25
+ # Run node-exporter in background, then start the Python application
26
+ CMD bash -c "prometheus-node-exporter --web.listen-address=':9100' & python /opt/app/app.py"
README.md CHANGED
@@ -69,9 +69,6 @@ The app will be available at `http://your-server:8012`
69
 
70
  ### API Models (require HF_TOKEN)
71
  - **HuggingFaceH4/zephyr-7b-beta** (7B params) - Recommended: Best quality for chat
72
- - **google/gemma-2-2b-it** (2B params) - Instruction-tuned, good balance
73
- - **distilgpt2** (82M params) - Very small and fast (older generation)
74
- - **gpt2** (124M params) - Reliable baseline (older generation)
75
 
76
  ### Local Models (run on your device)
77
  - **arnir0/Tiny-LLM** - Very small model for testing
 
69
 
70
  ### API Models (require HF_TOKEN)
71
  - **HuggingFaceH4/zephyr-7b-beta** (7B params) - Recommended: Best quality for chat
 
 
 
72
 
73
  ### Local Models (run on your device)
74
  - **arnir0/Tiny-LLM** - Very small model for testing
app.py CHANGED
@@ -12,13 +12,23 @@ from pathlib import Path
12
  import traceback
13
  from datetime import datetime
14
  from threading import Lock
 
 
 
 
 
 
 
 
 
 
15
 
16
  # ============================================================================
17
  # Configuration
18
  # ============================================================================
19
 
20
  LOCAL_MODELS = ["arnir0/Tiny-LLM"]
21
- API_MODELS = ["google/gemma-2-2b-it", "HuggingFaceH4/zephyr-7b-beta"]
22
  DEFAULT_SYSTEM_MESSAGE = "You are an expert assistant for Magic: The Gathering. You're name is Smart Confidant, but people tend to call you Bob."
23
  TITLE = "🎓🧙🏻‍♂️ Smart Confidant 🧙🏻‍♂️🎓"
24
 
@@ -58,6 +68,26 @@ def get_debug_logs():
58
  with debug_lock:
59
  return "\n".join(debug_logs)
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # ============================================================================
62
  # Asset Loading & Theme Configuration
63
  # ============================================================================
@@ -197,7 +227,7 @@ def respond(
197
  ):
198
  """
199
  Handle chat responses using either local transformers models or HuggingFace API.
200
-
201
  Args:
202
  message: User's input message
203
  history: List of previous messages in conversation
@@ -206,12 +236,16 @@ def respond(
206
  temperature: Sampling temperature (higher = more random)
207
  top_p: Nucleus sampling threshold
208
  selected_model: Model identifier with "(local)" or "(api)" suffix
209
-
210
  Yields:
211
  str: Generated response text or error message
212
  """
213
  global pipe
214
-
 
 
 
 
215
  try:
216
  log_debug(f"New message received: '{message[:50]}...'")
217
  log_debug(f"Selected model: {selected_model}")
@@ -226,7 +260,12 @@ def respond(
226
  # Parse model type and name from selection
227
  is_local = selected_model.endswith("(local)")
228
  model_name = selected_model.replace(" (local)", "").replace(" (api)", "")
229
-
 
 
 
 
 
230
  response = ""
231
 
232
  if is_local:
@@ -263,14 +302,25 @@ def respond(
263
  # Extract new tokens only (strip original prompt)
264
  response = outputs[0]["generated_text"][len(prompt):]
265
  log_debug(f"Response length: {len(response)} characters")
 
 
 
 
 
266
  yield response.strip()
267
 
268
  except ImportError as e:
 
 
 
269
  error_msg = f"Import error: {str(e)}"
270
  log_debug(error_msg, "ERROR")
271
  log_debug(traceback.format_exc(), "ERROR")
272
  yield f"❌ Import Error: {str(e)}\n\nPlease check log.txt for details."
273
  except Exception as e:
 
 
 
274
  error_msg = f"Local model error: {str(e)}"
275
  log_debug(error_msg, "ERROR")
276
  log_debug(traceback.format_exc(), "ERROR")
@@ -291,7 +341,6 @@ def respond(
291
  # Create HuggingFace Inference client
292
  log_debug("Creating InferenceClient...")
293
  client = InferenceClient(
294
- provider="auto",
295
  api_key=hf_token,
296
  )
297
  log_debug("InferenceClient created successfully")
@@ -308,33 +357,57 @@ def respond(
308
 
309
  response = completion.choices[0].message.content
310
  log_debug(f"Completion received. Response length: {len(response)} characters")
 
 
 
 
 
311
  yield response
312
-
313
  except Exception as e:
 
 
 
314
  error_msg = f"API error: {str(e)}"
315
  log_debug(error_msg, "ERROR")
316
  log_debug(traceback.format_exc(), "ERROR")
317
  yield f"❌ API Error: {str(e)}\n\nPlease check log.txt for details."
318
 
319
  except Exception as e:
 
 
 
320
  error_msg = f"Unexpected error in respond function: {str(e)}"
321
  log_debug(error_msg, "ERROR")
322
  log_debug(traceback.format_exc(), "ERROR")
323
  yield f"❌ Unexpected Error: {str(e)}\n\nPlease check log.txt for details."
 
 
 
324
 
325
 
326
  # ============================================================================
327
  # Gradio UI Definition
328
  # ============================================================================
329
 
 
 
 
 
330
  with gr.Blocks(theme=TransparentTheme(), css=fancy_css) as demo:
331
  # Title banner
332
  gr.Markdown(f"<h1 id='title' style='text-align: center;'>{TITLE}</h1>")
333
-
334
- # Chatbot component with custom avatar icons
 
 
 
 
 
 
335
  chatbot = gr.Chatbot(
336
  type="messages",
337
- avatar_images=(str(ASSETS_DIR / "monster_icon.png"), str(ASSETS_DIR / "smart_confidant_icon.png"))
338
  )
339
 
340
  # Collapsible settings panel
@@ -343,7 +416,7 @@ with gr.Blocks(theme=TransparentTheme(), css=fancy_css) as demo:
343
  max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
344
  temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
345
  top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
346
- selected_model = gr.Radio(choices=MODEL_OPTIONS, label="Select Model", value=MODEL_OPTIONS[0])
347
 
348
  # Wire up chat interface with response handler
349
  gr.ChatInterface(
@@ -369,6 +442,11 @@ if __name__ == "__main__":
369
  log_debug(f"Available models: {MODEL_OPTIONS}")
370
  log_debug(f"HF_TOKEN present: {'Yes' if os.environ.get('HF_TOKEN') else 'No'}")
371
  log_debug("="*50)
372
-
 
 
 
 
 
373
  # Launch on all interfaces for VM/container deployment, with Gradio share link
374
- demo.launch(server_name="0.0.0.0", server_port=8012, share=True)
 
12
  import traceback
13
  from datetime import datetime
14
  from threading import Lock
15
+ import time
16
+ from prometheus_client import start_http_server, Counter, Summary, Gauge
17
+
18
+ # Load environment variables from .env file
19
+ try:
20
+ from dotenv import load_dotenv
21
+ load_dotenv()
22
+ except ImportError:
23
+ # If python-dotenv not installed, skip (will use system env vars only)
24
+ pass
25
 
26
  # ============================================================================
27
  # Configuration
28
  # ============================================================================
29
 
30
  LOCAL_MODELS = ["arnir0/Tiny-LLM"]
31
+ API_MODELS = ["meta-llama/Llama-3.2-3B-Instruct"]
32
  DEFAULT_SYSTEM_MESSAGE = "You are an expert assistant for Magic: The Gathering. You're name is Smart Confidant, but people tend to call you Bob."
33
  TITLE = "🎓🧙🏻‍♂️ Smart Confidant 🧙🏻‍♂️🎓"
34
 
 
68
  with debug_lock:
69
  return "\n".join(debug_logs)
70
 
71
+ # ============================================================================
72
+ # Prometheus Metrics
73
+ # ============================================================================
74
+
75
+ # Core request metrics
76
+ REQUEST_COUNTER = Counter('smart_confidant_requests_total', 'Total number of chat requests')
77
+ SUCCESSFUL_REQUESTS = Counter('smart_confidant_successful_requests_total', 'Total number of successful requests')
78
+ FAILED_REQUESTS = Counter('smart_confidant_failed_requests_total', 'Total number of failed requests')
79
+ REQUEST_DURATION = Summary('smart_confidant_request_duration_seconds', 'Time spent processing request')
80
+
81
+ # Enhanced chatbot metrics
82
+ MODEL_SELECTION_COUNTER = Counter('smart_confidant_model_selections_total',
83
+ 'Count of model selections',
84
+ ['model_name', 'model_type'])
85
+ TOKEN_COUNT = Summary('smart_confidant_tokens_generated', 'Number of tokens generated per response')
86
+ CONVERSATION_LENGTH = Gauge('smart_confidant_conversation_length', 'Number of messages in current conversation')
87
+ ERROR_BY_TYPE = Counter('smart_confidant_errors_by_type_total',
88
+ 'Count of errors by type',
89
+ ['error_type'])
90
+
91
  # ============================================================================
92
  # Asset Loading & Theme Configuration
93
  # ============================================================================
 
227
  ):
228
  """
229
  Handle chat responses using either local transformers models or HuggingFace API.
230
+
231
  Args:
232
  message: User's input message
233
  history: List of previous messages in conversation
 
236
  temperature: Sampling temperature (higher = more random)
237
  top_p: Nucleus sampling threshold
238
  selected_model: Model identifier with "(local)" or "(api)" suffix
239
+
240
  Yields:
241
  str: Generated response text or error message
242
  """
243
  global pipe
244
+
245
+ # Prometheus metrics: Track request start
246
+ REQUEST_COUNTER.inc()
247
+ start_time = time.perf_counter()
248
+
249
  try:
250
  log_debug(f"New message received: '{message[:50]}...'")
251
  log_debug(f"Selected model: {selected_model}")
 
260
  # Parse model type and name from selection
261
  is_local = selected_model.endswith("(local)")
262
  model_name = selected_model.replace(" (local)", "").replace(" (api)", "")
263
+
264
+ # Prometheus metrics: Track model selection and conversation length
265
+ model_type = "local" if is_local else "api"
266
+ MODEL_SELECTION_COUNTER.labels(model_name=model_name, model_type=model_type).inc()
267
+ CONVERSATION_LENGTH.set(len(messages))
268
+
269
  response = ""
270
 
271
  if is_local:
 
302
  # Extract new tokens only (strip original prompt)
303
  response = outputs[0]["generated_text"][len(prompt):]
304
  log_debug(f"Response length: {len(response)} characters")
305
+
306
+ # Prometheus metrics: Track success and approximate token count
307
+ SUCCESSFUL_REQUESTS.inc()
308
+ TOKEN_COUNT.observe(len(response.split())) # Approximate token count using word count
309
+
310
  yield response.strip()
311
 
312
  except ImportError as e:
313
+ # Prometheus metrics: Track error
314
+ FAILED_REQUESTS.inc()
315
+ ERROR_BY_TYPE.labels(error_type="import_error").inc()
316
  error_msg = f"Import error: {str(e)}"
317
  log_debug(error_msg, "ERROR")
318
  log_debug(traceback.format_exc(), "ERROR")
319
  yield f"❌ Import Error: {str(e)}\n\nPlease check log.txt for details."
320
  except Exception as e:
321
+ # Prometheus metrics: Track error
322
+ FAILED_REQUESTS.inc()
323
+ ERROR_BY_TYPE.labels(error_type="local_model_error").inc()
324
  error_msg = f"Local model error: {str(e)}"
325
  log_debug(error_msg, "ERROR")
326
  log_debug(traceback.format_exc(), "ERROR")
 
341
  # Create HuggingFace Inference client
342
  log_debug("Creating InferenceClient...")
343
  client = InferenceClient(
 
344
  api_key=hf_token,
345
  )
346
  log_debug("InferenceClient created successfully")
 
357
 
358
  response = completion.choices[0].message.content
359
  log_debug(f"Completion received. Response length: {len(response)} characters")
360
+
361
+ # Prometheus metrics: Track success and approximate token count
362
+ SUCCESSFUL_REQUESTS.inc()
363
+ TOKEN_COUNT.observe(len(response.split())) # Approximate token count using word count
364
+
365
  yield response
366
+
367
  except Exception as e:
368
+ # Prometheus metrics: Track error
369
+ FAILED_REQUESTS.inc()
370
+ ERROR_BY_TYPE.labels(error_type="api_error").inc()
371
  error_msg = f"API error: {str(e)}"
372
  log_debug(error_msg, "ERROR")
373
  log_debug(traceback.format_exc(), "ERROR")
374
  yield f"❌ API Error: {str(e)}\n\nPlease check log.txt for details."
375
 
376
  except Exception as e:
377
+ # Prometheus metrics: Track error
378
+ FAILED_REQUESTS.inc()
379
+ ERROR_BY_TYPE.labels(error_type="unexpected_error").inc()
380
  error_msg = f"Unexpected error in respond function: {str(e)}"
381
  log_debug(error_msg, "ERROR")
382
  log_debug(traceback.format_exc(), "ERROR")
383
  yield f"❌ Unexpected Error: {str(e)}\n\nPlease check log.txt for details."
384
+ finally:
385
+ # Prometheus metrics: Record request duration
386
+ REQUEST_DURATION.observe(time.perf_counter() - start_time)
387
 
388
 
389
  # ============================================================================
390
  # Gradio UI Definition
391
  # ============================================================================
392
 
393
+ # Allow Gradio to serve static files from assets directory (requires absolute path)
394
+ ASSETS_DIR_ABSOLUTE = str(Path(__file__).parent / "assets")
395
+ gr.set_static_paths(paths=[ASSETS_DIR_ABSOLUTE])
396
+
397
  with gr.Blocks(theme=TransparentTheme(), css=fancy_css) as demo:
398
  # Title banner
399
  gr.Markdown(f"<h1 id='title' style='text-align: center;'>{TITLE}</h1>")
400
+
401
+ # Chatbot component with custom avatar icons (using forward slashes for web serving)
402
+ # Gradio serves files via HTTP URLs which require forward slashes, not Windows backslashes
403
+ MONSTER_ICON = str((ASSETS_DIR / "monster_icon.png").as_posix())
404
+ BOT_ICON = str((ASSETS_DIR / "smart_confidant_icon.png").as_posix())
405
+ log_debug(f"Monster icon path: {MONSTER_ICON}")
406
+ log_debug(f"Bot icon path: {BOT_ICON}")
407
+
408
  chatbot = gr.Chatbot(
409
  type="messages",
410
+ avatar_images=(MONSTER_ICON, BOT_ICON)
411
  )
412
 
413
  # Collapsible settings panel
 
416
  max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
417
  temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
418
  top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
419
+ selected_model = gr.Radio(choices=MODEL_OPTIONS, label="Select Model", value=MODEL_OPTIONS[1])
420
 
421
  # Wire up chat interface with response handler
422
  gr.ChatInterface(
 
442
  log_debug(f"Available models: {MODEL_OPTIONS}")
443
  log_debug(f"HF_TOKEN present: {'Yes' if os.environ.get('HF_TOKEN') else 'No'}")
444
  log_debug("="*50)
445
+
446
+ # Start Prometheus metrics server on port 8000
447
+ log_debug("Starting Prometheus metrics server on port 8000")
448
+ start_http_server(8000)
449
+ log_debug("Prometheus metrics server started - available at http://0.0.0.0:8000/metrics")
450
+
451
  # Launch on all interfaces for VM/container deployment, with Gradio share link
452
+ demo.launch(server_name="0.0.0.0", server_port=8012, share=True, allowed_paths=[ASSETS_DIR_ABSOLUTE])
deploy.sh CHANGED
@@ -1,196 +1,286 @@
1
  #! /bin/bash
2
 
 
 
 
 
 
 
 
3
  # Configuration
4
- PORT=22012
5
- MACHINE=paffenroth-23.dyn.wpi.edu
6
- MY_KEY_PATH=$HOME/.ssh/mlopskey # Path to your personal SSH key
7
- STUDENT_ADMIN_KEY_PATH=$HOME/.ssh/student-admin_key # Path to student-admin fallback key
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # Load environment variables from .env file if it exists
10
  if [ -f .env ]; then
11
- echo "Loading environment variables from .env file..."
12
  export $(grep -v '^#' .env | xargs)
13
  fi
14
 
15
- # Clean up from previous runs
16
- ssh-keygen -f "$HOME/.ssh/known_hosts" -R "[$MACHINE]:$PORT" 2>/dev/null
17
- rm -rf tmp
 
 
 
 
18
 
19
- # Create a temporary directory
20
- mkdir tmp
 
 
 
 
 
21
 
22
- # Change the permissions of the directory
23
- chmod 700 tmp
 
 
 
 
 
24
 
25
- # Change to the temporary directory
26
- cd tmp
 
 
 
 
 
27
 
28
- echo "Checking if personal key works..."
29
- # Try connecting with personal key
30
- if ssh -i ${MY_KEY_PATH} -p ${PORT} -o StrictHostKeyChecking=no -o ConnectTimeout=10 student-admin@${MACHINE} "echo 'success'" > /dev/null 2>&1; then
31
- echo "✓ Personal key works! No update needed."
32
- MY_KEY=${MY_KEY_PATH}
33
  else
34
- echo " Personal key failed. Updating with student-admin key..."
35
-
36
- # Check if the keys exist
37
- if [ ! -f "${MY_KEY_PATH}.pub" ]; then
38
- echo "Error: Personal public key not found at ${MY_KEY_PATH}.pub"
39
- echo "Creating a new key pair..."
40
- ssh-keygen -f ${MY_KEY_PATH} -t ed25519 -N ""
41
- fi
42
-
43
- if [ ! -f "${STUDENT_ADMIN_KEY_PATH}" ]; then
44
- echo "Error: Student-admin key not found at ${STUDENT_ADMIN_KEY_PATH}"
45
- exit 1
46
- fi
47
-
48
- # Read the public key content
49
- MY_PUB_KEY=$(cat ${MY_KEY_PATH}.pub)
50
-
51
- # Update authorized_keys on the server using student-admin key
52
- echo "Connecting with student-admin key to update authorized_keys..."
53
- ssh -i ${STUDENT_ADMIN_KEY_PATH} -p ${PORT} -o StrictHostKeyChecking=no student-admin@${MACHINE} << EOF
54
- mkdir -p ~/.ssh
55
- chmod 700 ~/.ssh
56
- touch ~/.ssh/authorized_keys
57
- chmod 600 ~/.ssh/authorized_keys
58
- # Remove any old keys from this machine
59
- grep -v 'rcpaffenroth@paffenroth-23' ~/.ssh/authorized_keys > ~/.ssh/authorized_keys.tmp 2>/dev/null || true
60
- mv ~/.ssh/authorized_keys.tmp ~/.ssh/authorized_keys 2>/dev/null || true
61
- # Add the new key
62
- echo '${MY_PUB_KEY}' >> ~/.ssh/authorized_keys
63
- echo 'Key updated'
64
- EOF
65
-
66
- if [ $? -ne 0 ]; then
67
- echo "Failed to update key with student-admin key"
68
- exit 1
69
- fi
70
-
71
- # Verify the personal key now works
72
- echo "Verifying personal key..."
73
- sleep 2
74
-
75
- if ssh -i ${MY_KEY_PATH} -p ${PORT} -o StrictHostKeyChecking=no student-admin@${MACHINE} "echo 'success'" > /dev/null 2>&1; then
76
- echo "✓ Success! Personal key is now working."
77
- MY_KEY=${MY_KEY_PATH}
78
- else
79
- echo "✗ Personal key still not working after update"
80
- exit 1
81
- fi
82
  fi
83
 
84
- # Add the key to the ssh-agent
85
- eval "$(ssh-agent -s)"
86
- ssh-add ${MY_KEY}
87
-
88
- # Check the key file on the server
89
- echo "Checking authorized_keys on server:"
90
- ssh -i ${MY_KEY} -p ${PORT} -o StrictHostKeyChecking=no student-admin@${MACHINE} "cat ~/.ssh/authorized_keys"
91
-
92
- # Clone or copy the repo
93
- # If using git:
94
- # git clone https://github.com/yourusername/Smart_Confidant
95
- # Or just copy the local directory:
96
- echo "Copying Smart_Confidant code..."
97
- mkdir -p Smart_Confidant
98
- # Copy all files except tmp and .git directories
99
- for item in ../*; do
100
- base=$(basename "$item")
101
- if [ "$base" != "tmp" ] && [ "$base" != ".git" ]; then
102
- cp -r "$item" Smart_Confidant/
103
- fi
104
- done
105
 
106
- # Copy the files to the server
107
- echo "Uploading code to server..."
108
- scp -i ${MY_KEY} -P ${PORT} -o StrictHostKeyChecking=no -r Smart_Confidant student-admin@${MACHINE}:~/
 
 
 
 
109
 
110
- if [ $? -eq 0 ]; then
111
- echo " Code successfully uploaded to server"
 
 
 
 
 
 
 
 
 
 
112
  else
113
- echo " Failed to upload code"
 
114
  exit 1
115
  fi
116
 
117
- # Define SSH command for subsequent steps using the confirmed key
118
- COMMAND="ssh -i ${MY_KEY} -p ${PORT} -o StrictHostKeyChecking=no student-admin@${MACHINE}"
 
 
 
 
 
 
 
 
 
 
119
 
120
- # Run all setup in a single SSH session
121
- echo "Setting up environment on remote server..."
122
- # Pass HF_TOKEN to the remote session
123
- ${COMMAND} bash -s << ENDSSH
124
  set -e
125
- export HF_TOKEN='${HF_TOKEN}'
126
-
127
- # Stop old process
128
- echo " Stopping old process if running..."
129
- pkill -f 'python.*app.py' || true
130
-
131
- # Check if micromamba is installed
132
- if [ ! -f ~/bin/micromamba ]; then
133
- echo "→ Installing micromamba..."
134
- curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C ~/ bin/micromamba
135
- mkdir -p ~/micromamba
136
- export MAMBA_ROOT_PREFIX=~/micromamba
137
- echo 'export MAMBA_ROOT_PREFIX=~/micromamba' >> ~/.bashrc
138
- echo 'eval "$(~/bin/micromamba shell hook -s bash)"' >> ~/.bashrc
139
- echo "✓ Micromamba installed"
140
  else
141
- echo " Micromamba already installed"
142
- export MAMBA_ROOT_PREFIX=~/micromamba
143
  fi
144
 
145
- eval "$(~/bin/micromamba shell hook -s bash)" 2>/dev/null || true
 
 
 
 
146
 
147
- cd Smart_Confidant
 
 
 
 
 
 
 
148
 
149
- # Check if environment exists
150
- if ~/bin/micromamba env list | grep -q "smart-confidant"; then
151
- echo "→ Updating existing environment..."
152
- ~/bin/micromamba install -n smart-confidant -f environment.yml -y
153
  else
154
- echo " Creating new environment..."
155
- ~/bin/micromamba create -f environment.yml -y
156
  fi
157
 
158
- # Check if uv is installed
159
- if ! ~/bin/micromamba run -n smart-confidant which uv &>/dev/null; then
160
- echo "→ Installing uv..."
161
- ~/bin/micromamba run -n smart-confidant pip install uv
 
 
 
162
  else
163
- echo " uv already installed"
 
 
164
  fi
165
 
166
- # Install/update dependencies
167
- echo "→ Installing/updating dependencies..."
168
- ~/bin/micromamba run -n smart-confidant uv pip install -e .
 
 
 
 
169
 
170
- # Start application
171
- echo " Starting application..."
172
- # Pass HF_TOKEN if it exists
173
- if [ ! -z "$HF_TOKEN" ]; then
174
- echo "→ HF_TOKEN provided, API models will be available"
175
- nohup ~/bin/micromamba run -n smart-confidant -e HF_TOKEN="$HF_TOKEN" python -u app.py > ~/log.txt 2>&1 &
176
  else
177
- echo " HF_TOKEN not set - API models will not work"
178
- nohup ~/bin/micromamba run -n smart-confidant python -u app.py > ~/log.txt 2>&1 &
179
  fi
180
 
181
- # Wait for the app to start
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  sleep 5
183
 
184
- echo "✓ Setup complete"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  ENDSSH
186
 
187
- # Extract the Gradio share link from the remote log file
188
- SHARE_LINK=$(${COMMAND} "grep -oP 'https://[a-z0-9]+\.gradio\.live' ~/log.txt | tail -1" 2>/dev/null)
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  echo ""
191
  echo "=========================================="
192
- echo "Deployment complete!"
193
- echo "Public Gradio Share Link: ${SHARE_LINK}"
194
- echo "==========================================="
195
-
196
-
 
1
  #! /bin/bash
2
 
3
+ # ============================================================================
4
+ # Smart Confidant - Docker Deployment Script for Melnibone
5
+ # Deploys from laptop to melnibone.wpi.edu using Docker
6
+ # ============================================================================
7
+
8
+ set -e # Exit on error
9
+
10
  # Configuration
11
+ PORT=2222
12
+ MACHINE=melnibone.wpi.edu
13
+ USER=group12
14
+ MY_KEY_PATH=$HOME/.ssh/mlops # Path to your SSH key for melnibone
15
+
16
+ # Docker configuration
17
+ DOCKER_USER=heffnt
18
+ DOCKER_IMAGE=smart_confidant
19
+ DOCKER_TAG=cs3
20
+ FULL_IMAGE_NAME=${DOCKER_USER}/${DOCKER_IMAGE}:${DOCKER_TAG}
21
+
22
+ # Container configuration
23
+ CONTAINER_NAME=smart_confidant
24
+ GRADIO_PORT=2727
25
+ METRICS_PORT=2728
26
+ NODE_EXPORTER_PORT=2729
27
+
28
+ # Colors for output
29
+ RED='\033[0;31m'
30
+ GREEN='\033[0;32m'
31
+ YELLOW='\033[1;33m'
32
+ BLUE='\033[0;34m'
33
+ NC='\033[0m' # No Color
34
+
35
+ # Helper functions
36
+ log_info() {
37
+ echo -e "${BLUE}→${NC} $1"
38
+ }
39
+
40
+ log_success() {
41
+ echo -e "${GREEN}✓${NC} $1"
42
+ }
43
+
44
+ log_warning() {
45
+ echo -e "${YELLOW}⚠${NC} $1"
46
+ }
47
+
48
+ log_error() {
49
+ echo -e "${RED}✗${NC} $1"
50
+ }
51
 
52
  # Load environment variables from .env file if it exists
53
  if [ -f .env ]; then
54
+ log_info "Loading environment variables from .env file..."
55
  export $(grep -v '^#' .env | xargs)
56
  fi
57
 
58
+ # Check if HF_TOKEN is set
59
+ if [ -z "$HF_TOKEN" ]; then
60
+ log_warning "HF_TOKEN not set - API models will not work"
61
+ log_warning "Set it in .env file or export HF_TOKEN=your_token"
62
+ else
63
+ log_success "HF_TOKEN found"
64
+ fi
65
 
66
+ # ============================================================================
67
+ # Step 1: Build Docker Image Locally
68
+ # ============================================================================
69
+ echo ""
70
+ echo "========================================"
71
+ echo "Step 1: Building Docker Image"
72
+ echo "========================================"
73
 
74
+ log_info "Building Docker image: ${FULL_IMAGE_NAME}"
75
+ if docker build -t ${FULL_IMAGE_NAME} .; then
76
+ log_success "Docker image built successfully"
77
+ else
78
+ log_error "Docker build failed"
79
+ exit 1
80
+ fi
81
 
82
+ # ============================================================================
83
+ # Step 2: Push to DockerHub
84
+ # ============================================================================
85
+ echo ""
86
+ echo "========================================"
87
+ echo "Step 2: Pushing to DockerHub"
88
+ echo "========================================"
89
 
90
+ log_info "Checking Docker login status..."
91
+ if docker info 2>/dev/null | grep -q "Username: ${DOCKER_USER}"; then
92
+ log_success "Already logged in to DockerHub as ${DOCKER_USER}"
93
+ elif docker login --username ${DOCKER_USER} 2>/dev/null; then
94
+ log_success "Logged in to DockerHub"
95
  else
96
+ log_error "Not logged in to DockerHub"
97
+ log_info "Please run: docker login --username ${DOCKER_USER}"
98
+ exit 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  fi
100
 
101
+ log_info "Pushing image to DockerHub..."
102
+ if docker push ${FULL_IMAGE_NAME}; then
103
+ log_success "Image pushed to DockerHub"
104
+ else
105
+ log_error "Failed to push image to DockerHub"
106
+ exit 1
107
+ fi
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
+ # ============================================================================
110
+ # Step 3: Verify SSH Access to Melnibone
111
+ # ============================================================================
112
+ echo ""
113
+ echo "========================================"
114
+ echo "Step 3: Verifying SSH Access"
115
+ echo "========================================"
116
 
117
+ # Clean up known_hosts entry for melnibone
118
+ ssh-keygen -f "$HOME/.ssh/known_hosts" -R "[${MACHINE}]:${PORT}" 2>/dev/null || true
119
+
120
+ if [ ! -f "${MY_KEY_PATH}" ]; then
121
+ log_error "SSH key not found at ${MY_KEY_PATH}"
122
+ log_info "Please ensure your SSH key is set up correctly"
123
+ exit 1
124
+ fi
125
+
126
+ log_info "Testing SSH connection to ${USER}@${MACHINE}:${PORT}..."
127
+ if ssh -i ${MY_KEY_PATH} -p ${PORT} -o StrictHostKeyChecking=no -o ConnectTimeout=10 ${USER}@${MACHINE} "echo 'success'" > /dev/null 2>&1; then
128
+ log_success "SSH connection successful"
129
  else
130
+ log_error "SSH connection failed"
131
+ log_info "Make sure you can connect with: ssh -i ${MY_KEY_PATH} -p ${PORT} ${USER}@${MACHINE}"
132
  exit 1
133
  fi
134
 
135
+ # ============================================================================
136
+ # Step 4: Deploy to Melnibone
137
+ # ============================================================================
138
+ echo ""
139
+ echo "========================================"
140
+ echo "Step 4: Deploying to Melnibone"
141
+ echo "========================================"
142
+
143
+ # Define SSH command
144
+ SSH_CMD="ssh -i ${MY_KEY_PATH} -p ${PORT} -o StrictHostKeyChecking=no ${USER}@${MACHINE}"
145
+
146
+ log_info "Deploying to remote server..."
147
 
148
+ # Run deployment commands on remote server
149
+ ${SSH_CMD} bash -s << ENDSSH
 
 
150
  set -e
151
+
152
+ echo "→ Pulling Docker image from DockerHub..."
153
+ if docker pull ${FULL_IMAGE_NAME}; then
154
+ echo " Image pulled successfully"
 
 
 
 
 
 
 
 
 
 
 
155
  else
156
+ echo " Failed to pull image"
157
+ exit 1
158
  fi
159
 
160
+ echo " Stopping existing container if running..."
161
+ docker stop ${CONTAINER_NAME} 2>/dev/null || echo " (no container to stop)"
162
+
163
+ echo "→ Removing existing container..."
164
+ docker rm ${CONTAINER_NAME} 2>/dev/null || echo " (no container to remove)"
165
 
166
+ echo "→ Starting new container..."
167
+ docker run -d \\
168
+ --name ${CONTAINER_NAME} \\
169
+ -p ${GRADIO_PORT}:8012 \\
170
+ -p ${METRICS_PORT}:8000 \\
171
+ -p ${NODE_EXPORTER_PORT}:9100 \\
172
+ -e HF_TOKEN="${HF_TOKEN}" \\
173
+ ${FULL_IMAGE_NAME}
174
 
175
+ if [ \$? -eq 0 ]; then
176
+ echo "✓ Container started successfully"
 
 
177
  else
178
+ echo " Failed to start container"
179
+ exit 1
180
  fi
181
 
182
+ # Wait for container to be ready
183
+ echo "→ Waiting for container to start..."
184
+ sleep 5
185
+
186
+ # Verify container is running
187
+ if docker ps | grep -q ${CONTAINER_NAME}; then
188
+ echo "✓ Container is running"
189
  else
190
+ echo " Container failed to start"
191
+ docker logs ${CONTAINER_NAME}
192
+ exit 1
193
  fi
194
 
195
+ # Verify ports are accessible
196
+ echo "→ Verifying ports..."
197
+ curl -s -o /dev/null -w "%{http_code}" http://localhost:${GRADIO_PORT} | grep -q "200" && echo "✓ Gradio port ${GRADIO_PORT} is accessible"
198
+ curl -s http://localhost:${METRICS_PORT}/metrics | grep -q "smart_confidant" && echo "✓ Metrics port ${METRICS_PORT} is accessible"
199
+ curl -s http://localhost:${NODE_EXPORTER_PORT}/metrics | grep -q "node_" && echo "✓ Node exporter port ${NODE_EXPORTER_PORT} is accessible"
200
+
201
+ ENDSSH
202
 
203
+ if [ $? -eq 0 ]; then
204
+ log_success "Deployment to melnibone completed successfully"
 
 
 
 
205
  else
206
+ log_error "Deployment failed"
207
+ exit 1
208
  fi
209
 
210
+ # ============================================================================
211
+ # Step 5: Setup ngrok Tunnel
212
+ # ============================================================================
213
+ echo ""
214
+ echo "========================================"
215
+ echo "Step 5: Setting up ngrok Tunnel"
216
+ echo "========================================"
217
+
218
+ log_info "Setting up ngrok tunnel for global access..."
219
+
220
+ ${SSH_CMD} bash -s << 'ENDSSH'
221
+ set -e
222
+
223
+ # Kill any existing ngrok processes for this port
224
+ echo "→ Stopping existing ngrok tunnels for port 2727..."
225
+ pkill -f "ngrok http 2727" 2>/dev/null || echo " (no existing tunnel to stop)"
226
+ sleep 2
227
+
228
+ # Start new ngrok tunnel
229
+ echo "→ Starting ngrok tunnel..."
230
+ nohup ngrok http 2727 --log=stdout > ~/ngrok_smart_confidant.log 2>&1 &
231
+ NGROK_PID=$!
232
+
233
+ # Wait for ngrok to start
234
  sleep 5
235
 
236
+ # Check if ngrok started successfully
237
+ if ps -p $NGROK_PID > /dev/null 2>&1; then
238
+ echo "✓ ngrok started successfully (PID: $NGROK_PID)"
239
+
240
+ # Extract the ngrok URL from the log
241
+ NGROK_URL=$(grep -o "url=https://[^ ]*" ~/ngrok_smart_confidant.log | head -1 | cut -d'=' -f2)
242
+
243
+ if [ ! -z "$NGROK_URL" ]; then
244
+ echo "NGROK_URL=$NGROK_URL"
245
+ else
246
+ echo "⚠ Could not extract ngrok URL from log"
247
+ echo "Check ~/ngrok_smart_confidant.log on the server"
248
+ fi
249
+ else
250
+ echo "✗ ngrok failed to start"
251
+ cat ~/ngrok_smart_confidant.log
252
+ exit 1
253
+ fi
254
  ENDSSH
255
 
256
+ # Extract the ngrok URL from the SSH output
257
+ NGROK_URL=$(${SSH_CMD} "grep -o 'url=https://[^ ]*' ~/ngrok_smart_confidant.log | head -1 | cut -d'=' -f2")
258
 
259
+ # ============================================================================
260
+ # Deployment Summary
261
+ # ============================================================================
262
+ echo ""
263
+ echo "=========================================="
264
+ echo "🎉 DEPLOYMENT COMPLETE!"
265
+ echo "=========================================="
266
+ echo ""
267
+ echo "Docker Image: ${FULL_IMAGE_NAME}"
268
+ echo "Container Name: ${CONTAINER_NAME}"
269
+ echo ""
270
+ echo "Access URLs:"
271
+ echo " 🌐 Public URL (ngrok): ${NGROK_URL}"
272
+ echo " 🏠 Local Gradio: http://localhost:${GRADIO_PORT}"
273
+ echo " 📊 App Metrics: http://localhost:${METRICS_PORT}/metrics"
274
+ echo " 🖥️ System Metrics: http://localhost:${NODE_EXPORTER_PORT}/metrics"
275
+ echo ""
276
+ echo "Port Mappings:"
277
+ echo " ${GRADIO_PORT} → 8012 (Gradio Interface)"
278
+ echo " ${METRICS_PORT} → 8000 (Application Metrics)"
279
+ echo " ${NODE_EXPORTER_PORT} → 9100 (Node Exporter)"
280
+ echo ""
281
+ echo "Container Management:"
282
+ echo " View logs: ssh -i ${MY_KEY_PATH} -p ${PORT} ${USER}@${MACHINE} 'docker logs ${CONTAINER_NAME}'"
283
+ echo " Stop: ssh -i ${MY_KEY_PATH} -p ${PORT} ${USER}@${MACHINE} 'docker stop ${CONTAINER_NAME}'"
284
+ echo " Restart: ssh -i ${MY_KEY_PATH} -p ${PORT} ${USER}@${MACHINE} 'docker restart ${CONTAINER_NAME}'"
285
  echo ""
286
  echo "=========================================="
 
 
 
 
 
prometheus.yml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prometheus scrape configuration for Smart Confidant.
# Targets match the ports exposed by the Dockerfile (8000 app metrics,
# 9100 node-exporter system metrics).
global:
  scrape_interval: 15s  # how often Prometheus polls every target below

scrape_configs:
  # Application metrics served by the Python app (prometheus_client).
  - job_name: 'smart_confidant_app'
    static_configs:
      - targets: ['localhost:8000']

  # Host/system metrics served by prometheus-node-exporter.
  - job_name: 'node_exporter'
    static_configs:
      - targets: ['localhost:9100']
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Web UI
gradio>=4.43.0
# Model access: HF Inference API client + local inference stack
huggingface-hub>=0.27.0
transformers>=4.43.0
torch>=2.2
accelerate>=0.33.0
# Data validation / settings models
pydantic>=2.6.0
# Process/system stats (used alongside the metrics endpoint)
psutil>=5.9.0
# Tokenizer backends required by some HF model checkpoints
sentencepiece>=0.1.99
protobuf>=3.20.0
# /metrics exporter on port 8000
prometheus_client>=0.20
# Reads .env (e.g. HF_TOKEN) in local development
python-dotenv>=1.0.0
run-local.bat ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off
REM ============================================================================
REM Smart Confidant - Local Development Runner (using uv)
REM Run the app directly on Windows for fast iteration
REM ============================================================================
REM Requires uv on PATH. Reads HF_TOKEN from .env if present; without it the
REM API-backed models are unavailable (local models still work).

echo ========================================
echo  Smart Confidant - Local Development
echo ========================================
echo.

REM Abort early if uv is missing ("where" sets errorlevel 1 when not found);
REM both stdout and stderr are discarded to keep the console clean.
where uv >nul 2>nul
if errorlevel 1 (
    echo ERROR: uv is not installed
    echo Install it with: pip install uv
    echo Or visit: https://docs.astral.sh/uv/
    pause
    exit /b 1
)

echo [1/2] Installing dependencies with uv...
uv pip install -r requirements.txt
if errorlevel 1 (
    echo ERROR: Failed to install dependencies
    echo Make sure requirements.txt is present
    pause
    exit /b 1
)

echo.
echo [2/2] Starting Smart Confidant application...
echo.
echo ========================================
echo  Application Starting
echo ========================================
echo.
echo Access the app at: http://localhost:8012
echo Metrics available at: http://localhost:8000/metrics
echo.
echo Press Ctrl+C to stop the application
echo ========================================
echo.

REM Warn (but do not abort) when .env is missing - only API models need HF_TOKEN.
if not exist ".env" (
    echo WARNING: .env file not found
    echo API models will not work without HF_TOKEN
    echo Copy .env.example to .env and add your token
    echo.
)

REM --no-project: run in the current environment instead of resolving a
REM pyproject-based project env (dependencies were installed above via uv pip).
uv run --no-project python app.py

REM Keep window open if there's an error
if errorlevel 1 (
    echo.
    echo ========================================
    echo  Application stopped with error
    echo ========================================
    pause
)