Soumik555 committed on
Commit 9ef9a4e · 1 Parent(s): 46aca47
Files changed (2)
  1. Dockerfile +11 -8
  2. main.py +71 -70
Dockerfile CHANGED
@@ -4,6 +4,7 @@ FROM python:3.9-slim as builder
 # Install build dependencies
 RUN apt-get update && apt-get install -y \
     build-essential \
+    curl \
     && rm -rf /var/lib/apt/lists/*
 
 # Copy requirements and install Python dependencies
@@ -26,11 +27,12 @@ WORKDIR /app
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     TRANSFORMERS_CACHE=/app/model_cache \
-    HF_HOME=/app/hf_cache \
-    HUGGINGFACE_HUB_CACHE=/app/hf_cache
+    HF_HOME=/app/model_cache \
+    HUGGINGFACE_HUB_CACHE=/app/model_cache \
+    MODEL_NAME=microsoft/DialoGPT-medium
 
 # Create cache directories with proper permissions
-RUN mkdir -p /app/model_cache /app/hf_cache
+RUN mkdir -p /app/model_cache && chmod 777 /app/model_cache
 
 # Copy installed packages from builder stage
 COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages
@@ -41,7 +43,8 @@ COPY . .
 
 # Create non-root user and set permissions
 RUN useradd -m -u 1000 user && \
-    chown -R user:user /app
+    chown -R user:user /app && \
+    chmod 777 /app/model_cache
 
 USER user
 
@@ -49,8 +52,8 @@ USER user
 EXPOSE 7860
 
 # Health check
-HEALTHCHECK --interval=30s --timeout=30s --start-period=90s --retries=3 \
-    CMD curl -f http://localhost:7860/ || curl -f http://localhost:7860/docs || exit 1
+HEALTHCHECK --interval=30s --timeout=30s --start-period=300s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
 
-# Run FastAPI application on port 7860 for HuggingFace Spaces
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+# Run FastAPI application
+CMD ["python", "main.py"]
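Note: the Dockerfile now installs curl, points the HEALTHCHECK at the new /health endpoint, and extends the start period to 300 seconds because main.py loads the model in a background thread after the server starts. Below is a minimal readiness-probe sketch that mirrors what the health check waits for; the script, its filename, and the localhost:7860 address are illustrative assumptions (not part of this commit), and it requires the requests package.

# readiness_probe.py - minimal sketch, not part of this commit; mirrors the HEALTHCHECK above.
# Assumes the container is reachable on localhost:7860 and `requests` is installed.
import time

import requests

BASE_URL = "http://localhost:7860"  # port exposed by the Dockerfile for HuggingFace Spaces


def wait_until_ready(timeout: float = 300.0, interval: float = 10.0) -> bool:
    """Poll /health until the model finishes loading or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            data = requests.get(f"{BASE_URL}/health", timeout=30).json()
            # HealthResponse fields defined in main.py: status, is_model_loaded, model_name, ...
            if data.get("is_model_loaded"):
                print(f"ready: {data['model_name']} (up {data['startup_time']:.0f}s)")
                return True
            print(f"waiting, status={data.get('status')}")
        except requests.RequestException as exc:
            print(f"server not reachable yet: {exc}")
        time.sleep(interval)
    return False


if __name__ == "__main__":
    wait_until_ready()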
main.py CHANGED
@@ -1,8 +1,7 @@
 import os
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.staticfiles import StaticFiles
-from fastapi.responses import FileResponse, JSONResponse
+from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
@@ -22,7 +21,7 @@ logger = logging.getLogger(__name__)
 # FastAPI app
 app = FastAPI(
     title="FastAPI Chatbot",
-    description="Chatbot with FastAPI backend and Gradio frontend",
+    description="Chatbot with FastAPI backend",
     version="1.0.0"
 )
 
@@ -35,30 +34,40 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# Pydantic models
+# Pydantic models with fixed namespace conflicts
 class ChatRequest(BaseModel):
     message: str
     max_length: int = 100
     temperature: float = 0.7
     top_p: float = 0.9
 
+    class Config:
+        protected_namespaces = ()
+
 class ChatResponse(BaseModel):
     response: str
-    model_used: str
+    model_name: str
     response_time: float
 
+    class Config:
+        protected_namespaces = ()
+
 class HealthResponse(BaseModel):
     status: str
-    model_loaded: bool
+    is_model_loaded: bool
     model_name: str
     cache_directory: str
     startup_time: float
 
+    class Config:
+        protected_namespaces = ()
+
 # Global variables
 tokenizer = None
 model = None
 generator = None
 startup_time = time.time()
+model_loaded = False
 
 # Configuration
 MODEL_NAME = os.getenv("MODEL_NAME", "microsoft/DialoGPT-medium")
@@ -73,26 +82,30 @@ def ensure_cache_dir():
 
 def is_model_cached(model_name: str) -> bool:
     """Check if model is already cached"""
-    model_path = Path(CACHE_DIR) / f"models--{model_name.replace('/', '--')}"
-    is_cached = model_path.exists() and any(model_path.iterdir())
-    logger.info(f"Model cached: {is_cached}")
-    return is_cached
+    try:
+        model_path = Path(CACHE_DIR) / f"models--{model_name.replace('/', '--')}"
+        is_cached = model_path.exists() and any(model_path.iterdir())
+        logger.info(f"Model cached: {is_cached}")
+        return is_cached
+    except Exception as e:
+        logger.error(f"Error checking cache: {e}")
+        return False
 
 def load_model():
     """Load the Hugging Face model with caching"""
-    global tokenizer, model, generator
+    global tokenizer, model, generator, model_loaded
 
     try:
         ensure_cache_dir()
 
-        if is_model_cached(MODEL_NAME):
-            logger.info(f"✅ Loading cached model: {MODEL_NAME}")
-        else:
-            logger.info(f"📥 Downloading and caching model: {MODEL_NAME}")
+        logger.info(f"Loading model: {MODEL_NAME}")
+        logger.info(f"Cache dir: {CACHE_DIR}")
+        logger.info(f"CUDA available: {torch.cuda.is_available()}")
 
         start_time = time.time()
 
-        # Load tokenizer
+        # Load tokenizer first
+        logger.info("Loading tokenizer...")
         tokenizer = AutoTokenizer.from_pretrained(
             MODEL_NAME,
             cache_dir=CACHE_DIR,
@@ -103,16 +116,19 @@ def load_model():
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
 
-        # Load model with optimization
+        # Load model
+        logger.info("Loading model...")
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_NAME,
             cache_dir=CACHE_DIR,
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            device_map="auto" if torch.cuda.is_available() else None,
             low_cpu_mem_usage=True,
             local_files_only=False
         )
 
         # Create text generation pipeline
+        logger.info("Creating pipeline...")
        device = 0 if torch.cuda.is_available() else -1
         generator = pipeline(
             "text-generation",
@@ -123,18 +139,20 @@ def load_model():
         )
 
         load_time = time.time() - start_time
+        model_loaded = True
         logger.info(f"✅ Model loaded successfully in {load_time:.2f} seconds!")
+        logger.info(f"Model device: {model.device}")
 
         return True
 
     except Exception as e:
-        logger.error(f"❌ Error loading model: {str(e)}")
+        logger.error(f"❌ Error loading model: {str(e)}", exc_info=True)
         return False
 
 def generate_response(message: str, max_length: int = 100, temperature: float = 0.7, top_p: float = 0.9) -> str:
     """Generate response using the loaded model"""
     if not generator:
-        return "❌ Model not loaded. Please wait for initialization..."
+        return "❌ Model not loaded. Please wait for initialization...", 0.0
 
     try:
         start_time = time.time()
@@ -171,28 +189,21 @@ def generate_response(message: str, max_length: int = 100, temperature: float =
         return bot_response, response_time
 
     except Exception as e:
-        logger.error(f"Error generating response: {str(e)}")
+        logger.error(f"Error generating response: {str(e)}", exc_info=True)
         return f"❌ Error generating response: {str(e)}", 0.0
 
 # FastAPI endpoints
-@app.get("/", response_class=FileResponse)
-async def serve_frontend():
-    """Serve the frontend HTML file"""
-    html_path = Path("static/index.html")
-    if html_path.exists():
-        return FileResponse("static/index.html")
-    else:
-        return JSONResponse(
-            content={"message": "Frontend not available. Use /docs for API documentation."},
-            status_code=200
-        )
+@app.get("/")
+async def root():
+    """Root endpoint"""
+    return {"message": "FastAPI Chatbot API", "status": "running"}
 
 @app.get("/health", response_model=HealthResponse)
 async def health_check():
     """Health check endpoint with detailed information"""
     return HealthResponse(
-        status="healthy" if model is not None else "initializing",
-        model_loaded=model is not None,
+        status="healthy" if model_loaded else "initializing",
+        is_model_loaded=model_loaded,
         model_name=MODEL_NAME,
         cache_directory=CACHE_DIR,
         startup_time=time.time() - startup_time
@@ -201,7 +212,7 @@ async def health_check():
 @app.post("/chat", response_model=ChatResponse)
 async def chat_endpoint(request: ChatRequest):
     """Chat endpoint for API access"""
-    if not generator:
+    if not model_loaded:
         raise HTTPException(
             status_code=503,
             detail="Model not loaded yet. Please wait for initialization."
@@ -224,17 +235,21 @@ async def chat_endpoint(request: ChatRequest):
 
     return ChatResponse(
         response=response_text,
-        model_used=MODEL_NAME,
+        model_name=MODEL_NAME,
         response_time=response_time
     )
 
 @app.get("/model-info")
 async def get_model_info():
     """Get detailed model information"""
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    if model and hasattr(model, 'device'):
+        device = str(model.device)
+
     return {
         "model_name": MODEL_NAME,
-        "model_loaded": model is not None,
-        "device": "cuda" if torch.cuda.is_available() else "cpu",
+        "model_loaded": model_loaded,
+        "device": device,
         "cache_directory": CACHE_DIR,
         "model_cached": is_model_cached(MODEL_NAME),
         "parameters": {
@@ -243,47 +258,33 @@ async def get_model_info():
         }
     }
 
-@app.get("/status")
-async def get_status():
-    """Get current application status"""
-    return {
-        "status": "running",
-        "model_ready": model is not None,
-        "uptime": time.time() - startup_time,
-        "endpoints": ["/", "/health", "/chat", "/model-info", "/docs"]
-    }
+@app.on_event("startup")
+async def startup_event():
+    """Load model on startup"""
+    logger.info("🚀 Starting FastAPI Chatbot...")
+    logger.info("📦 Loading model...")
+
+    # Load model in background thread to not block startup
+    def load_model_background():
+        global model_loaded
+        model_loaded = load_model()
+        if model_loaded:
+            logger.info("✅ Model loaded successfully!")
+        else:
+            logger.error("❌ Failed to load model.")
+
+    # Start model loading in background
+    threading.Thread(target=load_model_background, daemon=True).start()
 
 def run_fastapi():
     """Run FastAPI server"""
     uvicorn.run(
         app,
         host="0.0.0.0",
-        port=8000,
+        port=7860,  # Changed to 7860 for HuggingFace
         log_level="info",
         access_log=True
     )
 
-def main():
-    """Main function to run both FastAPI and Gradio"""
-    logger.info("🚀 Starting FastAPI Chatbot...")
-
-    # Load model first
-    logger.info("📦 Loading model...")
-    model_loaded = load_model()
-
-    if not model_loaded:
-        logger.error("❌ Failed to load model. Exiting...")
-        return
-
-    logger.info("✅ Model loaded successfully!")
-
-    # Start FastAPI server in a separate thread
-    logger.info("🌐 Starting FastAPI server...")
-    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
-    fastapi_thread.start()
-
 if __name__ == "__main__":
-    main()
-
-
-
+    run_fastapi()
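For reference, a hedged usage sketch of the /chat endpoint changed above, once /health reports is_model_loaded as true. The script, its filename, and the localhost:7860 address are assumptions for illustration only (not part of this commit); it requires the requests package.

# chat_client.py - illustrative only; not included in this commit.
import requests

BASE_URL = "http://localhost:7860"  # same port the Dockerfile exposes

# Fields mirror the ChatRequest model in main.py; defaults are spelled out explicitly.
payload = {
    "message": "Hello, how are you?",
    "max_length": 100,
    "temperature": 0.7,
    "top_p": 0.9,
}

resp = requests.post(f"{BASE_URL}/chat", json=payload, timeout=60)
resp.raise_for_status()  # a 503 here means the model is still initializing
data = resp.json()  # ChatResponse fields: response, model_name, response_time
print(f"[{data['model_name']} in {data['response_time']:.2f}s] {data['response']}")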