Spaces:

cronjob-python
/

chatbot

Sleeping

App Files Community

Soumik555 committed on Sep 14, 2025

Commit

9ef9a4e

1 Parent(s): 46aca47

hello

Browse files

Files changed (2) hide show

Dockerfile +11 -8
main.py +71 -70

Dockerfile CHANGED Viewed

@@ -4,6 +4,7 @@ FROM python:3.9-slim as builder
 # Install build dependencies
 RUN apt-get update && apt-get install -y \
     build-essential \
     && rm -rf /var/lib/apt/lists/*
 # Copy requirements and install Python dependencies
@@ -26,11 +27,12 @@ WORKDIR /app
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     TRANSFORMERS_CACHE=/app/model_cache \
-    HF_HOME=/app/hf_cache \
-    HUGGINGFACE_HUB_CACHE=/app/hf_cache
 # Create cache directories with proper permissions
-RUN mkdir -p /app/model_cache /app/hf_cache
 # Copy installed packages from builder stage
 COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages
@@ -41,7 +43,8 @@ COPY . .
 # Create non-root user and set permissions
 RUN useradd -m -u 1000 user && \
-    chown -R user:user /app
 USER user
@@ -49,8 +52,8 @@ USER user
 EXPOSE 7860
 # Health check
-HEALTHCHECK --interval=30s --timeout=30s --start-period=90s --retries=3 \
-    CMD curl -f http://localhost:7860/ || curl -f http://localhost:7860/docs || exit 1
-# Run FastAPI application on port 7860 for HuggingFace Spaces
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

 # Install build dependencies
 RUN apt-get update && apt-get install -y \
     build-essential \
+    curl \
     && rm -rf /var/lib/apt/lists/*
 # Copy requirements and install Python dependencies
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     TRANSFORMERS_CACHE=/app/model_cache \
+    HF_HOME=/app/model_cache \
+    HUGGINGFACE_HUB_CACHE=/app/model_cache \
+    MODEL_NAME=microsoft/DialoGPT-medium
 # Create cache directories with proper permissions
+RUN mkdir -p /app/model_cache && chmod 777 /app/model_cache
 # Copy installed packages from builder stage
 COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages
 # Create non-root user and set permissions
 RUN useradd -m -u 1000 user && \
+    chown -R user:user /app && \
+    chmod 777 /app/model_cache
 USER user
 EXPOSE 7860
 # Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=300s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+# Run FastAPI application
+CMD ["python", "main.py"]

main.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import os
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.staticfiles import StaticFiles
-from fastapi.responses import FileResponse, JSONResponse
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
@@ -22,7 +21,7 @@ logger = logging.getLogger(__name__)
 # FastAPI app
 app = FastAPI(
     title="FastAPI Chatbot",
-    description="Chatbot with FastAPI backend and Gradio frontend",
     version="1.0.0"
 )
@@ -35,30 +34,40 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Pydantic models
 class ChatRequest(BaseModel):
     message: str
     max_length: int = 100
     temperature: float = 0.7
     top_p: float = 0.9
 class ChatResponse(BaseModel):
     response: str
-    model_used: str
     response_time: float
 class HealthResponse(BaseModel):
     status: str
-    model_loaded: bool
     model_name: str
     cache_directory: str
     startup_time: float
 # Global variables
 tokenizer = None
 model = None
 generator = None
 startup_time = time.time()
 # Configuration
 MODEL_NAME = os.getenv("MODEL_NAME", "microsoft/DialoGPT-medium")
@@ -73,26 +82,30 @@ def ensure_cache_dir():
 def is_model_cached(model_name: str) -> bool:
     """Check if model is already cached"""
-    model_path = Path(CACHE_DIR) / f"models--{model_name.replace('/', '--')}"
-    is_cached = model_path.exists() and any(model_path.iterdir())
-    logger.info(f"Model cached: {is_cached}")
-    return is_cached
 def load_model():
     """Load the Hugging Face model with caching"""
-    global tokenizer, model, generator
     try:
         ensure_cache_dir()
-        if is_model_cached(MODEL_NAME):
-            logger.info(f"✅ Loading cached model: {MODEL_NAME}")
-        else:
-            logger.info(f"📥 Downloading and caching model: {MODEL_NAME}")
         start_time = time.time()
-        # Load tokenizer
         tokenizer = AutoTokenizer.from_pretrained(
             MODEL_NAME,
             cache_dir=CACHE_DIR,
@@ -103,16 +116,19 @@ def load_model():
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
-        # Load model with optimization
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_NAME,
             cache_dir=CACHE_DIR,
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
             low_cpu_mem_usage=True,
             local_files_only=False
         )
         # Create text generation pipeline
         device = 0 if torch.cuda.is_available() else -1
         generator = pipeline(
             "text-generation",
@@ -123,18 +139,20 @@ def load_model():
         )
         load_time = time.time() - start_time
         logger.info(f"✅ Model loaded successfully in {load_time:.2f} seconds!")
         return True
     except Exception as e:
-        logger.error(f"❌ Error loading model: {str(e)}")
         return False
 def generate_response(message: str, max_length: int = 100, temperature: float = 0.7, top_p: float = 0.9) -> str:
     """Generate response using the loaded model"""
     if not generator:
-        return "❌ Model not loaded. Please wait for initialization..."
     try:
         start_time = time.time()
@@ -171,28 +189,21 @@ def generate_response(message: str, max_length: int = 100, temperature: float =
         return bot_response, response_time
     except Exception as e:
-        logger.error(f"Error generating response: {str(e)}")
         return f"❌ Error generating response: {str(e)}", 0.0
 # FastAPI endpoints
-@app.get("/", response_class=FileResponse)
-async def serve_frontend():
-    """Serve the frontend HTML file"""
-    html_path = Path("static/index.html")
-    if html_path.exists():
-        return FileResponse("static/index.html")
-    else:
-        return JSONResponse(
-            content={"message": "Frontend not available. Use /docs for API documentation."},
-            status_code=200
-        )
 @app.get("/health", response_model=HealthResponse)
 async def health_check():
     """Health check endpoint with detailed information"""
     return HealthResponse(
-        status="healthy" if model is not None else "initializing",
-        model_loaded=model is not None,
         model_name=MODEL_NAME,
         cache_directory=CACHE_DIR,
         startup_time=time.time() - startup_time
@@ -201,7 +212,7 @@ async def health_check():
 @app.post("/chat", response_model=ChatResponse)
 async def chat_endpoint(request: ChatRequest):
     """Chat endpoint for API access"""
-    if not generator:
         raise HTTPException(
             status_code=503,
             detail="Model not loaded yet. Please wait for initialization."
@@ -224,17 +235,21 @@ async def chat_endpoint(request: ChatRequest):
     return ChatResponse(
         response=response_text,
-        model_used=MODEL_NAME,
         response_time=response_time
     )
 @app.get("/model-info")
 async def get_model_info():
     """Get detailed model information"""
     return {
         "model_name": MODEL_NAME,
-        "model_loaded": model is not None,
-        "device": "cuda" if torch.cuda.is_available() else "cpu",
         "cache_directory": CACHE_DIR,
         "model_cached": is_model_cached(MODEL_NAME),
         "parameters": {
@@ -243,47 +258,33 @@ async def get_model_info():
         }
     }
-@app.get("/status")
-async def get_status():
-    """Get current application status"""
-    return {
-        "status": "running",
-        "model_ready": model is not None,
-        "uptime": time.time() - startup_time,
-        "endpoints": ["/", "/health", "/chat", "/model-info", "/docs"]
-    }
 def run_fastapi():
     """Run FastAPI server"""
     uvicorn.run(
         app,
         host="0.0.0.0",
-        port=8000,
         log_level="info",
         access_log=True
     )
-def main():
-    """Main function to run both FastAPI and Gradio"""
-    logger.info("🚀 Starting FastAPI Chatbot...")
-    # Load model first
-    logger.info("📦 Loading model...")
-    model_loaded = load_model()
-    if not model_loaded:
-        logger.error("❌ Failed to load model. Exiting...")
-        return
-    logger.info("✅ Model loaded successfully!")
-    # Start FastAPI server in a separate thread
-    logger.info("🌐 Starting FastAPI server...")
-    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
-    fastapi_thread.start()
 if __name__ == "__main__":
-    main()

 import os
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 # FastAPI app
 app = FastAPI(
     title="FastAPI Chatbot",
+    description="Chatbot with FastAPI backend",
     version="1.0.0"
 )
     allow_headers=["*"],
 )
+# Pydantic models; protected namespaces are disabled so `model_`-prefixed fields (e.g. model_name) do not conflict
 class ChatRequest(BaseModel):
     message: str
     max_length: int = 100
     temperature: float = 0.7
     top_p: float = 0.9
+    class Config:
+        protected_namespaces = ()
 class ChatResponse(BaseModel):
     response: str
+    model_name: str
     response_time: float
+    class Config:
+        protected_namespaces = ()
 class HealthResponse(BaseModel):
     status: str
+    is_model_loaded: bool
     model_name: str
     cache_directory: str
     startup_time: float
+    class Config:
+        protected_namespaces = ()
 # Global variables
 tokenizer = None
 model = None
 generator = None
 startup_time = time.time()
+model_loaded = False
 # Configuration
 MODEL_NAME = os.getenv("MODEL_NAME", "microsoft/DialoGPT-medium")
 def is_model_cached(model_name: str) -> bool:
     """Check if model is already cached"""
+    try:
+        model_path = Path(CACHE_DIR) / f"models--{model_name.replace('/', '--')}"
+        is_cached = model_path.exists() and any(model_path.iterdir())
+        logger.info(f"Model cached: {is_cached}")
+        return is_cached
+    except Exception as e:
+        logger.error(f"Error checking cache: {e}")
+        return False
 def load_model():
     """Load the Hugging Face model with caching"""
+    global tokenizer, model, generator, model_loaded
     try:
         ensure_cache_dir()
+        logger.info(f"Loading model: {MODEL_NAME}")
+        logger.info(f"Cache dir: {CACHE_DIR}")
+        logger.info(f"CUDA available: {torch.cuda.is_available()}")
         start_time = time.time()
+        # Load tokenizer first
+        logger.info("Loading tokenizer...")
         tokenizer = AutoTokenizer.from_pretrained(
             MODEL_NAME,
             cache_dir=CACHE_DIR,
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
+        # Load model
+        logger.info("Loading model...")
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_NAME,
             cache_dir=CACHE_DIR,
             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            device_map="auto" if torch.cuda.is_available() else None,
             low_cpu_mem_usage=True,
             local_files_only=False
         )
         # Create text generation pipeline
+        logger.info("Creating pipeline...")
         device = 0 if torch.cuda.is_available() else -1
         generator = pipeline(
             "text-generation",
         )
         load_time = time.time() - start_time
+        model_loaded = True
         logger.info(f"✅ Model loaded successfully in {load_time:.2f} seconds!")
+        logger.info(f"Model device: {model.device}")
         return True
     except Exception as e:
+        logger.error(f"❌ Error loading model: {str(e)}", exc_info=True)
         return False
 def generate_response(message: str, max_length: int = 100, temperature: float = 0.7, top_p: float = 0.9) -> str:
     """Generate response using the loaded model"""
     if not generator:
+        return "❌ Model not loaded. Please wait for initialization...", 0.0
     try:
         start_time = time.time()
         return bot_response, response_time
     except Exception as e:
+        logger.error(f"Error generating response: {str(e)}", exc_info=True)
         return f"❌ Error generating response: {str(e)}", 0.0
 # FastAPI endpoints
+@app.get("/")
+async def root():
+    """Root endpoint"""
+    return {"message": "FastAPI Chatbot API", "status": "running"}
 @app.get("/health", response_model=HealthResponse)
 async def health_check():
     """Health check endpoint with detailed information"""
     return HealthResponse(
+        status="healthy" if model_loaded else "initializing",
+        is_model_loaded=model_loaded,
         model_name=MODEL_NAME,
         cache_directory=CACHE_DIR,
         startup_time=time.time() - startup_time
 @app.post("/chat", response_model=ChatResponse)
 async def chat_endpoint(request: ChatRequest):
     """Chat endpoint for API access"""
+    if not model_loaded:
         raise HTTPException(
             status_code=503,
             detail="Model not loaded yet. Please wait for initialization."
     return ChatResponse(
         response=response_text,
+        model_name=MODEL_NAME,
         response_time=response_time
     )
 @app.get("/model-info")
 async def get_model_info():
     """Get detailed model information"""
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    if model and hasattr(model, 'device'):
+        device = str(model.device)
     return {
         "model_name": MODEL_NAME,
+        "model_loaded": model_loaded,
+        "device": device,
         "cache_directory": CACHE_DIR,
         "model_cached": is_model_cached(MODEL_NAME),
         "parameters": {
         }
     }
+@app.on_event("startup")
+async def startup_event():
+    """Load model on startup"""
+    logger.info("🚀 Starting FastAPI Chatbot...")
+    logger.info("📦 Loading model...")
+    # Load the model in a background thread so that server startup is not blocked
+    def load_model_background():
+        global model_loaded
+        model_loaded = load_model()
+        if model_loaded:
+            logger.info("✅ Model loaded successfully!")
+        else:
+            logger.error("❌ Failed to load model.")
+    # Start model loading in background
+    threading.Thread(target=load_model_background, daemon=True).start()
 def run_fastapi():
     """Run FastAPI server"""
     uvicorn.run(
         app,
         host="0.0.0.0",
+        port=7860,  # Changed to 7860 for HuggingFace
         log_level="info",
         access_log=True
     )
 if __name__ == "__main__":
+    run_fastapi()