hello
main.py
CHANGED
@@ -1,27 +1,290 @@
-
+import os
+from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
-from
-from
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import torch
+import logging
 import threading
 import uvicorn
-from
+from pathlib import Path
+import time
 
-
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# FastAPI app
+app = FastAPI(
+    title="FastAPI Chatbot",
+    description="Chatbot with FastAPI backend",
+    version="1.0.0"
+)
+
+# Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
-
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
 
-
+# Pydantic models with fixed namespace conflicts
+class ChatRequest(BaseModel):
+    message: str
+    max_length: int = 100
+    temperature: float = 0.7
+    top_p: float = 0.9
+
+    class Config:
+        protected_namespaces = ()
+
+class ChatResponse(BaseModel):
+    response: str
+    model_name: str
+    response_time: float
+
+    class Config:
+        protected_namespaces = ()
+
+class HealthResponse(BaseModel):
+    status: str
+    is_model_loaded: bool
+    model_name: str
+    cache_directory: str
+    startup_time: float
+
+    class Config:
+        protected_namespaces = ()
+
+# Global variables
+tokenizer = None
+model = None
+generator = None
+startup_time = time.time()
+model_loaded = False
+
+# Configuration
+MODEL_NAME = os.getenv("MODEL_NAME", "microsoft/DialoGPT-medium")
+CACHE_DIR = os.getenv("TRANSFORMERS_CACHE", "/app/model_cache")
+MAX_LENGTH = int(os.getenv("MAX_LENGTH", "100"))
+DEFAULT_TEMPERATURE = float(os.getenv("DEFAULT_TEMPERATURE", "0.7"))
+
+def ensure_cache_dir():
+    """Ensure cache directory exists"""
+    Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
+    logger.info(f"Cache directory: {CACHE_DIR}")
+
+def is_model_cached(model_name: str) -> bool:
+    """Check if model is already cached"""
+    try:
+        model_path = Path(CACHE_DIR) / f"models--{model_name.replace('/', '--')}"
+        is_cached = model_path.exists() and any(model_path.iterdir())
+        logger.info(f"Model cached: {is_cached}")
+        return is_cached
+    except Exception as e:
+        logger.error(f"Error checking cache: {e}")
+        return False
+
+def load_model():
+    """Load the Hugging Face model with caching"""
+    global tokenizer, model, generator, model_loaded
+
+    try:
+        ensure_cache_dir()
+
+        logger.info(f"Loading model: {MODEL_NAME}")
+        logger.info(f"Cache dir: {CACHE_DIR}")
+        logger.info(f"CUDA available: {torch.cuda.is_available()}")
+
+        start_time = time.time()
+
+        # Load tokenizer first
+        logger.info("Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(
+            MODEL_NAME,
+            cache_dir=CACHE_DIR,
+            local_files_only=False
+        )
+
+        # Add padding token if it doesn't exist
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+
+        # Load model
+        logger.info("Loading model...")
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            cache_dir=CACHE_DIR,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            device_map="auto" if torch.cuda.is_available() else None,
+            low_cpu_mem_usage=True,
+            local_files_only=False
+        )
+
+        # Create text generation pipeline
+        logger.info("Creating pipeline...")
+        device = 0 if torch.cuda.is_available() else -1
+        generator = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            device=device,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        )
+
+        load_time = time.time() - start_time
+        model_loaded = True
+        logger.info(f"✅ Model loaded successfully in {load_time:.2f} seconds!")
+        logger.info(f"Model device: {model.device}")
+
+        return True
+
+    except Exception as e:
+        logger.error(f"❌ Error loading model: {str(e)}", exc_info=True)
+        return False
+
+def generate_response(message: str, max_length: int = 100, temperature: float = 0.7, top_p: float = 0.9) -> tuple[str, float]:
+    """Generate a response using the loaded model; returns (text, elapsed seconds)"""
+    if not generator:
+        return "❌ Model not loaded. Please wait for initialization...", 0.0
+
+    try:
+        start_time = time.time()
+
+        # Generate response with parameters
+        response = generator(
+            message,
+            max_length=max_length,
+            temperature=temperature,
+            top_p=top_p,
+            num_return_sequences=1,
+            pad_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            truncation=True,
+            repetition_penalty=1.1
+        )
+
+        # Extract generated text
+        generated_text = response[0]['generated_text']
+
+        # Clean up response
+        if generated_text.startswith(message):
+            bot_response = generated_text[len(message):].strip()
+        else:
+            bot_response = generated_text.strip()
+
+        # Fallback if empty response
+        if not bot_response:
+            bot_response = "I'm not sure how to respond to that. Could you try rephrasing?"
+
+        response_time = time.time() - start_time
+        logger.info(f"Generated response in {response_time:.2f}s")
+
+        return bot_response, response_time
+
+    except Exception as e:
+        logger.error(f"Error generating response: {str(e)}", exc_info=True)
+        return f"❌ Error generating response: {str(e)}", 0.0
+
+# FastAPI endpoints
+@app.get("/")
+async def root():
+    """Root endpoint"""
+    return {"message": "FastAPI Chatbot API", "status": "running"}
+
+@app.get("/health", response_model=HealthResponse)
+async def health_check():
+    """Health check endpoint with detailed information"""
+    return HealthResponse(
+        status="healthy" if model_loaded else "initializing",
+        is_model_loaded=model_loaded,
+        model_name=MODEL_NAME,
+        cache_directory=CACHE_DIR,
+        startup_time=time.time() - startup_time
+    )
+
+@app.post("/chat", response_model=ChatResponse)
+async def chat_endpoint(request: ChatRequest):
+    """Chat endpoint for API access"""
+    if not model_loaded:
+        raise HTTPException(
+            status_code=503,
+            detail="Model not loaded yet. Please wait for initialization."
+        )
+
+    # Validate input
+    if not request.message.strip():
+        raise HTTPException(status_code=400, detail="Message cannot be empty")
+
+    if len(request.message) > 1000:
+        raise HTTPException(status_code=400, detail="Message too long (max 1000 characters)")
+
+    # Generate response
+    response_text, response_time = generate_response(
+        request.message.strip(),
+        request.max_length,
+        request.temperature,
+        request.top_p
+    )
+
+    return ChatResponse(
+        response=response_text,
+        model_name=MODEL_NAME,
+        response_time=response_time
+    )
+
+@app.get("/model-info")
+async def get_model_info():
+    """Get detailed model information"""
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    if model and hasattr(model, 'device'):
+        device = str(model.device)
+
+    return {
+        "model_name": MODEL_NAME,
+        "model_loaded": model_loaded,
+        "device": device,
+        "cache_directory": CACHE_DIR,
+        "model_cached": is_model_cached(MODEL_NAME),
+        "parameters": {
+            "max_length": MAX_LENGTH,
+            "default_temperature": DEFAULT_TEMPERATURE
+        }
+    }
 
 @app.on_event("startup")
-def
-
-
-
-
-
+async def startup_event():
+    """Load model on startup"""
+    logger.info("🚀 Starting FastAPI Chatbot...")
+    logger.info("📦 Loading model...")
+
+    # Load model in background thread to not block startup
+    def load_model_background():
+        global model_loaded
+        model_loaded = load_model()
+        if model_loaded:
+            logger.info("✅ Model loaded successfully!")
+        else:
+            logger.error("❌ Failed to load model.")
+
+    # Start model loading in background
+    threading.Thread(target=load_model_background, daemon=True).start()
+
+def run_fastapi():
+    """Run FastAPI server"""
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=7860,  # Changed to 7860 for HuggingFace
+        log_level="info",
+        access_log=True
+    )
 
 if __name__ == "__main__":
-
+    run_fastapi()
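
A minimal sketch of how a client might exercise the new endpoints once this commit is deployed. Assumptions not in the diff: the server is reachable at http://localhost:7860 (the port set in run_fastapi), and the requests package is installed; the script and its names are hypothetical, not part of this commit.

# client_example.py (hypothetical) — exercises the endpoints defined above
import requests

BASE_URL = "http://localhost:7860"  # assumed local deployment; adjust for a hosted Space

# /health reports "initializing" until the background thread finishes loading the model
health = requests.get(f"{BASE_URL}/health").json()
print(health["status"], health["is_model_loaded"])

# /chat fields mirror the ChatRequest model; expect a 503 while the model is still loading
reply = requests.post(
    f"{BASE_URL}/chat",
    json={"message": "Hello!", "max_length": 100, "temperature": 0.7, "top_p": 0.9},
)
reply.raise_for_status()
data = reply.json()
print(data["response"], data["response_time"])

Because startup_event only kicks off loading on a daemon thread, clients should poll /health (or tolerate 503s from /chat) immediately after the Space wakes.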