Spaces:

gaidasalsaa
/

xstress-api

Runtime error

App Files Community

gaidasalsaa committed on 23 days ago

Commit

e73b762

1 Parent(s): b38b1f8

Added Dockerfile

Browse files

Files changed (3) hide show

Dockerfile +34 -0
README.md +14 -2
app.py +214 -132

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+# Use official Python runtime
+FROM python:3.10-slim
+# Set working directory
+WORKDIR /app
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first (for better caching)
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY app.py .
+# Expose port 7860 (required by HF Spaces)
+EXPOSE 7860
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+# Run the application
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

README.md CHANGED Viewed

@@ -7,6 +7,18 @@ sdk: docker
 pinned: false
 ---
-# Stress Detection API
-Deteksi tingkat stress dari postingan Twitter menggunakan IndoBERTweet.

 pinned: false
 ---
+# Twitter Stress Detection API
+API untuk mendeteksi tingkat stress dari postingan Twitter menggunakan model IndoBERTweet.
+## Endpoints
+- `GET /` - Info API
+- `GET /health` - Health check
+- `GET /analyze/{username}` - Analyze user stress level
+- `GET /docs` - Interactive API documentation
+## Usage
+```bash
+curl https://your-space.hf.space/analyze/username
+```

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from fastapi import FastAPI
 from pydantic import BaseModel
 from typing import Optional
 import requests
@@ -7,6 +8,11 @@ from transformers import AutoTokenizer, BertForSequenceClassification
 from huggingface_hub import hf_hub_download
 import os
 import gc
 # -----------------------------
 # CONFIG
@@ -21,217 +27,281 @@ BEARER_TOKEN = os.getenv(
 )
 # -----------------------------
-# FASTAPI (Initialize FIRST)
 # -----------------------------
 app = FastAPI(
     title="Stress Detection API",
-    description="Detect stress levels from X(Twitter) user posts",
-    version="1.0.0"
 )
-# Global variables untuk lazy loading
 model = None
 tokenizer = None
 device = None
 class StressResponse(BaseModel):
     message: str
     data: Optional[dict] = None
 # -----------------------------
-# LAZY LOAD MODEL
 # -----------------------------
 def load_model_once():
-    """Load model hanya sekali saat pertama kali dipanggil"""
-    global model, tokenizer, device
-    if model is not None:
-        return  # Sudah di-load
-    print("Loading model (first time only)...")
-    # Set device
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    print(f"Using device: {device}")
-    # 1. Load tokenizer
-    print("Loading tokenizer...")
-    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
-    # 2. Download .pth file
-    print(f"⬇Downloading {PT_FILE}...")
-    model_path = hf_hub_download(
-        repo_id=HF_MODEL_REPO,
-        filename=PT_FILE
-    )
-    print(f"Downloaded to: {model_path}")
-    # 3. Load base model dengan optimasi memory
-    print("Loading base model...")
-    model = BertForSequenceClassification.from_pretrained(
-        BASE_MODEL,
-        num_labels=2,
-        low_cpu_mem_usage=True,
-        torch_dtype=torch.float16 if device == "cuda" else torch.float32
-    )
-    # 4. Load fine-tuned weights
-    print("Loading fine-tuned weights...")
-    state_dict = torch.load(model_path, map_location=device)
-    model.load_state_dict(state_dict, strict=False)
-    # 5. Move to device dan set eval mode
-    model.to(device)
-    model.eval()
-    # 6. Clear cache
-    gc.collect()
-    if device == "cuda":
-        torch.cuda.empty_cache()
-    print("Model loaded successfully!")
 # -----------------------------
 # HELPER FUNCTIONS
 # -----------------------------
-def get_user_id(username):
     """Get Twitter user ID from username"""
     url = f"https://api.x.com/2/users/by/username/{username}"
     headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
     try:
-        r = requests.get(url, headers=headers, timeout=10)
-        r.raise_for_status()
-        return r.json()["data"]["id"], None
-    except Exception as e:
-        return None, {"error": str(e)}
-def fetch_tweets(user_id, limit=25):
-    """Fetch user's recent tweets"""
     url = f"https://api.x.com/2/users/{user_id}/tweets"
-    params = {"max_results": limit, "tweet.fields": "id,text,created_at"}
     headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
     try:
-        r = requests.get(url, headers=headers, params=params, timeout=10)
-        r.raise_for_status()
-        tweets = r.json().get("data", [])
-        return [t["text"] for t in tweets], None
-    except Exception as e:
-        return None, {"error": str(e)}
-def predict_stress(text):
     """Predict stress level from text"""
-    inputs = tokenizer(
-        text,
-        return_tensors="pt",
-        truncation=True,
-        padding=True,
-        max_length=128
-    ).to(device)
-    with torch.no_grad():
-        outputs = model(**inputs)
-        probs = torch.softmax(outputs.logits, dim=1)[0]
-    label = torch.argmax(probs).item()
-    return label, float(probs[1])
 # -----------------------------
-# API ENDPOINTS
 # -----------------------------
 @app.on_event("startup")
 async def startup_event():
-    """Load model saat aplikasi start"""
-    print("Starting application...")
     load_model_once()
-    print("Application ready!")
-@app.get("/")
-def root():
-    """Health check endpoint"""
     return {
         "status": "online",
-        "message": "Stress Detection API is running",
-        "model_loaded": model is not None
-    }
-@app.get("/health")
-def health():
-    """Detailed health check"""
-    return {
-        "status": "healthy",
-        "model_loaded": model is not None,
-        "device": str(device) if device else "not loaded",
-        "tokenizer_loaded": tokenizer is not None
     }
 @app.get("/analyze/{username}", response_model=StressResponse)
-def analyze(username: str):
-    """Analyze stress level from user's tweets"""
-    # Pastikan model sudah loaded
-    if model is None:
         load_model_once()
     # 1. Get user ID
     user_id, error = get_user_id(username)
     if error:
-        return StressResponse(
-            message=f"Failed to fetch user profile: {error.get('error', 'Unknown error')}",
-            data=None
         )
     # 2. Fetch tweets
     tweets, error = fetch_tweets(user_id)
     if error:
-        return StressResponse(
-            message=f"Failed to fetch tweets: {error.get('error', 'Unknown error')}",
-            data=None
         )
     if not tweets:
         return StressResponse(
-            message="User has no tweets or account is protected.",
             data=None
         )
-    # 3. Predict stress for each tweet
     labels = []
-    for tweet in tweets:
         try:
-            label, _ = predict_stress(tweet)
             labels.append(label)
         except Exception as e:
-            print(f"Skipping tweet due to error: {e}")
             continue
     if not labels:
-        return StressResponse(
-            message="Failed to analyze tweets.",
-            data=None
         )
-    # 4. Calculate stress statistics
     stress_percentage = round(sum(labels) / len(labels) * 100, 2)
     # Determine stress status
     if stress_percentage <= 25:
-        status = 0  # Low
     elif stress_percentage <= 50:
-        status = 1  # Medium
     elif stress_percentage <= 75:
-        status = 2  # High
     else:
-        status = 3  # Very High
     return StressResponse(
         message="Analysis successful",
@@ -240,13 +310,25 @@ def analyze(username: str):
             "total_tweets": len(tweets),
             "analyzed_tweets": len(labels),
             "stress_level": stress_percentage,
-            "stress_status": status
         }
     )
 # -----------------------------
-# RUN (untuk local testing)
 # -----------------------------
 if __name__ == "__main__":
     import uvicorn

+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from typing import Optional
 import requests
 from huggingface_hub import hf_hub_download
 import os
 import gc
+import logging
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # -----------------------------
 # CONFIG
 )
 # -----------------------------
+# FASTAPI APP
 # -----------------------------
 app = FastAPI(
     title="Stress Detection API",
+    description="Detect stress levels from X(Twitter) user posts using IndoBERTweet",
+    version="1.0.0",
+    docs_url="/docs",
+    redoc_url="/redoc"
+)
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
+# Global variables
 model = None
 tokenizer = None
 device = None
+model_loaded = False
+# -----------------------------
+# MODELS
+# -----------------------------
 class StressResponse(BaseModel):
     message: str
     data: Optional[dict] = None
+class HealthResponse(BaseModel):
+    status: str
+    model_loaded: bool
+    device: Optional[str] = None
 # -----------------------------
+# MODEL LOADING
 # -----------------------------
 def load_model_once():
+    """Load model only once at startup"""
+    global model, tokenizer, device, model_loaded
+    if model_loaded:
+        logger.info("Model already loaded, skipping...")
+        return
+    try:
+        logger.info("🔄 Starting model loading...")
+        # Set device
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"📱 Using device: {device}")
+        # Load tokenizer
+        logger.info("📝 Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+        logger.info("✅ Tokenizer loaded")
+        # Download model weights
+        logger.info(f"⬇️  Downloading {PT_FILE}...")
+        model_path = hf_hub_download(
+            repo_id=HF_MODEL_REPO,
+            filename=PT_FILE
+        )
+        logger.info(f"✅ Model file downloaded: {model_path}")
+        # Load base model
+        logger.info("🧠 Loading base model architecture...")
+        model = BertForSequenceClassification.from_pretrained(
+            BASE_MODEL,
+            num_labels=2,
+            low_cpu_mem_usage=True,
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32
+        )
+        logger.info("✅ Base model loaded")
+        # Load fine-tuned weights
+        logger.info("🔧 Loading fine-tuned weights...")
+        state_dict = torch.load(model_path, map_location=device)
+        model.load_state_dict(state_dict, strict=False)
+        logger.info("✅ Weights loaded")
+        # Move to device and set eval mode
+        model.to(device)
+        model.eval()
+        logger.info(f"✅ Model moved to {device} and set to eval mode")
+        # Clear memory
+        gc.collect()
+        if device == "cuda":
+            torch.cuda.empty_cache()
+        model_loaded = True
+        logger.info("✅ Model loading complete!")
+    except Exception as e:
+        logger.error(f"❌ Failed to load model: {str(e)}")
+        raise
 # -----------------------------
 # HELPER FUNCTIONS
 # -----------------------------
+def get_user_id(username: str):
     """Get Twitter user ID from username"""
     url = f"https://api.x.com/2/users/by/username/{username}"
     headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
     try:
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+        return response.json()["data"]["id"], None
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Twitter API error (get_user_id): {str(e)}")
+        return None, str(e)
+    except KeyError:
+        return None, "User not found"
+def fetch_tweets(user_id: str, limit: int = 25):
+    """Fetch recent tweets from user"""
     url = f"https://api.x.com/2/users/{user_id}/tweets"
+    params = {
+        "max_results": min(limit, 100),  # Twitter API max is 100
+        "tweet.fields": "id,text,created_at"
+    }
     headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
     try:
+        response = requests.get(url, headers=headers, params=params, timeout=10)
+        response.raise_for_status()
+        tweets = response.json().get("data", [])
+        return [tweet["text"] for tweet in tweets], None
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Twitter API error (fetch_tweets): {str(e)}")
+        return None, str(e)
+def predict_stress(text: str):
     """Predict stress level from text"""
+    try:
+        inputs = tokenizer(
+            text,
+            return_tensors="pt",
+            truncation=True,
+            padding=True,
+            max_length=128
+        ).to(device)
+        with torch.no_grad():
+            outputs = model(**inputs)
+            probs = torch.softmax(outputs.logits, dim=1)[0]
+        label = torch.argmax(probs).item()
+        confidence = float(probs[1])
+        return label, confidence
+    except Exception as e:
+        logger.error(f"Prediction error: {str(e)}")
+        raise
 # -----------------------------
+# STARTUP EVENT
 # -----------------------------
 @app.on_event("startup")
 async def startup_event():
+    """Load model when app starts"""
+    logger.info("🚀 Application starting...")
     load_model_once()
+    logger.info("✅ Application ready!")
+# -----------------------------
+# API ENDPOINTS
+# -----------------------------
+@app.get("/", response_model=dict)
+async def root():
+    """Root endpoint with API info"""
     return {
+        "name": "Stress Detection API",
+        "version": "1.0.0",
         "status": "online",
+        "endpoints": {
+            "health": "/health",
+            "analyze": "/analyze/{username}",
+            "docs": "/docs"
+        }
     }
+@app.get("/health", response_model=HealthResponse)
+async def health_check():
+    """Health check endpoint"""
+    return HealthResponse(
+        status="healthy" if model_loaded else "loading",
+        model_loaded=model_loaded,
+        device=str(device) if device else None
+    )
 @app.get("/analyze/{username}", response_model=StressResponse)
+async def analyze_user(username: str):
+    """
+    Analyze stress level from Twitter user's recent tweets
+    - **username**: Twitter username (without @)
+    """
+    # Ensure model is loaded
+    if not model_loaded:
+        logger.warning("Model not loaded yet, loading now...")
         load_model_once()
+    # Remove @ if user included it
+    username = username.lstrip("@")
+    logger.info(f"📊 Analyzing user: @{username}")
     # 1. Get user ID
     user_id, error = get_user_id(username)
     if error:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Failed to fetch user profile: {error}"
         )
     # 2. Fetch tweets
     tweets, error = fetch_tweets(user_id)
     if error:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to fetch tweets: {error}"
         )
     if not tweets:
         return StressResponse(
+            message="No tweets found. User may be protected or has no tweets.",
             data=None
         )
+    # 3. Analyze each tweet
     labels = []
+    confidences = []
+    for i, tweet in enumerate(tweets):
         try:
+            label, confidence = predict_stress(tweet)
             labels.append(label)
+            confidences.append(confidence)
+            logger.info(f"Tweet {i+1}/{len(tweets)}: label={label}, confidence={confidence:.2f}")
         except Exception as e:
+            logger.warning(f"Skipping tweet {i+1} due to error: {str(e)}")
             continue
     if not labels:
+        raise HTTPException(
+            status_code=500,
+            detail="Failed to analyze any tweets"
         )
+    # 4. Calculate statistics
     stress_percentage = round(sum(labels) / len(labels) * 100, 2)
+    avg_confidence = round(sum(confidences) / len(confidences) * 100, 2)
     # Determine stress status
     if stress_percentage <= 25:
+        status = 0
+        status_text = "Low Stress"
     elif stress_percentage <= 50:
+        status = 1
+        status_text = "Medium Stress"
     elif stress_percentage <= 75:
+        status = 2
+        status_text = "High Stress"
     else:
+        status = 3
+        status_text = "Very High Stress"
+    logger.info(f"✅ Analysis complete: {stress_percentage}% stress ({status_text})")
     return StressResponse(
         message="Analysis successful",
             "total_tweets": len(tweets),
             "analyzed_tweets": len(labels),
             "stress_level": stress_percentage,
+            "stress_status": status,
+            "stress_status_text": status_text,
+            "average_confidence": avg_confidence
         }
     )
+# -----------------------------
+# ERROR HANDLERS
+# -----------------------------
+@app.exception_handler(Exception)
+async def global_exception_handler(request, exc):
+    logger.error(f"Unhandled exception: {str(exc)}")
+    return StressResponse(
+        message=f"Internal server error: {str(exc)}",
+        data=None
+    )
 # -----------------------------
+# RUN (for local testing only)
 # -----------------------------
 if __name__ == "__main__":
     import uvicorn