gaidasalsaa committed
Commit b38b1f8 · 1 Parent(s): c173783

Fixed app and requirements

Files changed (3)
  1. README.md +8 -9
  2. app.py +187 -59
  3. requirements.txt +7 -7
README.md CHANGED
@@ -1,13 +1,12 @@
 ---
-title: Xstress Api
-emoji: 📈
-colorFrom: green
-colorTo: yellow
-sdk: gradio
-sdk_version: 6.0.0
-app_file: app.py
+title: Stress Detection API
+emoji: 🧠
+colorFrom: blue
+colorTo: purple
+sdk: docker
 pinned: false
-license: mit
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Stress Detection API
+
+Detect stress levels from Twitter posts using IndoBERTweet.
app.py CHANGED
@@ -3,8 +3,10 @@ from pydantic import BaseModel
 from typing import Optional
 import requests
 import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, BertForSequenceClassification
+from transformers import AutoTokenizer, BertForSequenceClassification
 from huggingface_hub import hf_hub_download
+import os
+import gc
 
 # -----------------------------
 # CONFIG
@@ -13,38 +15,13 @@ HF_MODEL_REPO = "gaidasalsaa/indobertweet-xstress-model"
 BASE_MODEL = "indolem/indobertweet-base-uncased"
 PT_FILE = "best_indobertweet.pth"
 
-BEARER_TOKEN = "AAAAAAAAAAAAAAAAAAAAAOGp3AEAAAAAMEaOafsh1pNGVFrK%2BN2atq0Cba4%3DE2Gw0MDFfJ1bE4veBIIxhOUqbaqQKOqRxMhGybH4FfOETDNpow"
-
-# -----------------------------
-# LOAD MODEL
-# -----------------------------
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# Load the tokenizer from the base model (safe)
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
-
-# Download the .pth file from Hugging Face
-model_path = hf_hub_download(
-    repo_id=HF_MODEL_REPO,
-    filename=PT_FILE
-)
-
-# Load the base model first
-model = BertForSequenceClassification.from_pretrained(
-    "indolem/indobertweet-base-uncased",
-    num_labels=2
+BEARER_TOKEN = os.getenv(
+    "TWITTER_BEARER_TOKEN",
+    "AAAAAAAAAAAAAAAAAAAAAOGp3AEAAAAAMEaOafsh1pNGVFrK%2BN2atq0Cba4%3DE2Gw0MDFfJ1bE4veBIIxhOUqbaqQKOqRxMhGybH4FfOETDNpow"
 )
 
-# Load your fine-tuned weights
-state_dict = torch.load(model_path, map_location=device)
-model.load_state_dict(state_dict, strict=True)
-
-model.to(device)
-model.eval()
-
-
 # -----------------------------
-# FASTAPI
+# FASTAPI (Initialize FIRST)
 # -----------------------------
 app = FastAPI(
     title="Stress Detection API",
@@ -52,74 +29,225 @@ app = FastAPI(
     version="1.0.0"
 )
 
+# Global variables for lazy loading
+model = None
+tokenizer = None
+device = None
+
 class StressResponse(BaseModel):
     message: str
     data: Optional[dict] = None
 
 
+# -----------------------------
+# LAZY LOAD MODEL
+# -----------------------------
+def load_model_once():
+    """Load the model only once, on the first call."""
+    global model, tokenizer, device
+
+    if model is not None:
+        return  # Already loaded
+
+    print("Loading model (first time only)...")
+
+    # Set device
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print(f"Using device: {device}")
+
+    # 1. Load tokenizer
+    print("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+
+    # 2. Download the .pth file
+    print(f"⬇ Downloading {PT_FILE}...")
+    model_path = hf_hub_download(
+        repo_id=HF_MODEL_REPO,
+        filename=PT_FILE
+    )
+    print(f"Downloaded to: {model_path}")
+
+    # 3. Load the base model with memory optimizations
+    print("Loading base model...")
+    model = BertForSequenceClassification.from_pretrained(
+        BASE_MODEL,
+        num_labels=2,
+        low_cpu_mem_usage=True,
+        torch_dtype=torch.float16 if device == "cuda" else torch.float32
+    )
+
+    # 4. Load fine-tuned weights
+    print("Loading fine-tuned weights...")
+    state_dict = torch.load(model_path, map_location=device)
+    model.load_state_dict(state_dict, strict=False)
+
+    # 5. Move to device and set eval mode
+    model.to(device)
+    model.eval()
+
+    # 6. Clear cache
+    gc.collect()
+    if device == "cuda":
+        torch.cuda.empty_cache()
+
+    print("Model loaded successfully!")
+
+
+# -----------------------------
+# HELPER FUNCTIONS
+# -----------------------------
 def get_user_id(username):
+    """Get the Twitter user ID for a username."""
     url = f"https://api.x.com/2/users/by/username/{username}"
     headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
-    r = requests.get(url, headers=headers)
-    if r.status_code != 200:
-        return None, r.json()
-    return r.json()["data"]["id"], r.json()
+
+    try:
+        r = requests.get(url, headers=headers, timeout=10)
+        r.raise_for_status()
+        return r.json()["data"]["id"], None
+    except Exception as e:
+        return None, {"error": str(e)}
 
 
 def fetch_tweets(user_id, limit=25):
+    """Fetch a user's recent tweets."""
     url = f"https://api.x.com/2/users/{user_id}/tweets"
     params = {"max_results": limit, "tweet.fields": "id,text,created_at"}
     headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
-    r = requests.get(url, headers=headers, params=params)
-    if r.status_code != 200:
-        return None, r.json()
-    tweets = r.json().get("data", [])
-    return [t["text"] for t in tweets], r.json()
+
+    try:
+        r = requests.get(url, headers=headers, params=params, timeout=10)
+        r.raise_for_status()
+        tweets = r.json().get("data", [])
+        return [t["text"] for t in tweets], None
+    except Exception as e:
+        return None, {"error": str(e)}
 
 
 def predict_stress(text):
-    inputs = tokenizer(text, return_tensors="pt",
-                       truncation=True, padding=True,
-                       max_length=128).to(device)
+    """Predict the stress label for a single text."""
+    inputs = tokenizer(
+        text,
+        return_tensors="pt",
+        truncation=True,
+        padding=True,
+        max_length=128
+    ).to(device)
+
     with torch.no_grad():
         outputs = model(**inputs)
         probs = torch.softmax(outputs.logits, dim=1)[0]
+
     label = torch.argmax(probs).item()
     return label, float(probs[1])
 
 
+# -----------------------------
+# API ENDPOINTS
+# -----------------------------
+@app.on_event("startup")
+async def startup_event():
+    """Load the model when the application starts."""
+    print("Starting application...")
+    load_model_once()
+    print("Application ready!")
+
+
+@app.get("/")
+def root():
+    """Health check endpoint"""
+    return {
+        "status": "online",
+        "message": "Stress Detection API is running",
+        "model_loaded": model is not None
+    }
+
+
+@app.get("/health")
+def health():
+    """Detailed health check"""
+    return {
+        "status": "healthy",
+        "model_loaded": model is not None,
+        "device": str(device) if device else "not loaded",
+        "tokenizer_loaded": tokenizer is not None
+    }
+
+
 @app.get("/analyze/{username}", response_model=StressResponse)
 def analyze(username: str):
-    user_id, _ = get_user_id(username)
-    if user_id is None:
-        return StressResponse(message="Failed to fetch user profile.", data=None)
-
-    tweets, _ = fetch_tweets(user_id)
+    """Analyze the stress level from a user's tweets."""
+
+    # Make sure the model is loaded
+    if model is None:
+        load_model_once()
+
+    # 1. Get user ID
+    user_id, error = get_user_id(username)
+    if error:
+        return StressResponse(
+            message=f"Failed to fetch user profile: {error.get('error', 'Unknown error')}",
+            data=None
+        )
+
+    # 2. Fetch tweets
+    tweets, error = fetch_tweets(user_id)
+    if error:
+        return StressResponse(
+            message=f"Failed to fetch tweets: {error.get('error', 'Unknown error')}",
+            data=None
+        )
+
     if not tweets:
-        return StressResponse(message="User protected or has no tweets.", data=None)
-
+        return StressResponse(
+            message="User has no tweets or account is protected.",
+            data=None
+        )
+
+    # 3. Predict stress for each tweet
     labels = []
-    for t in tweets:
-        label, _ = predict_stress(t)
-        labels.append(label)
-
+    for tweet in tweets:
+        try:
+            label, _ = predict_stress(tweet)
+            labels.append(label)
+        except Exception as e:
+            print(f"Skipping tweet due to error: {e}")
+            continue
+
+    if not labels:
+        return StressResponse(
+            message="Failed to analyze tweets.",
+            data=None
+        )
+
+    # 4. Calculate stress statistics
     stress_percentage = round(sum(labels) / len(labels) * 100, 2)
-
+
+    # Determine stress status
     if stress_percentage <= 25:
-        status = 0
+        status = 0  # Low
     elif stress_percentage <= 50:
-        status = 1
+        status = 1  # Medium
     elif stress_percentage <= 75:
-        status = 2
+        status = 2  # High
     else:
-        status = 3
-
+        status = 3  # Very High
+
     return StressResponse(
         message="Analysis successful",
         data={
             "username": username,
             "total_tweets": len(tweets),
+            "analyzed_tweets": len(labels),
             "stress_level": stress_percentage,
             "stress_status": status
         }
     )
+
+
+# -----------------------------
+# RUN (for local testing)
+# -----------------------------
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)
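
For reference, a minimal client-side smoke test of the new endpoints. This is a sketch under assumptions: it presumes the server above is already running locally on port 7860 (matching the `uvicorn.run` call), and the handle `jack` is only a placeholder username.

# smoke_test.py — assumes app.py is running on localhost:7860;
# "jack" is a placeholder handle, not tied to this repo.
import requests

BASE_URL = "http://localhost:7860"

# The health endpoints report whether the lazily loaded model is ready.
print(requests.get(f"{BASE_URL}/", timeout=10).json())
print(requests.get(f"{BASE_URL}/health", timeout=10).json())

# Full analysis; the first call may be slow if the model loads on demand.
body = requests.get(f"{BASE_URL}/analyze/jack", timeout=300).json()
print(body["message"])
if body.get("data"):
    print("stress_level:", body["data"]["stress_level"])
    print("stress_status:", body["data"]["stress_status"])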
requirements.txt CHANGED
@@ -1,7 +1,7 @@
-numpy<2
-torch==2.2.2
-transformers==4.39.3
-accelerate
-gradio
-uvicorn
-fastapi
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+torch==2.1.0
+transformers==4.35.0
+huggingface_hub==0.19.4
+requests==2.31.0
+pydantic==2.5.0
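
As a quick sanity check on these pins, a small sketch using the standard library's `importlib.metadata` can compare installed versions against the list above; the names and versions below are copied from this file.

# check_pins.py — compare installed versions against the requirements.txt pins.
from importlib.metadata import PackageNotFoundError, version

PINS = {
    "fastapi": "0.104.1",
    "uvicorn": "0.24.0",
    "torch": "2.1.0",
    "transformers": "4.35.0",
    "huggingface_hub": "0.19.4",
    "requests": "2.31.0",
    "pydantic": "2.5.0",
}

for name, expected in PINS.items():
    try:
        installed = version(name)
    except PackageNotFoundError:
        print(f"{name}: NOT INSTALLED (expected {expected})")
        continue
    marker = "OK" if installed == expected else f"MISMATCH (expected {expected})"
    print(f"{name}: {installed} {marker}")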