gaidasalsaa committed on
Commit
e92da9e
·
1 Parent(s): d183062
Files changed (3) hide show
  1. Dockerfile +34 -0
  2. app.py +200 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use official Python runtime
FROM python:3.10-slim

# Install system dependencies.
# curl is needed by the HEALTHCHECK below; --no-install-recommends skips
# optional packages so the image stays small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Hugging Face Spaces runs the container as UID 1000. Create that user and
# give it a writable home and working directory, because the application
# downloads the tokenizer and model weights at startup and caches them
# under $HOME.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /app \
    && chown user:user /app
USER user

# Set environment variables
# (~/.local/bin is where a non-root `pip install` places console scripts
# such as uvicorn.)
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONUNBUFFERED=1

# Set working directory
WORKDIR /app

# Copy requirements first (for better caching)
COPY --chown=user:user requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY --chown=user:user app.py .

# Expose port 7860 (required by HF Spaces)
EXPOSE 7860

# Health check
# NOTE: the probe only succeeds if the application answers GET /health with
# a 2xx status; `curl -f` turns any HTTP error into a failed check.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run the application
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
app.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
from contextlib import asynccontextmanager
from typing import Optional
from urllib.parse import quote

import requests
import torch
from fastapi import FastAPI
from huggingface_hub import hf_hub_download
from pydantic import BaseModel
from transformers import AutoTokenizer, BertForSequenceClassification
10
logger = logging.getLogger("app")
logging.basicConfig(level=logging.INFO)

# =====================================================
# CONFIG
# =====================================================
HF_MODEL_REPO = "gaidasalsaa/indobertweet-xstress-model"
BASE_MODEL = "indolem/indobertweet-base-uncased"
PT_FILE = "model_indobertweet.pth"

# The X API bearer token is a credential and must never live in source
# control. It is read from the environment instead (e.g. a secret configured
# for the deployment). The token that was previously committed here is
# exposed in the repository history and should be revoked and regenerated.
BEARER_TOKEN = os.environ.get("X_BEARER_TOKEN", "")
if not BEARER_TOKEN:
    logger.warning(
        "X_BEARER_TOKEN is not set; requests to the X API will be rejected."
    )

# =====================================================
# GLOBAL MODEL STORAGE
# =====================================================
# Both are populated by load_model_once(); None means "not loaded yet".
tokenizer = None
model = None
27
+
28
+
29
+ # =====================================================
30
+ # LOAD MODEL
31
+ # =====================================================
32
def load_model_once():
    """Load the tokenizer and the fine-tuned classifier into module globals.

    The call is a no-op when both ``tokenizer`` and ``model`` are already
    set. Everything is built in local variables first and only published to
    the globals once the fine-tuned weights have been applied, so a failure
    part-way through (download error, state-dict mismatch) never leaves a
    base model without fine-tuned weights looking "loaded".

    Raises:
        Whatever the tokenizer/model loaders, ``hf_hub_download``,
        ``torch.load`` or ``load_state_dict`` raise; nothing is swallowed.
    """
    global tokenizer, model

    if tokenizer is not None and model is not None:
        logger.info("Model already loaded.")
        return

    logger.info("Starting model loading...")

    device = "cpu"
    logger.info("Using device: %s", device)

    # ---- load tokenizer ----
    logger.info("Loading tokenizer...")
    loaded_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    logger.info("Tokenizer loaded")

    # ---- download .pth ----
    # Log the file that is actually requested instead of a stale name.
    logger.info("Downloading %s...", PT_FILE)
    model_path = hf_hub_download(
        repo_id=HF_MODEL_REPO,
        filename=PT_FILE,
    )
    logger.info("Model file downloaded: %s", model_path)

    logger.info("Loading base model architecture...")
    loaded_model = BertForSequenceClassification.from_pretrained(
        BASE_MODEL,
        num_labels=2,
    )

    logger.info("Loading fine-tuned weights (.pth)...")
    # weights_only=True restricts unpickling to tensors and plain containers,
    # so a downloaded checkpoint cannot execute arbitrary code on load.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)

    loaded_model.load_state_dict(state_dict, strict=True)
    logger.info("Weights loaded successfully")

    loaded_model.to(device)
    loaded_model.eval()

    # Publish only after every step above succeeded.
    tokenizer, model = loaded_tokenizer, loaded_model

    logger.info("MODEL READY")
73
+
74
+
75
+ # =====================================================
76
+ # FASTAPI
77
+ # =====================================================
78
def startup_event():
    """Load the tokenizer and model before the server starts serving."""
    logger.info("Starting model loading on startup...")
    load_model_once()


@asynccontextmanager
async def lifespan(_app):
    """Run start-up work for the application.

    Replaces the deprecated ``@app.on_event("startup")`` hook: the code
    before ``yield`` runs once before requests are accepted. There is no
    shutdown work, so nothing follows the ``yield``.
    """
    startup_event()
    yield


app = FastAPI(title="Stress Detection API", lifespan=lifespan)


@app.get("/health")
def health():
    """Liveness probe used by the container HEALTHCHECK (GET /health).

    Returns:
        A dict with a constant ``status`` and a ``model_loaded`` flag that is
        true once both the tokenizer and the model globals are populated.
    """
    return {
        "status": "ok",
        "model_loaded": tokenizer is not None and model is not None,
    }
85
+
86
+
87
class StressResponse(BaseModel):
    """Response envelope of the API: a status message plus optional payload."""

    # Human-readable outcome of the request.
    message: str
    # Analysis result; stays None when no analysis could be produced.
    data: Optional[dict] = None
90
+
91
+
92
+ # =====================================================
93
+ # TWITTER API
94
+ # =====================================================
95
def get_user_id(username):
    """Resolve an X username to its numeric user id.

    Args:
        username: Handle to look up. It is percent-encoded before being
            placed in the URL path so characters such as ``/`` or ``?``
            cannot alter the request target.

    Returns:
        A ``(user_id, payload)`` tuple. ``user_id`` is ``None`` when the
        lookup failed: network error, non-JSON body, non-200 status, or a
        200 response that carries no ``data.id`` (for example an
        errors-only body). ``payload`` is the decoded JSON body, or
        ``{"error": "<reason>"}`` when no body could be decoded.
    """
    url = f"https://api.x.com/2/users/by/username/{quote(str(username), safe='')}"
    headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}

    try:
        # Without a timeout, requests waits indefinitely on a stalled peer.
        r = requests.get(url, headers=headers, timeout=10)
        payload = r.json()  # decode once and reuse
    except (requests.RequestException, ValueError) as exc:
        logger.warning("User lookup failed for %r: %s", username, exc)
        return None, {"error": str(exc)}

    if r.status_code != 200:
        return None, payload

    user = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(user, dict) or "id" not in user:
        return None, payload
    return user["id"], payload
102
+
103
+
104
def fetch_tweets(user_id, limit=25):
    """Fetch the text of a user's most recent tweets.

    Args:
        user_id: Numeric X user id.
        limit: Maximum number of tweet texts to return.

    Returns:
        A ``(texts, payload)`` tuple. ``texts`` is a list of tweet texts
        (possibly empty), or ``None`` when the request failed: network
        error, non-JSON body, or non-200 status. ``payload`` is the decoded
        JSON body, or ``{"error": "<reason>"}`` when none could be decoded.
    """
    url = f"https://api.x.com/2/users/{user_id}/tweets"
    params = {
        # The timeline endpoint only accepts max_results between 5 and 100;
        # clamp the request and trim the result to the caller's limit below.
        "max_results": max(5, min(limit, 100)),
        "tweet.fields": "id,text,created_at",
    }
    headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}

    try:
        # Without a timeout, requests waits indefinitely on a stalled peer.
        r = requests.get(url, headers=headers, params=params, timeout=10)
        payload = r.json()  # decode once and reuse
    except (requests.RequestException, ValueError) as exc:
        logger.warning("Tweet fetch failed for user %s: %s", user_id, exc)
        return None, {"error": str(exc)}

    if r.status_code != 200:
        return None, payload

    texts = [t["text"] for t in payload.get("data", [])]
    return texts[:max(limit, 0)], payload
113
+
114
+
115
+ # =====================================================
116
+ # KEYWORD EXTRACTION
117
+ # =====================================================
118
def extract_keywords(tweets):
    """Return the stress-lexicon words that occur in any of the tweets.

    Matching is a case-insensitive substring test, so a lexicon word is also
    reported when it appears inside a longer word.

    Args:
        tweets: Iterable of tweet texts.

    Returns:
        Alphabetically sorted list of matched lexicon words without
        duplicates. Sorting keeps the output stable between runs; iterating
        a set of strings does not guarantee an order across processes.
    """
    stress_words = (
        "gelisah", "cemas", "tidur", "takut", "hati",
        "resah", "sampe", "tenang", "suka", "mulu",
        "sedih", "ngerasa", "gimana", "gatau",
        "perasaan", "nangis", "deg", "khawatir",
        "pikiran", "harap", "gabisa", "bener", "pengen",
        "sakit", "susah", "bangun", "biar", "jam", "kaya",
        "bingung", "mikir", "tuhan", "mikirin",
        "bawaannya", "marah", "tbtb", "anjir", "cape",
        "panik", "enak", "kali", "pusing", "semoga",
        "kadang", "langsung", "kemarin", "tugas",
        "males",
    )

    # Lower-case each tweet once instead of once per lexicon word.
    lowered = [t.lower() for t in tweets]
    found = {
        word
        for word in stress_words
        if any(word in text for text in lowered)
    }
    return sorted(found)
141
+
142
+
143
+ # =====================================================
144
+ # INFERENCE
145
+ # =====================================================
146
def predict_stress(text):
    """Classify one text with the loaded model.

    Args:
        text: Text to classify; it is truncated to 128 tokens.

    Returns:
        A ``(label, probability)`` tuple: the index of the highest-scoring
        class and the softmax probability of class index 1.
    """
    encoded = tokenizer(
        text,
        max_length=128,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )

    # Inference only, so no gradients are needed.
    with torch.no_grad():
        logits = model(**encoded).logits
        probabilities = torch.softmax(logits, dim=1)[0]

    predicted_label = torch.argmax(probabilities).item()
    class_one_probability = float(probabilities[1])
    return predicted_label, class_one_probability
161
+
162
+
163
+ # =====================================================
164
+ # API ROUTE
165
+ # =====================================================
166
@app.get("/analyze/{username}", response_model=StressResponse)
def analyze(username: str):
    """Estimate a stress level from a user's recent tweets.

    Looks up the user, fetches recent tweets, classifies each one and
    reports the share of tweets predicted as label 1 as a percentage.

    Args:
        username: X handle taken from the URL path.

    Returns:
        A ``StressResponse``. ``data`` is ``None`` when the profile lookup
        failed or no tweets were available. Otherwise it holds the
        username, the tweet count, the stress percentage, the matched
        keywords, and a status bucket (0: <=25, 1: <=50, 2: <=75, 3: >75).
    """
    user_id, lookup_payload = get_user_id(username)
    if user_id is None:
        # Keep the upstream error visible in the logs rather than dropping it.
        logger.warning(
            "Profile lookup failed for %r: %s", username, lookup_payload
        )
        return StressResponse(message="Failed to fetch profile", data=None)

    tweets, timeline_payload = fetch_tweets(user_id)
    if not tweets:
        if tweets is None:
            # None means the request itself failed; an empty list means the
            # timeline has no tweets. The response is the same for both.
            logger.warning(
                "Tweet fetch failed for %r: %s", username, timeline_payload
            )
        return StressResponse(message="No tweets available", data=None)

    labels = [predict_stress(t)[0] for t in tweets]

    stress_percentage = round(sum(labels) / len(labels) * 100, 2)

    if stress_percentage <= 25:
        status = 0
    elif stress_percentage <= 50:
        status = 1
    elif stress_percentage <= 75:
        status = 2
    else:
        status = 3

    keywords = extract_keywords(tweets)

    return StressResponse(
        message="Analysis complete",
        data={
            "username": username,
            "total_tweets": len(tweets),
            "stress_level": stress_percentage,
            "keywords": keywords,
            "stress_status": status,
        },
    )
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ uvicorn[standard]==0.34.0
3
+ torch==2.5.1
4
+ transformers==4.48.0
5
+ huggingface-hub==0.27.1
6
+ requests==2.32.3
7
+ pydantic==2.10.6