gaidasalsaa committed on
Commit
776b898
·
1 Parent(s): 176915d
Files changed (1) hide show
  1. app.py +73 -68
app.py CHANGED
@@ -3,32 +3,45 @@ from pydantic import BaseModel
3
  from typing import Optional
4
  import requests
5
  import torch
6
- from transformers import AutoTokenizer, BertForSequenceClassification
 
7
  from huggingface_hub import hf_hub_download
8
 
9
  import logging
10
  logger = logging.getLogger("app")
11
  logging.basicConfig(level=logging.INFO)
12
 
13
# =====================================================
# CONFIG
# =====================================================
import os

# Hugging Face repository that hosts the fine-tuned weights file.
HF_MODEL_REPO = "gaidasalsaa/model-indobertweet-terbaru"
# Base checkpoint used for both the tokenizer and the model architecture.
BASE_MODEL = "indolem/indobertweet-base-uncased"
# File name of the state dict downloaded from HF_MODEL_REPO.
PT_FILE = "model_indobertweet.pth"

# SECURITY: the X API bearer token used to be hard-coded on this line, which
# leaks the credential to anyone who can read the repository or its history.
# It is now read from the BEARER_TOKEN environment variable (the previously
# committed token should be revoked). An empty string is used when the
# variable is unset, in which case X API calls are sent with an empty token.
BEARER_TOKEN = os.environ.get("BEARER_TOKEN", "")
21
 
22
- # =====================================================
23
- # GLOBAL MODEL STORAGE
24
- # =====================================================
 
25
  tokenizer = None
26
  model = None
27
 
28
 
29
- # =====================================================
30
- # LOAD MODEL
31
- # =====================================================
 
 
 
 
 
 
 
 
 
 
 
32
  def load_model_once():
33
  global tokenizer, model
34
 
@@ -36,51 +49,37 @@ def load_model_once():
36
  logger.info("Model already loaded.")
37
  return
38
 
39
- logger.info("Starting model loading...")
40
-
41
- device = "cpu"
42
- logger.info(f"Using device: {device}")
43
-
44
- # ---- load tokenizer ----
45
  logger.info("Loading tokenizer...")
46
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
47
- logger.info("Tokenizer loaded")
48
 
49
- # ---- download .pth ----
50
- logger.info("Downloading best_indobertweet.pth...")
51
  model_path = hf_hub_download(
52
  repo_id=HF_MODEL_REPO,
53
  filename=PT_FILE,
54
  )
55
- logger.info(f"Model file downloaded: {model_path}")
56
 
57
- logger.info("Loading base model architecture...")
58
- model = BertForSequenceClassification.from_pretrained(
59
  BASE_MODEL,
60
- num_labels=2,
61
  )
62
 
63
- logger.info("Loading fine-tuned weights (.pth)...")
64
  state_dict = torch.load(model_path, map_location="cpu")
 
65
 
66
- model.load_state_dict(state_dict, strict=True)
67
- logger.info("Weights loaded successfully")
68
-
69
- model.to(device)
70
  model.eval()
71
-
72
  logger.info("MODEL READY")
73
 
74
 
75
- # =====================================================
76
  # FASTAPI
77
- # =====================================================
78
  app = FastAPI(title="Stress Detection API")
79
 
80
 
81
  @app.on_event("startup")
82
  def startup_event():
83
- logger.info("Starting model loading on startup...")
84
  load_model_once()
85
 
86
 
@@ -89,61 +88,66 @@ class StressResponse(BaseModel):
89
  data: Optional[dict] = None
90
 
91
 
92
- # =====================================================
93
  # TWITTER API
94
- # =====================================================
95
  def get_user_id(username):
96
  url = f"https://api.x.com/2/users/by/username/{username}"
97
  headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
98
- r = requests.get(url, headers=headers)
99
- if r.status_code != 200:
100
- return None, r.json()
101
- return r.json()["data"]["id"], r.json()
 
 
 
 
102
 
103
 
104
  def fetch_tweets(user_id, limit=25):
105
  url = f"https://api.x.com/2/users/{user_id}/tweets"
106
  params = {"max_results": limit, "tweet.fields": "id,text,created_at"}
107
  headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
108
- r = requests.get(url, headers=headers, params=params)
109
- if r.status_code != 200:
110
- return None, r.json()
111
- tweets = r.json().get("data", [])
112
- return [t["text"] for t in tweets], r.json()
 
 
 
 
113
 
114
 
115
- # =====================================================
116
- # KEYWORD EXTRACTION
117
- # =====================================================
118
  def extract_keywords(tweets):
119
  stress_words = [
120
- "gelisah","cemas","tidur","takut","hati",
121
- "resah","sampe","tenang","suka","mulu",
122
- "sedih","ngerasa","gimana","gatau",
123
- "perasaan","nangis","deg","khawatir",
124
- "pikiran","harap","gabisa","bener","pengen",
125
- "sakit","susah","bangun","biar","jam","kaya",
126
- "bingung","mikir","tuhan","mikirin",
127
- "bawaannya","marah","tbtb","anjir","cape",
128
- "panik","enak","kali","pusing","semoga",
129
- "kadang","langsung","kemarin","tugas",
130
- "males"
131
  ]
132
 
133
  found = set()
134
  for t in tweets:
135
  lower = t.lower()
136
- for word in stress_words:
137
- if word in lower:
138
- found.add(word)
139
-
140
  return list(found)
141
 
142
 
143
- # =====================================================
144
  # INFERENCE
145
- # =====================================================
146
  def predict_stress(text):
 
 
147
  inputs = tokenizer(
148
  text,
149
  return_tensors="pt",
@@ -156,16 +160,17 @@ def predict_stress(text):
156
  outputs = model(**inputs)
157
  probs = torch.softmax(outputs.logits, dim=1)[0]
158
 
159
- label = torch.argmax(probs).item()
160
  return label, float(probs[1])
161
 
162
 
163
- # =====================================================
164
- # API ROUTE
165
- # =====================================================
166
  @app.get("/analyze/{username}", response_model=StressResponse)
167
  def analyze(username: str):
168
  user_id, _ = get_user_id(username)
 
169
  if user_id is None:
170
  return StressResponse(message="Failed to fetch profile", data=None)
171
 
@@ -174,9 +179,9 @@ def analyze(username: str):
174
  return StressResponse(message="No tweets available", data=None)
175
 
176
  labels = [predict_stress(t)[0] for t in tweets]
177
-
178
  stress_percentage = round(sum(labels) / len(labels) * 100, 2)
179
 
 
180
  if stress_percentage <= 25:
181
  status = 0
182
  elif stress_percentage <= 50:
 
3
  from typing import Optional
4
  import requests
5
  import torch
6
+ import re
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
  from huggingface_hub import hf_hub_download
9
 
10
  import logging
11
  logger = logging.getLogger("app")
12
  logging.basicConfig(level=logging.INFO)
13
 
14
# ===========================
# CONFIG
# ===========================
import os

# Hugging Face repository that hosts the fine-tuned weights file.
HF_MODEL_REPO = "gaidasalsaa/model-indobertweet-terbaru"
# Base checkpoint used for both the tokenizer and the model architecture.
BASE_MODEL = "indolem/indobertweet-base-uncased"
# File name of the state dict downloaded from HF_MODEL_REPO.
PT_FILE = "model_indobertweet.pth"

# SECURITY: the X API bearer token used to be hard-coded on this line, which
# leaks the credential to anyone who can read the repository or its history.
# It is now read from the BEARER_TOKEN environment variable (the previously
# committed token should be revoked). An empty string is used when the
# variable is unset, in which case X API calls are sent with an empty token.
BEARER_TOKEN = os.environ.get("BEARER_TOKEN", "")
22
 
23
+
24
+ # ===========================
25
+ # GLOBAL MODEL
26
+ # ===========================
27
  tokenizer = None
28
  model = None
29
 
30
 
31
+ # ===========================
32
+ # TEXT CLEANING
33
+ # ===========================
34
def clean_text(t):
    """Normalize raw tweet text.

    The text is lowercased, URLs and @mentions are removed, the leading
    ``#`` is dropped from hashtags (the tag word itself is kept), and
    surrounding whitespace is stripped. Whitespace left behind inside the
    string by removed tokens is not collapsed.

    Args:
        t: Tweet text.

    Returns:
        The cleaned string.
    """
    # (pattern, replacement) pairs, applied in this order.
    substitutions = (
        (r"http\S+|www\.\S+", ""),  # URLs
        (r"@\w+", ""),              # @mentions
        (r"#(\w+)", r"\1"),         # "#tag" -> "tag"
    )

    cleaned = t.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
40
+
41
+
42
+ # ===========================
43
+ # LOAD MODEL
44
+ # ===========================
45
  def load_model_once():
46
  global tokenizer, model
47
 
 
49
  logger.info("Model already loaded.")
50
  return
51
 
 
 
 
 
 
 
52
  logger.info("Loading tokenizer...")
53
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
 
54
 
55
+ logger.info("Downloading model weights...")
 
56
  model_path = hf_hub_download(
57
  repo_id=HF_MODEL_REPO,
58
  filename=PT_FILE,
59
  )
 
60
 
61
+ logger.info("Loading IndoBERTweet architecture...")
62
+ model = AutoModelForSequenceClassification.from_pretrained(
63
  BASE_MODEL,
64
+ num_labels=2
65
  )
66
 
67
+ logger.info("Loading state_dict...")
68
  state_dict = torch.load(model_path, map_location="cpu")
69
+ model.load_state_dict(state_dict)
70
 
 
 
 
 
71
  model.eval()
 
72
  logger.info("MODEL READY")
73
 
74
 
75
+ # ===========================
76
  # FASTAPI
77
+ # ===========================
78
  app = FastAPI(title="Stress Detection API")
79
 
80
 
81
  @app.on_event("startup")
82
  def startup_event():
 
83
  load_model_once()
84
 
85
 
 
88
  data: Optional[dict] = None
89
 
90
 
91
+ # ===========================
92
  # TWITTER API
93
+ # ===========================
94
  def get_user_id(username):
95
  url = f"https://api.x.com/2/users/by/username/{username}"
96
  headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
97
+
98
+ try:
99
+ r = requests.get(url, headers=headers, timeout=10)
100
+ if r.status_code != 200:
101
+ return None, r.json()
102
+ return r.json()["data"]["id"], r.json()
103
+ except:
104
+ return None, {"error": "Request failed"}
105
 
106
 
107
  def fetch_tweets(user_id, limit=25):
108
  url = f"https://api.x.com/2/users/{user_id}/tweets"
109
  params = {"max_results": limit, "tweet.fields": "id,text,created_at"}
110
  headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
111
+
112
+ try:
113
+ r = requests.get(url, headers=headers, params=params, timeout=10)
114
+ if r.status_code != 200:
115
+ return None, r.json()
116
+ data = r.json().get("data", [])
117
+ return [t["text"] for t in data], r.json()
118
+ except:
119
+ return None, {"error": "Request failed"}
120
 
121
 
122
+ # ===========================
123
+ # KEYWORDS
124
+ # ===========================
125
  def extract_keywords(tweets):
126
  stress_words = [
127
+ "gelisah","cemas","tidur","takut","hati","resah","sampe","tenang",
128
+ "suka","mulu","sedih","ngerasa","gimana","gatau","perasaan",
129
+ "nangis","deg","khawatir","pikiran","harap","gabisa","bener",
130
+ "pengen","sakit","susah","bangun","biar","jam","kaya","bingung",
131
+ "mikir","tuhan","mikirin","bawaannya","marah","tbtb","anjir",
132
+ "cape","panik","enak","kali","pusing","semoga","kadang","langsung",
133
+ "kemarin","tugas","males"
 
 
 
 
134
  ]
135
 
136
  found = set()
137
  for t in tweets:
138
  lower = t.lower()
139
+ for w in stress_words:
140
+ if w in lower:
141
+ found.add(w)
 
142
  return list(found)
143
 
144
 
145
+ # ===========================
146
  # INFERENCE
147
+ # ===========================
148
  def predict_stress(text):
149
+ text = clean_text(text)
150
+
151
  inputs = tokenizer(
152
  text,
153
  return_tensors="pt",
 
160
  outputs = model(**inputs)
161
  probs = torch.softmax(outputs.logits, dim=1)[0]
162
 
163
+ label = int(torch.argmax(probs).item())
164
  return label, float(probs[1])
165
 
166
 
167
+ # ===========================
168
+ # ROUTE
169
+ # ===========================
170
  @app.get("/analyze/{username}", response_model=StressResponse)
171
  def analyze(username: str):
172
  user_id, _ = get_user_id(username)
173
+
174
  if user_id is None:
175
  return StressResponse(message="Failed to fetch profile", data=None)
176
 
 
179
  return StressResponse(message="No tweets available", data=None)
180
 
181
  labels = [predict_stress(t)[0] for t in tweets]
 
182
  stress_percentage = round(sum(labels) / len(labels) * 100, 2)
183
 
184
+ # 4-level status
185
  if stress_percentage <= 25:
186
  status = 0
187
  elif stress_percentage <= 50: