AD232025 committed on
Commit 7dd775e · verified · 1 Parent(s): 81752b4

Update model.py

Files changed (1)
model.py +351 -340
model.py CHANGED
@@ -1,340 +1,351 @@
- import os
- import re
- from typing import Dict, List
-
- from huggingface_hub import InferenceClient
-
- # =========================================================
- # HUGGING FACE INFERENCE CLIENT
- # =========================================================
-
- HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # optional, set in HF Space secrets
- if HF_API_TOKEN:
-     client = InferenceClient(token=HF_API_TOKEN)
- else:
-     client = InferenceClient()  # anonymous for public models (rate-limited)
-
- # Model IDs
- TOX_MODEL_ID = "unitary/toxic-bert"
- OFF_MODEL_ID = "cardiffnlp/twitter-roberta-base-offensive"
- EMO_MODEL_ID = "j-hartmann/emotion-english-distilroberta-base"
- SENT_MODEL_ID = "distilbert-base-uncased-finetuned-sst-2-english"
-
- # =========================================================
- # RULE KEYWORDS / PATTERNS
- # =========================================================
-
- AGGRESSION_KEYWORDS = [
-     "stupid", "idiot", "dumb", "incompetent", "useless",
-     "trash", "garbage", "worthless", "pathetic", "clown",
-     "moron", "failure", "shut up", "hate you"
- ]
-
- THREAT_PHRASES = [
-     "you will regret", "there will be consequences", "watch your back",
-     "this is your last warning", "i'm coming for you",
-     "or else", "i'll ruin you", "i'll make you pay",
-     "i am gonna hurt you", "i'm going to hurt you",
- ]
-
- PROFANITY = [
-     "fuck", "shit", "bitch", "asshole", "bastard",
-     "motherfucker", "prick", "dickhead"
- ]
-
- POLITE_KEYWORDS = [
-     "please", "thank you", "thanks", "would you mind",
-     "if possible", "kindly", "when you have a chance",
-     "if you don't mind"
- ]
-
- FRIENDLY_KEYWORDS = [
-     "awesome", "amazing", "great job", "fantastic",
-     "love this", "appreciate you", "good vibes",
-     "wonderful", "you're the best", "you are the best",
- ]
-
- SARCASM_PATTERNS = [
-     r"yeah right",
-     r"sure you did",
-     r"great job (idiot|genius)",
-     r"nice work (moron|buddy)",
-     r"well done.*not",
-     r"nice job.*not",
- ]
-
-
- # =========================================================
- # HF INFERENCE HELPERS
- # =========================================================
-
- def _safe_text_classification(model_id: str, text: str) -> List[Dict]:
-     """
-     Wrapper around HF Inference API text classification.
-
-     Returns a list of dicts like:
-     [
-         {"label": "POSITIVE", "score": 0.95},
-         ...
-     ]
-     or [] on error.
-     """
-     try:
-         out = client.text_classification(text, model=model_id)
-         # Some clients may return a single dict; normalize to list
-         if isinstance(out, dict):
-             return [out]
-         return out or []
-     except Exception as e:
-         print(f"[WARN] HF Inference error for {model_id}: {e}")
-         return []
-
-
- def _get_sentiment(text: str):
-     """
-     Returns (pos, neg) based on distilbert sentiment.
-     """
-     results = _safe_text_classification(SENT_MODEL_ID, text)
-     pos = 0.5
-     neg = 0.5
-
-     if results:
-         scores = {r["label"].upper(): float(r["score"]) for r in results}
-         # typical labels: POSITIVE / NEGATIVE
-         if "POSITIVE" in scores:
-             pos = scores["POSITIVE"]
-             neg = 1.0 - pos
-         elif "NEGATIVE" in scores:
-             neg = scores["NEGATIVE"]
-             pos = 1.0 - neg
-
-     return pos, neg
-
-
- def _get_toxicity(text: str) -> float:
-     """
-     Return a toxicity-like score in [0, 1].
-     For unitary/toxic-bert, we consider any 'toxic-like' label as signal.
-     """
-     results = _safe_text_classification(TOX_MODEL_ID, text)
-     if not results:
-         return 0.0
-
-     toxic_score = 0.0
-     for r in results:
-         label = r["label"].lower()
-         if any(key in label for key in ["toxic", "obscene", "insult", "hate", "threat"]):
-             toxic_score = max(toxic_score, float(r["score"]))
-     return toxic_score
-
-
- def _get_offensive(text: str) -> float:
-     """
-     Return an offensive score in [0, 1].
-     For cardiffnlp/twitter-roberta-base-offensive, look for OFFENSE-like labels.
-     """
-     results = _safe_text_classification(OFF_MODEL_ID, text)
-     if not results:
-         return 0.0
-
-     off_score = 0.0
-     for r in results:
-         label = r["label"].lower()
-         if "offense" in label or "offensive" in label:
-             off_score = max(off_score, float(r["score"]))
-     return off_score
-
-
- def _get_emotions(text: str):
-     """
-     Returns a dict like {"anger": 0.3, "joy": 0.6}.
-     """
-     results = _safe_text_classification(EMO_MODEL_ID, text)
-     if not results:
-         return {"anger": 0.0, "joy": 0.0}
-
-     emo = {}
-     for r in results:
-         emo[r["label"].lower()] = float(r["score"])
-
-     anger = emo.get("anger", 0.0)
-     joy = emo.get("joy", 0.0)
-     return {"anger": anger, "joy": joy}
-
-
- # =========================================================
- # MAIN CLASSIFIER (STRICT OPTION A)
- # =========================================================
-
- def classify_tone_rich(text: str):
-     lowered = text.lower()
-     explanation = []
-
-     # --- Model signals ---
-     pos, neg = _get_sentiment(text)
-     tox_score = _get_toxicity(text)
-     off_score = _get_offensive(text)
-     emo = _get_emotions(text)
-     anger = emo.get("anger", 0.0)
-     joy = emo.get("joy", 0.0)
-
-     explanation.append(f"Sentiment pos={pos:.2f}, neg={neg:.2f}")
-     explanation.append(f"Toxicity={tox_score:.2f}, Offensive={off_score:.2f}")
-     explanation.append(f"Emotion anger={anger:.2f}, joy={joy:.2f}")
-
-     # --- Rule flags ---
-     has_insult = any(w in lowered for w in AGGRESSION_KEYWORDS)
-     has_threat = any(p in lowered for p in THREAT_PHRASES)
-     has_profanity = any(bad in lowered for bad in PROFANITY)
-     has_polite = any(w in lowered for w in POLITE_KEYWORDS)
-     has_friendly = any(w in lowered for w in FRIENDLY_KEYWORDS)
-     has_sarcasm = any(re.search(p, lowered) for p in SARCASM_PATTERNS)
-
-     if has_insult:
-         explanation.append("Detected explicit insult keyword.")
-     if has_threat:
-         explanation.append("Detected explicit threat phrase.")
-     if has_profanity:
-         explanation.append("Detected profanity.")
-     if has_polite:
-         explanation.append("Detected polite phrasing.")
-     if has_friendly:
-         explanation.append("Detected friendly / appreciative wording.")
-     if has_sarcasm:
-         explanation.append("Matched a sarcasm pattern.")
-
-     # =====================================================
-     # STRICT AGGRESSIVE RULES
-     # =====================================================
-
-     # 1) Threats override everything
-     if has_threat:
-         return {
-             "label": "Aggressive",
-             "confidence": 95,
-             "severity": 95,
-             "threat_score": 95,
-             "politeness_score": 0,
-             "friendly_score": 0,
-             "has_threat": True,
-             "has_profanity": has_profanity,
-             "has_sarcasm": has_sarcasm,
-             "explanation": explanation,
-         }
-
-     # 2) Profanity → aggressive
-     if has_profanity:
-         sev = max(85, int((tox_score + off_score) / 2 * 100))
-         return {
-             "label": "Aggressive",
-             "confidence": 90,
-             "severity": sev,
-             "threat_score": int(tox_score * 100),
-             "politeness_score": 0,
-             "friendly_score": 0,
-             "has_threat": has_threat,
-             "has_profanity": True,
-             "has_sarcasm": has_sarcasm,
-             "explanation": explanation,
-         }
-
-     # 3) Direct insults → aggressive
-     if has_insult:
-         sev = max(80, int((tox_score + off_score) / 2 * 100))
-         return {
-             "label": "Aggressive",
-             "confidence": 88,
-             "severity": sev,
-             "threat_score": int(tox_score * 100),
-             "politeness_score": 0,
-             "friendly_score": 0,
-             "has_threat": has_threat,
-             "has_profanity": has_profanity,
-             "has_sarcasm": has_sarcasm,
-             "explanation": explanation,
-         }
-
-     # 4) Sarcasm + negative sentiment → aggressive
-     if has_sarcasm and neg > 0.55:
-         return {
-             "label": "Aggressive",
-             "confidence": 85,
-             "severity": 85,
-             "threat_score": int(tox_score * 100),
-             "politeness_score": 0,
-             "friendly_score": 0,
-             "has_threat": has_threat,
-             "has_profanity": has_profanity,
-             "has_sarcasm": True,
-             "explanation": explanation,
-         }
-
-     # 5) High anger + toxicity
-     if anger + tox_score > 1.1:
-         return {
-             "label": "Aggressive",
-             "confidence": 80,
-             "severity": 80,
-             "threat_score": int(tox_score * 100),
-             "politeness_score": 0,
-             "friendly_score": 0,
-             "has_threat": has_threat,
-             "has_profanity": has_profanity,
-             "has_sarcasm": has_sarcasm,
-             "explanation": explanation,
-         }
-
-     # =====================================================
-     # POSITIVE LABELS – FRIENDLY / POLITE
-     # =====================================================
-     if has_friendly and pos > 0.60:
-         return {
-             "label": "Friendly",
-             "confidence": int(pos * 100),
-             "severity": 0,
-             "threat_score": int(tox_score * 100),
-             "politeness_score": int(pos * 100),
-             "friendly_score": int(pos * 100),
-             "has_threat": has_threat,
-             "has_profanity": has_profanity,
-             "has_sarcasm": has_sarcasm,
-             "explanation": explanation,
-         }
-
-     if has_polite and pos > 0.50:
-         return {
-             "label": "Polite",
-             "confidence": int(pos * 100),
-             "severity": 0,
-             "threat_score": int(tox_score * 100),
-             "politeness_score": int(pos * 100),
-             "friendly_score": 0,
-             "has_threat": has_threat,
-             "has_profanity": has_profanity,
-             "has_sarcasm": has_sarcasm,
-             "explanation": explanation,
-         }
-
-     # =====================================================
-     # NEUTRAL FALLBACK
-     # =====================================================
-     return {
-         "label": "Neutral",
-         "confidence": int((1 - neg) * 100),
-         "severity": 0,
-         "threat_score": int(tox_score * 100),
-         "politeness_score": int(pos * 100),
-         "friendly_score": int(pos * 100),
-         "has_threat": has_threat,
-         "has_profanity": has_profanity,
-         "has_sarcasm": has_sarcasm,
-         "explanation": explanation,
-     }
-
-
- # Optional wrapper for backwards compatibility
- def classify_tone(text: str):
-     r = classify_tone_rich(text)
-     aggressive_prob = r["severity"] / 100.0
-     positive_prob = r["friendly_score"] / 100.0
-     return r["label"], r["confidence"], aggressive_prob, positive_prob
+ import os
+ import re
+ from typing import Dict, List
+
+ from huggingface_hub import InferenceClient
+
+ # =========================================================
+ # HUGGING FACE INFERENCE CLIENT
+ # =========================================================
+
+ HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # optional, set in HF Space secrets
+ if HF_API_TOKEN:
+     client = InferenceClient(token=HF_API_TOKEN)
+ else:
+     client = InferenceClient()  # anonymous for public models (rate-limited)
+
+ # Model IDs
+ TOX_MODEL_ID = "unitary/toxic-bert"
+ OFF_MODEL_ID = "cardiffnlp/twitter-roberta-base-offensive"
+ EMO_MODEL_ID = "j-hartmann/emotion-english-distilroberta-base"
+ SENT_MODEL_ID = "distilbert-base-uncased-finetuned-sst-2-english"
+
+ # =========================================================
+ # RULE KEYWORDS / PATTERNS
+ # =========================================================
+
+ AGGRESSION_KEYWORDS = [
+     "stupid", "idiot", "dumb", "incompetent", "useless",
+     "trash", "garbage", "worthless", "pathetic", "clown",
+     "moron", "failure", "shut up", "hate you"
+ ]
+
+ THREAT_PHRASES = [
+     "you will regret", "there will be consequences", "watch your back",
+     "this is your last warning", "i'm coming for you",
+     "or else", "i'll ruin you", "i'll make you pay",
+     "i am gonna hurt you", "i'm going to hurt you",
+     "im gonna hurt you",  # <-- added for your exact example
+ ]
+
+ PROFANITY = [
+     "fuck", "shit", "bitch", "asshole", "bastard",
+     "motherfucker", "prick", "dickhead"
+ ]
+
+ POLITE_KEYWORDS = [
+     "please", "thank you", "thanks", "would you mind",
+     "if possible", "kindly", "when you have a chance",
+     "if you don't mind"
+ ]
+
+ FRIENDLY_KEYWORDS = [
+     "awesome", "amazing", "great job", "fantastic",
+     "love this", "appreciate you", "good vibes",
+     "wonderful", "you're the best", "you are the best",
+ ]
+
+ SARCASM_PATTERNS = [
+     r"yeah right",
+     r"sure you did",
+     r"great job (idiot|genius)",
+     r"nice work (moron|buddy)",
+     r"well done.*not",
+     r"nice job.*not",
+ ]
+
+ # Generic threat regex: "gonna/going to/will hurt you"
+ THREAT_REGEX = re.compile(r"\b(gonna|going to|will)\s+hurt you\b")
+
+
+ # =========================================================
+ # HF INFERENCE HELPERS
+ # =========================================================
+
+ def _safe_text_classification(model_id: str, text: str) -> List[Dict]:
+     """
+     Wrapper around HF Inference API text classification.
+
+     Returns a list of dicts like:
+     [
+         {"label": "POSITIVE", "score": 0.95},
+         ...
+     ]
+     or [] on error.
+     """
+     try:
+         out = client.text_classification(text, model=model_id)
+         # Some clients may return a single dict; normalize to list
+         if isinstance(out, dict):
+             return [out]
+         return out or []
+     except Exception as e:
+         print(f"[WARN] HF Inference error for {model_id}: {e}")
+         return []
+
+
+ def _get_sentiment(text: str):
+     """
+     Returns (pos, neg) based on distilbert sentiment.
+     """
+     results = _safe_text_classification(SENT_MODEL_ID, text)
+     pos = 0.5
+     neg = 0.5
+
+     if results:
+         scores = {r["label"].upper(): float(r["score"]) for r in results}
+         # typical labels: POSITIVE / NEGATIVE
+         if "POSITIVE" in scores:
+             pos = scores["POSITIVE"]
+             neg = 1.0 - pos
+         elif "NEGATIVE" in scores:
+             neg = scores["NEGATIVE"]
+             pos = 1.0 - neg
+
+     return pos, neg
+
+
+ def _get_toxicity(text: str) -> float:
+     """
+     Return a toxicity-like score in [0, 1].
+     For unitary/toxic-bert, we consider any 'toxic-like' label as signal.
+     """
+     results = _safe_text_classification(TOX_MODEL_ID, text)
+     if not results:
+         return 0.0
+
+     toxic_score = 0.0
+     for r in results:
+         label = r["label"].lower()
+         if any(key in label for key in ["toxic", "obscene", "insult", "hate", "threat"]):
+             toxic_score = max(toxic_score, float(r["score"]))
+     return toxic_score
+
+
+ def _get_offensive(text: str) -> float:
+     """
+     Return an offensive score in [0, 1].
+     For cardiffnlp/twitter-roberta-base-offensive, look for OFFENSE-like labels.
+     """
+     results = _safe_text_classification(OFF_MODEL_ID, text)
+     if not results:
+         return 0.0
+
+     off_score = 0.0
+     for r in results:
+         label = r["label"].lower()
+         if "offense" in label or "offensive" in label:
+             off_score = max(off_score, float(r["score"]))
+     return off_score
+
+
+ def _get_emotions(text: str):
+     """
+     Returns a dict like {"anger": 0.3, "joy": 0.6}.
+     """
+     results = _safe_text_classification(EMO_MODEL_ID, text)
+     if not results:
+         return {"anger": 0.0, "joy": 0.0}
+
+     emo = {}
+     for r in results:
+         emo[r["label"].lower()] = float(r["score"])
+
+     anger = emo.get("anger", 0.0)
+     joy = emo.get("joy", 0.0)
+     return {"anger": anger, "joy": joy}
+
+
+ # =========================================================
+ # MAIN CLASSIFIER (STRICT OPTION A)
+ # =========================================================
+
+ def classify_tone_rich(text: str):
+     lowered = text.lower()
+     explanation = []
+
+     # --- Model signals ---
+     pos, neg = _get_sentiment(text)
+     tox_score = _get_toxicity(text)
+     off_score = _get_offensive(text)
+     emo = _get_emotions(text)
+     anger = emo.get("anger", 0.0)
+     joy = emo.get("joy", 0.0)
+
+     explanation.append(f"Sentiment pos={pos:.2f}, neg={neg:.2f}")
+     explanation.append(f"Toxicity={tox_score:.2f}, Offensive={off_score:.2f}")
+     explanation.append(f"Emotion anger={anger:.2f}, joy={joy:.2f}")
+
+     # --- Rule flags ---
+     has_insult = any(w in lowered for w in AGGRESSION_KEYWORDS)
+
+     # THREATS: list OR generic regex
+     has_threat_phrase = any(p in lowered for p in THREAT_PHRASES)
+     has_threat_regex = bool(THREAT_REGEX.search(lowered))
+     has_threat = has_threat_phrase or has_threat_regex
+
+     has_profanity = any(bad in lowered for bad in PROFANITY)
+     has_polite = any(w in lowered for w in POLITE_KEYWORDS)
+     has_friendly = any(w in lowered for w in FRIENDLY_KEYWORDS)
+     has_sarcasm = any(re.search(p, lowered) for p in SARCASM_PATTERNS)
+
+     if has_insult:
+         explanation.append("Detected explicit insult keyword.")
+     if has_threat_phrase:
+         explanation.append("Detected explicit threat phrase.")
+     if has_threat_regex:
+         explanation.append("Matched generic threat pattern (gonna/going to/will hurt you).")
+     if has_profanity:
+         explanation.append("Detected profanity.")
+     if has_polite:
+         explanation.append("Detected polite phrasing.")
+     if has_friendly:
+         explanation.append("Detected friendly / appreciative wording.")
+     if has_sarcasm:
+         explanation.append("Matched a sarcasm pattern.")
+
+     # =====================================================
+     # STRICT AGGRESSIVE RULES
+     # =====================================================
+
+     # 1) Threats override everything
+     if has_threat:
+         return {
+             "label": "Aggressive",
+             "confidence": 95,
+             "severity": 95,
+             "threat_score": 95,
+             "politeness_score": 0,
+             "friendly_score": 0,
+             "has_threat": True,
+             "has_profanity": has_profanity,
+             "has_sarcasm": has_sarcasm,
+             "explanation": explanation,
+         }
+
+     # 2) Profanity → aggressive
+     if has_profanity:
+         sev = max(85, int((tox_score + off_score) / 2 * 100))
+         return {
+             "label": "Aggressive",
+             "confidence": 90,
+             "severity": sev,
+             "threat_score": int(tox_score * 100),
+             "politeness_score": 0,
+             "friendly_score": 0,
+             "has_threat": has_threat,
+             "has_profanity": True,
+             "has_sarcasm": has_sarcasm,
+             "explanation": explanation,
+         }
+
+     # 3) Direct insults → aggressive
+     if has_insult:
+         sev = max(80, int((tox_score + off_score) / 2 * 100))
+         return {
+             "label": "Aggressive",
+             "confidence": 88,
+             "severity": sev,
+             "threat_score": int(tox_score * 100),
+             "politeness_score": 0,
+             "friendly_score": 0,
+             "has_threat": has_threat,
+             "has_profanity": has_profanity,
+             "has_sarcasm": has_sarcasm,
+             "explanation": explanation,
+         }
+
+     # 4) Sarcasm + negative sentiment → aggressive
+     if has_sarcasm and neg > 0.55:
+         return {
+             "label": "Aggressive",
+             "confidence": 85,
+             "severity": 85,
+             "threat_score": int(tox_score * 100),
+             "politeness_score": 0,
+             "friendly_score": 0,
+             "has_threat": has_threat,
+             "has_profanity": has_profanity,
+             "has_sarcasm": True,
+             "explanation": explanation,
+         }
+
+     # 5) High anger + toxicity
+     if anger + tox_score > 1.1:
+         return {
+             "label": "Aggressive",
+             "confidence": 80,
+             "severity": 80,
+             "threat_score": int(tox_score * 100),
+             "politeness_score": 0,
+             "friendly_score": 0,
+             "has_threat": has_threat,
+             "has_profanity": has_profanity,
+             "has_sarcasm": has_sarcasm,
+             "explanation": explanation,
+         }
+
+     # =====================================================
+     # POSITIVE LABELS – FRIENDLY / POLITE
+     # =====================================================
+     if has_friendly and pos > 0.60:
+         return {
+             "label": "Friendly",
+             "confidence": int(pos * 100),
+             "severity": 0,
+             "threat_score": int(tox_score * 100),
+             "politeness_score": int(pos * 100),
+             "friendly_score": int(pos * 100),
+             "has_threat": has_threat,
+             "has_profanity": has_profanity,
+             "has_sarcasm": has_sarcasm,
+             "explanation": explanation,
+         }
+
+     if has_polite and pos > 0.50:
+         return {
+             "label": "Polite",
+             "confidence": int(pos * 100),
+             "severity": 0,
+             "threat_score": int(tox_score * 100),
+             "politeness_score": int(pos * 100),
+             "friendly_score": 0,
+             "has_threat": has_threat,
+             "has_profanity": has_profanity,
+             "has_sarcasm": has_sarcasm,
+             "explanation": explanation,
+         }
+
+     # =====================================================
+     # NEUTRAL FALLBACK
+     # =====================================================
+     return {
+         "label": "Neutral",
+         "confidence": int((1 - neg) * 100),
+         "severity": 0,
+         "threat_score": int(tox_score * 100),
+         "politeness_score": int(pos * 100),
+         "friendly_score": int(pos * 100),
+         "has_threat": has_threat,
+         "has_profanity": has_profanity,
+         "has_sarcasm": has_sarcasm,
+         "explanation": explanation,
+     }
+
+
+ # Optional wrapper for backwards compatibility
+ def classify_tone(text: str):
+     r = classify_tone_rich(text)
+     aggressive_prob = r["severity"] / 100.0
+     positive_prob = r["friendly_score"] / 100.0
+     return r["label"], r["confidence"], aggressive_prob, positive_prob
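The substance of this commit is the new THREAT_REGEX and the split threat flags (has_threat_phrase / has_threat_regex). Below is a minimal sketch of how the updated module might be exercised; it is not part of the commit, it assumes model.py is importable as-is and that the hosted models are reachable through the Inference API, and the sample strings are invented for illustration.

# Illustrative check of the new generic-threat path (not part of the commit).
# Assumes model.py is on the import path; the sample inputs below are made up.
from model import THREAT_REGEX, classify_tone, classify_tone_rich

# The regex alone can be checked offline, with no API calls: it requires
# "gonna"/"going to"/"will" followed by "hurt you" as whole words.
assert THREAT_REGEX.search("im gonna hurt you")
assert THREAT_REGEX.search("i will hurt you")
assert THREAT_REGEX.search("this will hurt your score") is None  # "your" breaks the \b boundary

# Full classification calls the hosted models, so the explanation scores depend
# on the API, but the threat rule returns before any model threshold is applied,
# so the label for a matched threat is fixed by the rule itself.
result = classify_tone_rich("im gonna hurt you")
print(result["label"], result["confidence"])  # Aggressive 95 per the threat rule

# Backwards-compatible wrapper: (label, confidence, aggressive_prob, positive_prob)
print(classify_tone("thanks so much, great job on this!"))

Because the threat rule short-circuits ahead of the model-based thresholds, adding the regex changes behavior only for inputs the phrase list previously missed, such as the apostrophe-free "im gonna hurt you" the inline comment refers to.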