WENior committed on
Commit
02acc58
·
verified ·
1 Parent(s): 199864a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +264 -0
app.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+
5
+ import gradio as gr
6
+ from transformers import pipeline
7
+
8
+
9
# ---------- 1. Load the Hugging Face models ----------

# Chinese -> English machine translation.
translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-zh-en")

# English politeness classifier
# (4 classes: polite / somewhat polite / neutral / impolite).
politeness_cls = pipeline(task="text-classification", model="Intel/polite-guard")

# English formality classifier (3 classes: formal / neutral / informal).
formality_cls = pipeline(
    task="text-classification",
    model="LenDigLearn/formality-classifier-mdeberta-v3-base",
)

# English hedge / uncertainty classifier (softened or vague wording).
hedge_cls = pipeline(task="text-classification", model="ChrisLiewJY/BERTweet-Hedge")
22
+
23
+
24
# ---------- 2. Simple rule-based scoring helpers for Chinese & English ----------

POLITE_WORDS_ZH = ["请", "麻烦您", "劳烦", "敬请", "拜托", "打扰了", "烦请"]
HEDGE_WORDS_ZH = ["是否", "可能", "大概", "也许", "好像", "觉得", "有点"]
IMPERATIVE_WORDS_ZH = ["必须", "务必", "不得", "不准", "立即", "马上", "必须要"]


def _count_keyword_hits(text, keywords):
    """Count non-overlapping occurrences of any keyword in ``text``.

    Longer keywords are tried first, so a phrase such as "敬请" is counted
    once instead of once for itself and once more for the embedded "请".
    """
    ordered = sorted((word for word in keywords if word), key=len, reverse=True)
    if not ordered:
        return 0
    pattern = "|".join(re.escape(word) for word in ordered)
    return len(re.findall(pattern, text))


def score_chinese_features(text: str):
    """Score the tone of a Chinese text with simple keyword rules.

    Args:
        text: The Chinese source text.

    Returns:
        A ``(politeness, hedging, imperative)`` tuple of floats in [0, 1].
        Blank input yields the default ``(0.5, 0.5, 0.0)``.
    """
    if not text.strip():
        return 0.5, 0.5, 0.0  # default: medium politeness/hedging, no imperative

    polite_hits = _count_keyword_hits(text, POLITE_WORDS_ZH)
    hedge_hits = _count_keyword_hits(text, HEDGE_WORDS_ZH)
    imp_hits = _count_keyword_hits(text, IMPERATIVE_WORDS_ZH)

    # More hits give a higher score; politeness and hedging saturate at
    # 3 hits, imperative at 2 hits.
    polite_score = min(polite_hits / 3.0, 1.0)
    hedge_score = min(hedge_hits / 3.0, 1.0)
    imp_score = min(imp_hits / 2.0, 1.0)

    return float(polite_score), float(hedge_score), float(imp_score)
46
+
47
+
48
def map_polite_guard_to_score(label: str):
    """Map one of the four Intel/polite-guard labels to a politeness score in [0, 1].

    An exact "polite" label scores 1.0; the remaining labels are matched by
    substring in a fixed order. Unrecognised labels fall back to 0.5.
    """
    normalized = label.lower()
    if normalized == "polite":
        return 1.0

    # Ordered (substring, score) pairs; the first substring found wins.
    substring_scores = (
        ("somewhat polite", 0.75),
        ("neutral", 0.5),
        ("impolite", 0.0),
    )
    for needle, score in substring_scores:
        if needle in normalized:
            return score
    return 0.5
60
+
61
+
62
def map_formality_to_score(label: str):
    """Map a formal / neutral / informal label to a formality score in [0, 1].

    Args:
        label: Classifier label; matching is case-insensitive and by substring.

    Returns:
        1.0 for formal, 0.5 for neutral, 0.0 for informal, and 0.5 for any
        label that matches none of them.
    """
    label = label.lower()
    # "informal" contains "formal", so it has to be tested first; otherwise
    # informal text would be scored as fully formal.
    if "informal" in label:
        return 0.0
    if "formal" in label:
        return 1.0
    if "neutral" in label:
        return 0.5
    return 0.5
72
+
73
+
74
def map_hedge_to_score(label: str):
    """Map a hedge-classifier label to a hedging score in [0, 1].

    The BERTweet-Hedge labels may look like "Hedge" / "No_Hedge" or be
    multi-class; this mapping is only a rough heuristic.

    Args:
        label: Classifier label; matching is case-insensitive.

    Returns:
        0.0 for a negated hedge label ("no_hedge", "no hedge", "non-hedge",
        "not_hedge", ...), 1.0 for a label containing "hedge" with no "no" in
        it, and 0.5 for anything else.
    """
    label = label.lower()
    # Accept any separator (or none) between the negation and "hedge" so that
    # spelling variants of the negative class do not fall through to 0.5.
    if re.search(r"no[nt]?[\s_-]*hedge", label):
        return 0.0
    if "hedge" in label and "no" not in label:
        return 1.0
    # Multi-class labels could be mapped more finely; use the midpoint for now.
    return 0.5
86
+
87
+
88
IMPERATIVE_TRIGGER_EN = [
    r"^please\b",
    r"^kindly\b",
    r"^do\b",
    r"^make\b",
    r"^send\b",
    r"^provide\b",
    r"\byou must\b",
    r"\byou have to\b",
    r"\byou are required to\b",
]

# Sentence delimiters: the text is split on these so the `^`-anchored
# triggers are tested against the start of every sentence.
_SENTENCE_SPLIT_EN = re.compile(r"[.!?;\n]+")


def score_imperative_en(text: str):
    """Estimate how imperative an English text sounds using simple rules.

    Each pattern in ``IMPERATIVE_TRIGGER_EN`` counts at most once, and it
    counts if it matches any sentence of the lower-cased text. Two or more
    distinct triggers saturate the score.

    Args:
        text: English text (for example a translated letter).

    Returns:
        A float in [0, 1]; 0.0 for blank input.
    """
    t = text.strip().lower()
    if not t:
        return 0.0

    # Checking sentence by sentence lets "^please" style triggers fire on any
    # sentence, not only on the very first word of the whole text.
    sentences = [part.strip() for part in _SENTENCE_SPLIT_EN.split(t)]
    sentences = [sentence for sentence in sentences if sentence]

    hits = sum(
        1
        for pat in IMPERATIVE_TRIGGER_EN
        if any(re.search(pat, sentence) for sentence in sentences)
    )
    # Several distinct triggers raise the score, capped at 1.0.
    return float(min(hits / 2.0, 1.0))
112
+
113
+
114
# ---------- 3. Core: analysis function ----------

def _plot_feature_bars(features, zh_vals, en_vals):
    """Draw the grouped bar chart comparing Chinese and English feature scores."""
    x = np.arange(len(features))
    width = 0.35

    fig_bar, ax_bar = plt.subplots()
    ax_bar.bar(x - width / 2, zh_vals, width, label="Chinese (source)")
    ax_bar.bar(x + width / 2, en_vals, width, label="English (translation)")
    ax_bar.set_ylim(0, 1)
    ax_bar.set_xticks(x)
    ax_bar.set_xticklabels(features)
    ax_bar.set_ylabel("Score (0–1)")
    ax_bar.set_title("Chinese vs English stylistic features")
    ax_bar.legend()
    fig_bar.tight_layout()
    # Drop the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object itself stays usable.
    plt.close(fig_bar)
    return fig_bar


def _plot_feature_radar(features, zh_vals, en_vals):
    """Draw the radar (polar) chart of the Chinese and English feature scores."""
    fig_radar = plt.figure()
    ax_radar = fig_radar.add_subplot(111, polar=True)

    angles = np.linspace(0, 2 * np.pi, len(features), endpoint=False)
    # Repeat the first point at the end so each polygon is closed.
    zh_vals_closed = zh_vals + [zh_vals[0]]
    en_vals_closed = en_vals + [en_vals[0]]
    angles_closed = list(angles) + [angles[0]]

    ax_radar.plot(angles_closed, zh_vals_closed, marker="o", label="Chinese")
    ax_radar.fill(angles_closed, zh_vals_closed, alpha=0.1)

    ax_radar.plot(angles_closed, en_vals_closed, marker="o", linestyle="--", label="English")
    ax_radar.fill(angles_closed, en_vals_closed, alpha=0.1)

    ax_radar.set_xticks(angles)
    ax_radar.set_xticklabels(features)
    ax_radar.set_yticklabels([])
    ax_radar.set_title("Stylistic profile (radar)")
    ax_radar.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1))
    fig_radar.tight_layout()
    # Same reason as for the bar chart: avoid leaking pyplot-managed figures.
    plt.close(fig_radar)
    return fig_radar


def analyze_letter(chinese_text: str):
    """Translate a Chinese letter and score the power distance of the translation.

    Args:
        chinese_text: The Chinese source text; ``None`` or blank input yields
            an empty result.

    Returns:
        A 7-tuple ``(translation, zh_stats, en_stats, level, score, bar_fig,
        radar_fig)``: the English translation, the Chinese-side and
        English-side score dicts, the power-distance level ("Low" / "Medium" /
        "High", or "N/A" for blank input), the power-distance score rounded to
        3 decimals, and the two matplotlib figures (``None`` for blank input).
    """
    if not chinese_text or not chinese_text.strip():
        return (
            "",     # English translation
            {},     # Chinese-side metrics
            {},     # English-side metrics
            "N/A",  # power-distance level
            0.0,    # power-distance score
            None,   # bar figure
            None,   # radar figure
        )

    # 1) Rule-based tone analysis of the Chinese source.
    polite_zh, hedge_zh, imp_zh = score_chinese_features(chinese_text)
    zh_stats = {
        "politeness": polite_zh,
        "hedging": hedge_zh,
        "imperative": imp_zh,
    }

    # 2) Chinese -> English translation.
    translated = translator(chinese_text, max_length=512)[0]["translation_text"]

    # 3)-5) English politeness, formality and hedging from the classifiers.
    # truncation=True asks the tokenizer to cut over-long input; intended to
    # keep long translations within each model's input limit.
    pol_out = politeness_cls(translated, truncation=True)[0]
    polite_en = map_polite_guard_to_score(pol_out["label"])

    form_out = formality_cls(translated, truncation=True)[0]
    formality_en = map_formality_to_score(form_out["label"])

    hedge_out = hedge_cls(translated, truncation=True)[0]
    hedge_en = map_hedge_to_score(hedge_out["label"])

    # 6) Rule-based imperative strength of the English text.
    imp_en = score_imperative_en(translated)

    en_stats = {
        "politeness": polite_en,
        "formality": formality_en,
        "hedging": hedge_en,
        "imperative": imp_en,
    }

    # 7) English-side power-distance score in [0, 1]: a weighted sum in which
    # low politeness, high formality, low hedging and imperative wording all
    # push the score up.
    power_distance_score = (
        0.35 * (1 - polite_en)
        + 0.25 * formality_en
        + 0.25 * (1 - hedge_en)
        + 0.15 * imp_en
    )

    # Bucket the score into three levels.
    if power_distance_score < 0.33:
        level = "Low"
    elif power_distance_score < 0.66:
        level = "Medium"
    else:
        level = "High"

    # ---------- 4./5. Charts: Chinese vs English comparison ----------
    features = ["politeness", "formality", "hedging", "imperative"]
    # The Chinese side has no formality score, so it is plotted at the
    # neutral default 0.5 (a missing imperative would default to 0.0).
    zh_vals = [zh_stats.get(k, 0.5 if k != "imperative" else 0.0) for k in features]
    en_vals = [en_stats.get(k, 0.0) for k in features]

    fig_bar = _plot_feature_bars(features, zh_vals, en_vals)
    fig_radar = _plot_feature_radar(features, zh_vals, en_vals)

    return translated, zh_stats, en_stats, level, round(power_distance_score, 3), fig_bar, fig_radar
221
+
222
+
223
# ---------- 6. Gradio interface ----------

with gr.Blocks(title="Power Distance Checker") as demo:
    # Introductory text shown at the top of the page.
    gr.Markdown(
        """
        # 📨 中译英权力距离检测(Power Distance)
        输入一段 **中文信件**,系统会:
        1. 自动翻译为英文
        2. 分析中英文两侧的礼貌度、正式度、委婉程度、命令语气
        3. 给出英文译文的 **权力距离等级:Low / Medium / High**
        4. 用柱状图 + 雷达图展示风格变化
        """
    )

    # Input: the Chinese letter to analyse.
    with gr.Row():
        input_box = gr.Textbox(
            label="输入中文信件",
            lines=6,
            placeholder="例如:您好,我想向您反馈近期的项目进度,如有不妥之处,还请您多多指正。",
        )

    run_btn = gr.Button("分析语气与权力距离")

    # Output: the English translation.
    with gr.Row():
        output_en = gr.Textbox(label="英文翻译", lines=6)

    # Output: per-language tone metrics.
    with gr.Row():
        zh_json = gr.JSON(label="中文侧语气指标(0–1)")
        en_json = gr.JSON(label="英文侧语气指标(0–1)")

    # Output: power-distance level and numeric score.
    with gr.Row():
        pd_label = gr.Label(label="Power Distance Level (English translation)")
        pd_score = gr.Number(label="Power Distance Score (0–1)", precision=3)

    # Output: the two charts.
    with gr.Row():
        bar_plot = gr.Plot(label="Bar Chart:Chinese vs English")
        radar_plot = gr.Plot(label="Radar Chart:Stylistic Profile")

    # The component order mirrors the tuple returned by analyze_letter.
    result_components = [
        output_en,
        zh_json,
        en_json,
        pd_label,
        pd_score,
        bar_plot,
        radar_plot,
    ]
    run_btn.click(fn=analyze_letter, inputs=[input_box], outputs=result_components)

if __name__ == "__main__":
    demo.launch()