Spaces:
Running
Running
Update cluster_insight.py
Browse files
- cluster_insight.py +15 -2
cluster_insight.py
CHANGED
|
@@ -23,7 +23,20 @@ import pickle
|
|
| 23 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 24 |
import jieba
|
| 25 |
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
def extract_cluster_keywords_auto(sentences, labels, cluster_id, top_n=3):
|
| 29 |
"""
|
|
@@ -235,7 +248,7 @@ def cluster_and_visualize(
|
|
| 235 |
)
|
| 236 |
|
| 237 |
img_bytes = fig.to_image(format="png", width=900, height=600, scale=2 ) # 新增这一行!
|
| 238 |
-
b64 = base64.b64encode(img_bytes).decode()
|
| 239 |
# print(f"{b64}解析成功!")
|
| 240 |
# return b64, stats
|
| 241 |
return fig, b64, stats
|
|
|
|
| 23 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 24 |
import jieba
|
| 25 |
|
| 26 |
+
|
| 27 |
+
from plotly.io._kaleido import KaleidoProcess # 导入 Kaleido 进程类
|
| 28 |
+
|
| 29 |
+
# ========== 关键1:指定字体文件路径(HF Spaces 根目录) ==========
|
| 30 |
+
FONT_FILE_PATH = "./SourceHanSansSC-Light.otf" # 字体文件和 main.py 同级
|
| 31 |
+
CHINESE_FONT = "Source Han Sans SC" # 字体名称(必须和字体文件内置名称一致)
|
| 32 |
+
|
| 33 |
+
# ========== 关键2:验证字体文件是否存在(避免路径错误) ==========
|
| 34 |
+
if not os.path.exists(FONT_FILE_PATH):
|
| 35 |
+
print(f"⚠️ 字体文件不存在!当前路径:{os.getcwd()},文件列表:{os.listdir('.')}")
|
| 36 |
+
else:
|
| 37 |
+
print(f"✅ 找到字体文件:{FONT_FILE_PATH},大小:{os.path.getsize(FONT_FILE_PATH)/1024:.1f}KB")
|
| 38 |
+
|
| 39 |
+
# CHINESE_FONT = "Noto Sans SC" # Noto Sans SC(与思源黑体同源;Plotly 不自带字体,需在渲染环境中安装该字体后才能使用)
|
| 40 |
|
| 41 |
def extract_cluster_keywords_auto(sentences, labels, cluster_id, top_n=3):
|
| 42 |
"""
|
|
|
|
| 248 |
)
|
| 249 |
|
| 250 |
img_bytes = fig.to_image(format="png", width=900, height=600, scale=2 ) # 新增这一行!
|
| 251 |
+
b64 = base64.b64encode(img_bytes).decode('utf-8')
|
| 252 |
# print(f"{b64}解析成功!")
|
| 253 |
# return b64, stats
|
| 254 |
return fig, b64, stats
|