Spaces:

wxy01giser
/

egisinsight

Running

wxy01giser committed on Nov 21

Commit

fadd831

verified ·

1 Parent(s): 8964366

Update cluster_insight.py

Files changed (1) hide show

cluster_insight.py CHANGED Viewed

@@ -23,7 +23,20 @@ import pickle
 from sklearn.feature_extraction.text import TfidfVectorizer
 import jieba
-CHINESE_FONT = "Noto Sans SC" # 思源黑体（跨平台兼容，Plotly 自带）
 def extract_cluster_keywords_auto(sentences, labels, cluster_id, top_n=3):
     """
@@ -235,7 +248,7 @@ def cluster_and_visualize(
     )
     img_bytes = fig.to_image(format="png", width=900, height=600, scale=2 )  # 新增这一行！
-    b64 = base64.b64encode(img_bytes).decode()
     # print(f"{b64}解析成功！")
     # return b64, stats
     return fig, b64, stats

 from sklearn.feature_extraction.text import TfidfVectorizer
 import jieba
+from plotly.io._kaleido import KaleidoProcess  # 导入 Kaleido 进程类
+# ========== 关键1：指定字体文件路径（HF Spaces 根目录） ==========
+FONT_FILE_PATH = "./SourceHanSansSC-Light.otf"  # 字体文件和 main.py 同级
+CHINESE_FONT = "Source Han Sans SC"  # 字体名称（必须和字体文件内置名称一致）
+# ========== 关键2：验证字体文件是否存在（避免路径错误） ==========
+if not os.path.exists(FONT_FILE_PATH):
+    print(f"⚠️  字体文件不存在！当前路径：{os.getcwd()}，文件列表：{os.listdir('.')}")
+else:
+    print(f"✅ 找到字体文件：{FONT_FILE_PATH}，大小：{os.path.getsize(FONT_FILE_PATH)/1024:.1f}KB")
+# CHINESE_FONT = "Noto Sans SC" # 思源黑体（跨平台兼容，Plotly 自带）
 def extract_cluster_keywords_auto(sentences, labels, cluster_id, top_n=3):
     """
     )
     img_bytes = fig.to_image(format="png", width=900, height=600, scale=2 )  # 新增这一行！
+    b64 = base64.b64encode(img_bytes).decode('utf-8')
     # print(f"{b64}解析成功！")
     # return b64, stats
     return fig, b64, stats