wxy01giser committed on
Commit
fadd831
·
verified ·
1 Parent(s): 8964366

Update cluster_insight.py

Browse files
Files changed (1) hide show
  1. cluster_insight.py +15 -2
cluster_insight.py CHANGED
@@ -23,7 +23,20 @@ import pickle
23
  from sklearn.feature_extraction.text import TfidfVectorizer
24
  import jieba
25
 
26
- CHINESE_FONT = "Noto Sans SC" # 思源黑体(跨平台兼容,Plotly 自带)
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def extract_cluster_keywords_auto(sentences, labels, cluster_id, top_n=3):
29
  """
@@ -235,7 +248,7 @@ def cluster_and_visualize(
235
  )
236
 
237
  img_bytes = fig.to_image(format="png", width=900, height=600, scale=2 ) # 新增这一行!
238
- b64 = base64.b64encode(img_bytes).decode()
239
  # print(f"{b64}解析成功!")
240
  # return b64, stats
241
  return fig, b64, stats
 
23
  from sklearn.feature_extraction.text import TfidfVectorizer
24
  import jieba
25
 
26
+
27
+ from plotly.io._kaleido import KaleidoProcess # 导入 Kaleido 进程类
28
+
29
+ # ========== 关键1:指定字体文件路径(HF Spaces 根目录) ==========
30
+ FONT_FILE_PATH = "./SourceHanSansSC-Light.otf" # 字体文件和 main.py 同级
31
+ CHINESE_FONT = "Source Han Sans SC" # 字体名称(必须和字体文件内置名称一致)
32
+
33
+ # ========== 关键2:验证字体文件是否存在(避免路径错误) ==========
34
+ if not os.path.exists(FONT_FILE_PATH):
35
+ print(f"⚠️ 字体文件不存在!当前路径:{os.getcwd()},文件列表:{os.listdir('.')}")
36
+ else:
37
+ print(f"✅ 找到字体文件:{FONT_FILE_PATH},大小:{os.path.getsize(FONT_FILE_PATH)/1024:.1f}KB")
38
+
39
+ # CHINESE_FONT = "Noto Sans SC" # 思源黑体(跨平台兼容,Plotly 自带)
40
 
41
  def extract_cluster_keywords_auto(sentences, labels, cluster_id, top_n=3):
42
  """
 
248
  )
249
 
250
  img_bytes = fig.to_image(format="png", width=900, height=600, scale=2 ) # 新增这一行!
251
+ b64 = base64.b64encode(img_bytes).decode('utf-8')
252
  # print(f"{b64}解析成功!")
253
  # return b64, stats
254
  return fig, b64, stats