feat: Support custom dictionary and stop words in the danmu word cloud
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,6 +1,8 @@
|
||||
.idea
|
||||
venv
|
||||
logs
|
||||
userdict.txt
|
||||
stopword.txt
|
||||
|
||||
starbot/commands/private
|
||||
main.py
|
||||
@@ -10,6 +10,7 @@ from typing import Union, Tuple, List, Dict, Any
|
||||
import jieba
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from loguru import logger
|
||||
from matplotlib import pyplot as plt
|
||||
from mpl_toolkits import axisartist
|
||||
from scipy.interpolate import make_interp_spline
|
||||
@@ -373,10 +374,26 @@ class LiveReportGenerator:
|
||||
if all_danmu:
|
||||
pic.draw_section("弹幕词云")
|
||||
|
||||
if config.get("DANMU_CLOUD_DICT"):
|
||||
try:
|
||||
jieba.load_userdict(config.get("DANMU_CLOUD_DICT"))
|
||||
except Exception:
|
||||
logger.error("载入弹幕词云自定义词典失败, 请检查配置的词典路径是否正确")
|
||||
|
||||
all_danmu_str = " ".join(all_danmu)
|
||||
words = list(jieba.cut(all_danmu_str))
|
||||
counts = dict(Counter(words))
|
||||
|
||||
stop_words = {}
|
||||
if config.get("DANMU_CLOUD_STOP_WORDS"):
|
||||
try:
|
||||
with open(config.get("DANMU_CLOUD_STOP_WORDS"), "r", encoding="utf-8") as f:
|
||||
stop_words = set(line.strip() for line in f)
|
||||
except Exception:
|
||||
logger.error("载入弹幕词云停用词失败, 请检查配置的停用词路径是否正确")
|
||||
for sw in stop_words:
|
||||
counts.pop(sw, None)
|
||||
|
||||
font_base_path = os.path.dirname(os.path.dirname(__file__))
|
||||
word_cloud = WordCloud(width=900,
|
||||
height=450,
|
||||
|
||||
@@ -75,6 +75,10 @@ SIMPLE_CONFIG = {
|
||||
"DANMU_CLOUD_MAX_FONT_SIZE": 200,
|
||||
# 弹幕词云最多词数
|
||||
"DANMU_CLOUD_MAX_WORDS": 80,
|
||||
# 弹幕词云停用词路径,存储时每行一个停用词,以纯文本方式存储,可过滤这些词使其不出现在词云中
|
||||
"DANMU_CLOUD_STOP_WORDS": "",
|
||||
# 弹幕词云自定义词典路径,存储时每行一个词,以纯文本方式存储,在对弹幕进行切词时,词典中的词不会被切分开
|
||||
"DANMU_CLOUD_DICT": "",
|
||||
|
||||
# 需加载的用户自定义命令包
|
||||
"CUSTOM_COMMANDS_PACKAGE": None,
|
||||
@@ -182,6 +186,10 @@ FULL_CONFIG = {
|
||||
"DANMU_CLOUD_MAX_FONT_SIZE": 200,
|
||||
# 弹幕词云最多词数
|
||||
"DANMU_CLOUD_MAX_WORDS": 80,
|
||||
# 弹幕词云停用词路径,存储时每行一个停用词,以纯文本方式存储,可过滤这些词使其不出现在词云中
|
||||
"DANMU_CLOUD_STOP_WORDS": "",
|
||||
# 弹幕词云自定义词典路径,存储时每行一个词,以纯文本方式存储,在对弹幕进行切词时,词典中的词不会被切分开
|
||||
"DANMU_CLOUD_DICT": "",
|
||||
|
||||
# 需加载的用户自定义命令包
|
||||
"CUSTOM_COMMANDS_PACKAGE": None,
|
||||
|
||||
Reference in New Issue
Block a user