feat: Support a custom dictionary and stop words for the word cloud

This commit is contained in:
LWR
2023-05-07 23:21:31 +08:00
parent 3a24802eb6
commit 22abe91380
3 changed files with 27 additions and 0 deletions

2
.gitignore vendored
View File

@@ -1,6 +1,8 @@
.idea .idea
venv venv
logs logs
userdict.txt
stopword.txt
starbot/commands/private starbot/commands/private
main.py main.py

View File

@@ -10,6 +10,7 @@ from typing import Union, Tuple, List, Dict, Any
import jieba import jieba
import numpy as np import numpy as np
from PIL import Image from PIL import Image
from loguru import logger
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from mpl_toolkits import axisartist from mpl_toolkits import axisartist
from scipy.interpolate import make_interp_spline from scipy.interpolate import make_interp_spline
@@ -373,10 +374,26 @@ class LiveReportGenerator:
if all_danmu: if all_danmu:
pic.draw_section("弹幕词云") pic.draw_section("弹幕词云")
if config.get("DANMU_CLOUD_DICT"):
try:
jieba.load_userdict(config.get("DANMU_CLOUD_DICT"))
except Exception:
logger.error("载入弹幕词云自定义词典失败, 请检查配置的词典路径是否正确")
all_danmu_str = " ".join(all_danmu) all_danmu_str = " ".join(all_danmu)
words = list(jieba.cut(all_danmu_str)) words = list(jieba.cut(all_danmu_str))
counts = dict(Counter(words)) counts = dict(Counter(words))
stop_words = {}
if config.get("DANMU_CLOUD_STOP_WORDS"):
try:
with open(config.get("DANMU_CLOUD_STOP_WORDS"), "r", encoding="utf-8") as f:
stop_words = set(line.strip() for line in f)
except Exception:
logger.error("载入弹幕词云停用词失败, 请检查配置的停用词路径是否正确")
for sw in stop_words:
counts.pop(sw, None)
font_base_path = os.path.dirname(os.path.dirname(__file__)) font_base_path = os.path.dirname(os.path.dirname(__file__))
word_cloud = WordCloud(width=900, word_cloud = WordCloud(width=900,
height=450, height=450,

View File

@@ -75,6 +75,10 @@ SIMPLE_CONFIG = {
"DANMU_CLOUD_MAX_FONT_SIZE": 200, "DANMU_CLOUD_MAX_FONT_SIZE": 200,
# 弹幕词云最多词数 # 弹幕词云最多词数
"DANMU_CLOUD_MAX_WORDS": 80, "DANMU_CLOUD_MAX_WORDS": 80,
# 弹幕词云停用词路径,存储时每行一个停用词,以纯文本方式存储,可过滤这些词使其不出现在词云中
"DANMU_CLOUD_STOP_WORDS": "",
# 弹幕词云自定义词典路径,存储时每行一个词,以纯文本方式存储,在对弹幕进行切词时,词典中的词不会被切分开
"DANMU_CLOUD_DICT": "",
# 需加载的用户自定义命令包 # 需加载的用户自定义命令包
"CUSTOM_COMMANDS_PACKAGE": None, "CUSTOM_COMMANDS_PACKAGE": None,
@@ -182,6 +186,10 @@ FULL_CONFIG = {
"DANMU_CLOUD_MAX_FONT_SIZE": 200, "DANMU_CLOUD_MAX_FONT_SIZE": 200,
# 弹幕词云最多词数 # 弹幕词云最多词数
"DANMU_CLOUD_MAX_WORDS": 80, "DANMU_CLOUD_MAX_WORDS": 80,
# 弹幕词云停用词路径,存储时每行一个停用词,以纯文本方式存储,可过滤这些词使其不出现在词云中
"DANMU_CLOUD_STOP_WORDS": "",
# 弹幕词云自定义词典路径,存储时每行一个词,以纯文本方式存储,在对弹幕进行切词时,词典中的词不会被切分开
"DANMU_CLOUD_DICT": "",
# 需加载的用户自定义命令包 # 需加载的用户自定义命令包
"CUSTOM_COMMANDS_PACKAGE": None, "CUSTOM_COMMANDS_PACKAGE": None,