feat: Support custom dictionary and stop words in word cloud

This commit is contained in:
LWR
2023-05-07 23:21:31 +08:00
parent 3a24802eb6
commit 22abe91380
3 changed files with 27 additions and 0 deletions

2
.gitignore vendored
View File

@@ -1,6 +1,8 @@
.idea
venv
logs
userdict.txt
stopword.txt
starbot/commands/private
main.py

View File

@@ -10,6 +10,7 @@ from typing import Union, Tuple, List, Dict, Any
import jieba
import numpy as np
from PIL import Image
from loguru import logger
from matplotlib import pyplot as plt
from mpl_toolkits import axisartist
from scipy.interpolate import make_interp_spline
@@ -373,10 +374,26 @@ class LiveReportGenerator:
if all_danmu:
pic.draw_section("弹幕词云")
if config.get("DANMU_CLOUD_DICT"):
try:
jieba.load_userdict(config.get("DANMU_CLOUD_DICT"))
except Exception:
logger.error("载入弹幕词云自定义词典失败, 请检查配置的词典路径是否正确")
all_danmu_str = " ".join(all_danmu)
words = list(jieba.cut(all_danmu_str))
counts = dict(Counter(words))
stop_words = {}
if config.get("DANMU_CLOUD_STOP_WORDS"):
try:
with open(config.get("DANMU_CLOUD_STOP_WORDS"), "r", encoding="utf-8") as f:
stop_words = set(line.strip() for line in f)
except Exception:
logger.error("载入弹幕词云停用词失败, 请检查配置的停用词路径是否正确")
for sw in stop_words:
counts.pop(sw, None)
font_base_path = os.path.dirname(os.path.dirname(__file__))
word_cloud = WordCloud(width=900,
height=450,

View File

@@ -75,6 +75,10 @@ SIMPLE_CONFIG = {
"DANMU_CLOUD_MAX_FONT_SIZE": 200,
# 弹幕词云最多词数
"DANMU_CLOUD_MAX_WORDS": 80,
# 弹幕词云停用词路径,存储时每行一个停用词,以纯文本方式存储,可过滤这些词使其不出现在词云中
"DANMU_CLOUD_STOP_WORDS": "",
# 弹幕词云自定义词典路径,存储时每行一个词,以纯文本方式存储,在对弹幕进行切词时,词典中的词不会被切分开
"DANMU_CLOUD_DICT": "",
# 需加载的用户自定义命令包
"CUSTOM_COMMANDS_PACKAGE": None,
@@ -182,6 +186,10 @@ FULL_CONFIG = {
"DANMU_CLOUD_MAX_FONT_SIZE": 200,
# 弹幕词云最多词数
"DANMU_CLOUD_MAX_WORDS": 80,
# 弹幕词云停用词路径,存储时每行一个停用词,以纯文本方式存储,可过滤这些词使其不出现在词云中
"DANMU_CLOUD_STOP_WORDS": "",
# 弹幕词云自定义词典路径,存储时每行一个词,以纯文本方式存储,在对弹幕进行切词时,词典中的词不会被切分开
"DANMU_CLOUD_DICT": "",
# 需加载的用户自定义命令包
"CUSTOM_COMMANDS_PACKAGE": None,