diff --git a/.gitignore b/.gitignore index b84aeba..d168189 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ .idea venv logs +userdict.txt +stopword.txt starbot/commands/private main.py \ No newline at end of file diff --git a/starbot/painter/LiveReportGenerator.py b/starbot/painter/LiveReportGenerator.py index 06aacac..ea023a5 100644 --- a/starbot/painter/LiveReportGenerator.py +++ b/starbot/painter/LiveReportGenerator.py @@ -10,6 +10,7 @@ from typing import Union, Tuple, List, Dict, Any import jieba import numpy as np from PIL import Image +from loguru import logger from matplotlib import pyplot as plt from mpl_toolkits import axisartist from scipy.interpolate import make_interp_spline @@ -373,10 +374,26 @@ class LiveReportGenerator: if all_danmu: pic.draw_section("弹幕词云") + if config.get("DANMU_CLOUD_DICT"): + try: + jieba.load_userdict(config.get("DANMU_CLOUD_DICT")) + except Exception: + logger.error("载入弹幕词云自定义词典失败, 请检查配置的词典路径是否正确") + all_danmu_str = " ".join(all_danmu) words = list(jieba.cut(all_danmu_str)) counts = dict(Counter(words)) + stop_words = {} + if config.get("DANMU_CLOUD_STOP_WORDS"): + try: + with open(config.get("DANMU_CLOUD_STOP_WORDS"), "r", encoding="utf-8") as f: + stop_words = set(line.strip() for line in f) + except Exception: + logger.error("载入弹幕词云停用词失败, 请检查配置的停用词路径是否正确") + for sw in stop_words: + counts.pop(sw, None) + font_base_path = os.path.dirname(os.path.dirname(__file__)) word_cloud = WordCloud(width=900, height=450, diff --git a/starbot/utils/config.py b/starbot/utils/config.py index c39c165..32d45db 100644 --- a/starbot/utils/config.py +++ b/starbot/utils/config.py @@ -75,6 +75,10 @@ SIMPLE_CONFIG = { "DANMU_CLOUD_MAX_FONT_SIZE": 200, # 弹幕词云最多词数 "DANMU_CLOUD_MAX_WORDS": 80, + # 弹幕词云停用词路径,存储时每行一个停用词,以纯文本方式存储,可过滤这些词使其不出现在词云中 + "DANMU_CLOUD_STOP_WORDS": "", + # 弹幕词云自定义词典路径,存储时每行一个词,以纯文本方式存储,在对弹幕进行切词时,词典中的词不会被切分开 + "DANMU_CLOUD_DICT": "", # 需加载的用户自定义命令包 "CUSTOM_COMMANDS_PACKAGE": None, @@ -182,6 +186,10 @@ FULL_CONFIG = { "DANMU_CLOUD_MAX_FONT_SIZE": 200, # 弹幕词云最多词数 "DANMU_CLOUD_MAX_WORDS": 80, + # 弹幕词云停用词路径,存储时每行一个停用词,以纯文本方式存储,可过滤这些词使其不出现在词云中 + "DANMU_CLOUD_STOP_WORDS": "", + # 弹幕词云自定义词典路径,存储时每行一个词,以纯文本方式存储,在对弹幕进行切词时,词典中的词不会被切分开 + "DANMU_CLOUD_DICT": "", # 需加载的用户自定义命令包 "CUSTOM_COMMANDS_PACKAGE": None,