feat: Support custom dictionary and stop words in word cloud
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,6 +1,8 @@
|
|||||||
.idea
|
.idea
|
||||||
venv
|
venv
|
||||||
logs
|
logs
|
||||||
|
userdict.txt
|
||||||
|
stopword.txt
|
||||||
|
|
||||||
starbot/commands/private
|
starbot/commands/private
|
||||||
main.py
|
main.py
|
||||||
@@ -10,6 +10,7 @@ from typing import Union, Tuple, List, Dict, Any
|
|||||||
import jieba
|
import jieba
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
from loguru import logger
|
||||||
from matplotlib import pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
from mpl_toolkits import axisartist
|
from mpl_toolkits import axisartist
|
||||||
from scipy.interpolate import make_interp_spline
|
from scipy.interpolate import make_interp_spline
|
||||||
@@ -373,10 +374,26 @@ class LiveReportGenerator:
|
|||||||
if all_danmu:
|
if all_danmu:
|
||||||
pic.draw_section("弹幕词云")
|
pic.draw_section("弹幕词云")
|
||||||
|
|
||||||
|
if config.get("DANMU_CLOUD_DICT"):
|
||||||
|
try:
|
||||||
|
jieba.load_userdict(config.get("DANMU_CLOUD_DICT"))
|
||||||
|
except Exception:
|
||||||
|
logger.error("载入弹幕词云自定义词典失败, 请检查配置的词典路径是否正确")
|
||||||
|
|
||||||
all_danmu_str = " ".join(all_danmu)
|
all_danmu_str = " ".join(all_danmu)
|
||||||
words = list(jieba.cut(all_danmu_str))
|
words = list(jieba.cut(all_danmu_str))
|
||||||
counts = dict(Counter(words))
|
counts = dict(Counter(words))
|
||||||
|
|
||||||
|
stop_words = {}
|
||||||
|
if config.get("DANMU_CLOUD_STOP_WORDS"):
|
||||||
|
try:
|
||||||
|
with open(config.get("DANMU_CLOUD_STOP_WORDS"), "r", encoding="utf-8") as f:
|
||||||
|
stop_words = set(line.strip() for line in f)
|
||||||
|
except Exception:
|
||||||
|
logger.error("载入弹幕词云停用词失败, 请检查配置的停用词路径是否正确")
|
||||||
|
for sw in stop_words:
|
||||||
|
counts.pop(sw, None)
|
||||||
|
|
||||||
font_base_path = os.path.dirname(os.path.dirname(__file__))
|
font_base_path = os.path.dirname(os.path.dirname(__file__))
|
||||||
word_cloud = WordCloud(width=900,
|
word_cloud = WordCloud(width=900,
|
||||||
height=450,
|
height=450,
|
||||||
|
|||||||
@@ -75,6 +75,10 @@ SIMPLE_CONFIG = {
|
|||||||
"DANMU_CLOUD_MAX_FONT_SIZE": 200,
|
"DANMU_CLOUD_MAX_FONT_SIZE": 200,
|
||||||
# 弹幕词云最多词数
|
# 弹幕词云最多词数
|
||||||
"DANMU_CLOUD_MAX_WORDS": 80,
|
"DANMU_CLOUD_MAX_WORDS": 80,
|
||||||
|
# 弹幕词云停用词路径,存储时每行一个停用词,以纯文本方式存储,可过滤这些词使其不出现在词云中
|
||||||
|
"DANMU_CLOUD_STOP_WORDS": "",
|
||||||
|
# 弹幕词云自定义词典路径,存储时每行一个词,以纯文本方式存储,在对弹幕进行切词时,词典中的词不会被切分开
|
||||||
|
"DANMU_CLOUD_DICT": "",
|
||||||
|
|
||||||
# 需加载的用户自定义命令包
|
# 需加载的用户自定义命令包
|
||||||
"CUSTOM_COMMANDS_PACKAGE": None,
|
"CUSTOM_COMMANDS_PACKAGE": None,
|
||||||
@@ -182,6 +186,10 @@ FULL_CONFIG = {
|
|||||||
"DANMU_CLOUD_MAX_FONT_SIZE": 200,
|
"DANMU_CLOUD_MAX_FONT_SIZE": 200,
|
||||||
# 弹幕词云最多词数
|
# 弹幕词云最多词数
|
||||||
"DANMU_CLOUD_MAX_WORDS": 80,
|
"DANMU_CLOUD_MAX_WORDS": 80,
|
||||||
|
# 弹幕词云停用词路径,存储时每行一个停用词,以纯文本方式存储,可过滤这些词使其不出现在词云中
|
||||||
|
"DANMU_CLOUD_STOP_WORDS": "",
|
||||||
|
# 弹幕词云自定义词典路径,存储时每行一个词,以纯文本方式存储,在对弹幕进行切词时,词典中的词不会被切分开
|
||||||
|
"DANMU_CLOUD_DICT": "",
|
||||||
|
|
||||||
# 需加载的用户自定义命令包
|
# 需加载的用户自定义命令包
|
||||||
"CUSTOM_COMMANDS_PACKAGE": None,
|
"CUSTOM_COMMANDS_PACKAGE": None,
|
||||||
|
|||||||
Reference in New Issue
Block a user