From 9372cb353dc14068a754f13dde4cf5f9c31a601d Mon Sep 17 00:00:00 2001 From: Miyamizu-MitsuhaSang <2510681107@qq.com> Date: Sat, 30 Aug 2025 14:25:16 +0800 Subject: [PATCH] =?UTF-8?q?update=5Fjp.py:=20=E6=9B=B4=E6=96=B0=E5=88=86?= =?UTF-8?q?=E8=AF=8D=E5=99=A8=E8=B0=83=E7=94=A8unidict[lite]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/update_jp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/update_jp.py b/scripts/update_jp.py index db3a752..5e588ae 100644 --- a/scripts/update_jp.py +++ b/scripts/update_jp.py @@ -6,6 +6,8 @@ from pathlib import Path import pandas as pd from fugashi import Tagger +import unidic_lite +from importlib import resources from pykakasi import kakasi from tortoise import Tortoise from tortoise.exceptions import MultipleObjectsReturned @@ -54,7 +56,8 @@ async def pos_process(pos: str): # 初始化分词器 -tagger = Tagger() +dicdir = resources.files('unidic_lite').joinpath('dicdir') +tagger = Tagger(f"-d {dicdir}") # 初始化 kakasi 转换器 kakasi_inst = kakasi()