update_jp.py:

更新分词器：改为显式调用 unidic-lite 词典
This commit is contained in:
Miyamizu-MitsuhaSang 2025-08-30 14:25:16 +08:00
parent 6efd72a596
commit 9372cb353d
1 changed file with 4 additions and 1 deletion

View File

@@ -6,6 +6,8 @@ from pathlib import Path
import pandas as pd
from fugashi import Tagger
import unidic_lite
from importlib import resources
from pykakasi import kakasi
from tortoise import Tortoise
from tortoise.exceptions import MultipleObjectsReturned
@@ -54,7 +56,8 @@ async def pos_process(pos: str):
# Initialize the tokenizer once, pointed explicitly at the dictionary
# bundled with the unidic_lite package.
# The path is wrapped in double quotes because the argument string is split
# shell-style before it reaches MeCab; an unquoted install path containing
# spaces (or Windows backslashes) would otherwise be mangled.
dicdir = resources.files('unidic_lite').joinpath('dicdir')
tagger = Tagger(f'-d "{dicdir}"')
# Initialize the kakasi converter.
kakasi_inst = kakasi()