From a18bd826542ccdeb57a897c38dc3c26513b5e72a Mon Sep 17 00:00:00 2001 From: Miyamizu-MitsuhaSang <2510681107@qq.com> Date: Tue, 2 Sep 2025 14:20:27 +0800 Subject: [PATCH] =?UTF-8?q?search.py:=20=E6=8F=90=E4=BE=9Bautocomplete?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3=E8=BF=9B=E8=A1=8C=E8=81=94=E6=83=B3=E5=92=8C?= =?UTF-8?q?=E6=8F=90=E7=A4=BA=E5=8A=9F=E8=83=BD=20all=5Fkana.py:=20?= =?UTF-8?q?=E5=B0=86=E6=97=A5=E8=AF=AD=E5=8D=95=E8=AF=8D=E8=BD=AC=E6=8D=A2?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E5=8D=95=E7=8B=AC=E8=AE=BE=E7=AB=8B=E4=B8=BA?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E6=A8=A1=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/api/search.py | 17 +++++++++----- app/utils/all_kana.py | 53 +++++++++++++++++++++++++------------------ 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/app/api/search.py b/app/api/search.py index 05e4895..304a2ff 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -8,6 +8,7 @@ from app.models import DefinitionJp from app.models.fr import DefinitionFr from app.schemas.search_schemas import SearchRequest, SearchResponse, SearchItemFr, SearchItemJp from app.utils.all_kana import all_in_kana +from app.utils.autocomplete import suggest_autocomplete from app.utils.security import get_current_user from app.utils.textnorm import normalize_text from scripts.update_jp import normalize_jp_text @@ -79,9 +80,13 @@ async def search(request: Request, body: SearchRequest, user=Depends(get_current # TODO 相关度排序(转换为模糊匹配) # TODO 输入搜索框时反馈内容 -# @dict_search.post("search/list") -# async def search_list(body: SearchRequest, user=Depends(get_current_user)): -# query = body.query -# if body.language == 'fr': -# query = normalize_text(query) -# prefix = await DefinitionFr.filter(word__text__icontains=query) +@dict_search.post("search/list") +async def search_list(query_word: SearchRequest, user=Depends(get_current_user)): + """ + 检索时的提示接口 + :param query_word: 用户输入的内容 + :param user: + :return: 待选列表 + """ + word_contents = await suggest_autocomplete(query=query_word) + return word_contents diff --git a/app/utils/all_kana.py b/app/utils/all_kana.py index 293f0e4..9be9e6f 100644 --- a/app/utils/all_kana.py +++ b/app/utils/all_kana.py @@ -1,35 +1,44 @@ +import unicodedata + import jaconv import pykakasi +from pykakasi import kakasi -kks = pykakasi.kakasi() -kks.setMode("H", "a") # 平假名 -> ascii (罗马字) -kks.setMode("K", "a") # 片假名 -> ascii -kks.setMode("J", "a") # 汉字 -> ascii -kks.setMode("r", "Hepburn") # 转换成 Hepburn 罗马字 -conv = kks.getConverter() - +# ---- 全局初始化(只做一次)---- +_kakasi = kakasi() +_kakasi.setMode("J", "H") # Kanji -> Hiragana(依据词典近似读音) +_kakasi.setMode("K", "H") # Katakana -> Hiragana +_kakasi.setMode("H", "H") # Hiragana -> Hiragana(不变) +# 可选:保留原文空格/标点;如需去除空格可自行处理 +_converter = _kakasi.getConverter() def all_in_kana(text: str) -> str: """ - 将输入统一转换为平假名,支持: - - 平假名 - - 片假名 - - 罗马字 (Hepburn 转写) - - 返回:平假名字符串 + 将任意日文输入(汉字/平假名/片假名/半角假名混排) + 统一转换为“标准化的平假名”。 """ if not text: return "" - # 1. 片假名 → 平假名 - normalized = jaconv.kata2hira(text) + # 1) 规格化(全半角/兼容等):避免隐形差异 + s = unicodedata.normalize("NFKC", text).strip() - # 2. 如果里面含有罗马字字符,就先转成假名 - if any("a" <= ch.lower() <= "z" for ch in normalized): - hira = conv.do(normalized) # 罗马字 -> 平假名 - normalized = jaconv.kata2hira(hira) + # 2) 先做假名统一(片假名 -> 平假名;半角片假名也会被 NFKC 规范化) + # 这一步对只有假名的输入能直接得到平假名 + s = jaconv.kata2hira(s) - # 3. 再次片假名 -> 平假名保险 - normalized = jaconv.kata2hira(normalized) + # 3) 用 pykakasi 将汉字(以及残留的片假名)转换为“平假名读音” + # - 对纯假名基本保持不变 + # - 对汉字给出近似读音(依赖内置词典,个别专有名词可能不完美) + hira = _converter.do(s) - return normalized \ No newline at end of file + # 4) 兜底:再转一次平假名,保证输出统一 + hira = jaconv.kata2hira(hira) + + # 5) 可选清洗:去掉多余空白(如果你不想保留空格) + # hira = "".join(hira.split()) + + return hira + +if __name__ == '__main__': + print(all_in_kana('能力')) \ No newline at end of file