search.py:
提供autocomplete接口进行联想和提示功能 all_kana.py: 将日语单词转换功能单独设立为功能模块
This commit is contained in:
parent
da14488d4f
commit
a18bd82654
|
|
@ -8,6 +8,7 @@ from app.models import DefinitionJp
|
|||
from app.models.fr import DefinitionFr
|
||||
from app.schemas.search_schemas import SearchRequest, SearchResponse, SearchItemFr, SearchItemJp
|
||||
from app.utils.all_kana import all_in_kana
|
||||
from app.utils.autocomplete import suggest_autocomplete
|
||||
from app.utils.security import get_current_user
|
||||
from app.utils.textnorm import normalize_text
|
||||
from scripts.update_jp import normalize_jp_text
|
||||
|
|
@ -79,9 +80,13 @@ async def search(request: Request, body: SearchRequest, user=Depends(get_current
|
|||
# TODO 相关度排序(转换为模糊匹配)
|
||||
# TODO 输入搜索框时反馈内容
|
||||
|
||||
# @dict_search.post("search/list")
|
||||
# async def search_list(body: SearchRequest, user=Depends(get_current_user)):
|
||||
# query = body.query
|
||||
# if body.language == 'fr':
|
||||
# query = normalize_text(query)
|
||||
# prefix = await DefinitionFr.filter(word__text__icontains=query)
|
||||
@dict_search.post("search/list")
async def search_list(query_word: SearchRequest, user=Depends(get_current_user)):
    """
    Autocomplete endpoint used while the user is typing in the search box.

    :param query_word: request body describing what the user has typed so far
    :param user: value injected through ``Depends(get_current_user)``; it is
        not read in the body, the dependency only has to resolve
    :return: the candidate list produced by ``suggest_autocomplete``
    """
    # NOTE(review): the route path has no leading "/" -- confirm that the
    # URL produced after the router prefix is applied is the intended one.
    # NOTE(review): the whole SearchRequest is forwarded as ``query``;
    # confirm the helper expects the model rather than a plain string.
    return await suggest_autocomplete(query=query_word)
|
||||
|
|
|
|||
|
|
@ -1,35 +1,44 @@
|
|||
import unicodedata
|
||||
|
||||
import jaconv
|
||||
import pykakasi
|
||||
from pykakasi import kakasi
|
||||
|
||||
# Converter that renders kana and kanji as Hepburn romaji.
# NOTE(review): setMode()/getConverter() look like pykakasi's legacy API;
# presumably deprecated in pykakasi 2.x in favour of convert() -- confirm.
kks = pykakasi.kakasi()
kks.setMode("H", "a")  # hiragana -> ascii (romaji)
kks.setMode("K", "a")  # katakana -> ascii
kks.setMode("J", "a")  # kanji -> ascii
kks.setMode("r", "Hepburn")  # use Hepburn romanization
conv = kks.getConverter()

# ---- Global initialization (done only once) ----
_kakasi = kakasi()
_kakasi.setMode("J", "H")  # kanji -> hiragana (approximate, dictionary-based reading)
_kakasi.setMode("K", "H")  # katakana -> hiragana
_kakasi.setMode("H", "H")  # hiragana -> hiragana (unchanged)
# Optional: spaces/punctuation of the input are kept; strip them yourself if needed.
_converter = _kakasi.getConverter()
|
||||
|
||||
def all_in_kana(text: str) -> str:
    """
    Normalize arbitrary Japanese input to hiragana.

    Accepts kanji, hiragana, katakana and half-width kana in any mix.
    Kanji readings come from the module-level ``_converter`` and are
    dictionary-based approximations, so individual proper nouns may be
    imperfect.

    NOTE(review): Latin letters (romaji) are presumably passed through
    unchanged, because ``_converter`` has no Latin-to-kana mode
    configured -- confirm whether romaji input must be supported.

    :param text: raw user input; may be empty
    :return: the hiragana form of ``text``, or ``""`` when ``text`` is
        empty or consists only of whitespace
    """
    if not text:
        return ""

    # 1) NFKC folds full-width/half-width and compatibility forms
    #    (half-width katakana included) so invisible differences vanish.
    s = unicodedata.normalize("NFKC", text).strip()
    if not s:
        # Whitespace-only input: nothing left to convert.
        return ""

    # 2) Katakana -> hiragana; kana-only input is already final here.
    s = jaconv.kata2hira(s)

    # 3) Kanji (and any leftover katakana) -> hiragana reading.
    hira = _converter.do(s)

    # 4) Safety net: fold any katakana the converter may have emitted.
    #    Optional cleanup if spaces are unwanted: "".join(hira.split())
    return jaconv.kata2hira(hira)
|
||||
|
||||
# Manual smoke check: print the conversion of a kanji word when the
# module is run directly as a script.
if __name__ == '__main__':
    print(all_in_kana('能力'))
|
||||
Loading…
Reference in New Issue