search.py: 提供 autocomplete 接口,用于搜索时的联想和提示功能
all_kana.py: 将日语单词转换功能拆分为独立的功能模块
This commit is contained in:
parent
da14488d4f
commit
a18bd82654
|
|
@ -8,6 +8,7 @@ from app.models import DefinitionJp
|
||||||
from app.models.fr import DefinitionFr
|
from app.models.fr import DefinitionFr
|
||||||
from app.schemas.search_schemas import SearchRequest, SearchResponse, SearchItemFr, SearchItemJp
|
from app.schemas.search_schemas import SearchRequest, SearchResponse, SearchItemFr, SearchItemJp
|
||||||
from app.utils.all_kana import all_in_kana
|
from app.utils.all_kana import all_in_kana
|
||||||
|
from app.utils.autocomplete import suggest_autocomplete
|
||||||
from app.utils.security import get_current_user
|
from app.utils.security import get_current_user
|
||||||
from app.utils.textnorm import normalize_text
|
from app.utils.textnorm import normalize_text
|
||||||
from scripts.update_jp import normalize_jp_text
|
from scripts.update_jp import normalize_jp_text
|
||||||
|
|
@ -79,9 +80,13 @@ async def search(request: Request, body: SearchRequest, user=Depends(get_current
|
||||||
# TODO 相关度排序(转换为模糊匹配)
|
# TODO 相关度排序(转换为模糊匹配)
|
||||||
# TODO 输入搜索框时反馈内容
|
# TODO 输入搜索框时反馈内容
|
||||||
|
|
||||||
# @dict_search.post("search/list")
|
@dict_search.post("search/list")
|
||||||
# async def search_list(body: SearchRequest, user=Depends(get_current_user)):
|
async def search_list(query_word: SearchRequest, user=Depends(get_current_user)):
|
||||||
# query = body.query
|
"""
|
||||||
# if body.language == 'fr':
|
检索时的提示接口
|
||||||
# query = normalize_text(query)
|
:param query_word: 用户输入的内容
|
||||||
# prefix = await DefinitionFr.filter(word__text__icontains=query)
|
:param user:
|
||||||
|
:return: 待选列表
|
||||||
|
"""
|
||||||
|
word_contents = await suggest_autocomplete(query=query_word)
|
||||||
|
return word_contents
|
||||||
|
|
|
||||||
|
|
@ -1,35 +1,44 @@
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
import jaconv
|
import jaconv
|
||||||
import pykakasi
|
import pykakasi
|
||||||
|
from pykakasi import kakasi
|
||||||
|
|
||||||
kks = pykakasi.kakasi()
|
# ---- 全局初始化(只做一次)----
|
||||||
kks.setMode("H", "a") # 平假名 -> ascii (罗马字)
|
_kakasi = kakasi()
|
||||||
kks.setMode("K", "a") # 片假名 -> ascii
|
_kakasi.setMode("J", "H") # Kanji -> Hiragana(依据词典近似读音)
|
||||||
kks.setMode("J", "a") # 汉字 -> ascii
|
_kakasi.setMode("K", "H") # Katakana -> Hiragana
|
||||||
kks.setMode("r", "Hepburn") # 转换成 Hepburn 罗马字
|
_kakasi.setMode("H", "H") # Hiragana -> Hiragana(不变)
|
||||||
conv = kks.getConverter()
|
# 可选:保留原文空格/标点;如需去除空格可自行处理
|
||||||
|
_converter = _kakasi.getConverter()
|
||||||
|
|
||||||
def all_in_kana(text: str) -> str:
|
def all_in_kana(text: str) -> str:
|
||||||
"""
|
"""
|
||||||
将输入统一转换为平假名,支持:
|
将任意日文输入(汉字/平假名/片假名/半角假名混排)
|
||||||
- 平假名
|
统一转换为“标准化的平假名”。
|
||||||
- 片假名
|
|
||||||
- 罗马字 (Hepburn 转写)
|
|
||||||
|
|
||||||
返回:平假名字符串
|
|
||||||
"""
|
"""
|
||||||
if not text:
|
if not text:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
# 1. 片假名 → 平假名
|
# 1) 规格化(全半角/兼容等):避免隐形差异
|
||||||
normalized = jaconv.kata2hira(text)
|
s = unicodedata.normalize("NFKC", text).strip()
|
||||||
|
|
||||||
# 2. 如果里面含有罗马字字符,就先转成假名
|
# 2) 先做假名统一(片假名 -> 平假名;半角片假名也会被 NFKC 规范化)
|
||||||
if any("a" <= ch.lower() <= "z" for ch in normalized):
|
# 这一步对只有假名的输入能直接得到平假名
|
||||||
hira = conv.do(normalized) # 罗马字 -> 平假名
|
s = jaconv.kata2hira(s)
|
||||||
normalized = jaconv.kata2hira(hira)
|
|
||||||
|
|
||||||
# 3. 再次片假名 -> 平假名保险
|
# 3) 用 pykakasi 将汉字(以及残留的片假名)转换为“平假名读音”
|
||||||
normalized = jaconv.kata2hira(normalized)
|
# - 对纯假名基本保持不变
|
||||||
|
# - 对汉字给出近似读音(依赖内置词典,个别专有名词可能不完美)
|
||||||
|
hira = _converter.do(s)
|
||||||
|
|
||||||
return normalized
|
# 4) 兜底:再转一次平假名,保证输出统一
|
||||||
|
hira = jaconv.kata2hira(hira)
|
||||||
|
|
||||||
|
# 5) 可选清洗:去掉多余空白(如果你不想保留空格)
|
||||||
|
# hira = "".join(hira.split())
|
||||||
|
|
||||||
|
return hira
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
print(all_in_kana('能力'))
|
||||||
Loading…
Reference in New Issue