diff --git a/app/api/search_dict/routes.py b/app/api/search_dict/routes.py index a6ea8e1..241635d 100644 --- a/app/api/search_dict/routes.py +++ b/app/api/search_dict/routes.py @@ -8,7 +8,7 @@ from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse from app.api.search_dict.service import suggest_autocomplete from app.api.word_comment.word_comment_schemas import CommentSet from app.models import DefinitionJp, CommentFr, CommentJp -from app.models.fr import DefinitionFr +from app.models.fr import DefinitionFr, ProverbFr from app.utils.all_kana import all_in_kana from app.utils.security import get_current_user from app.utils.textnorm import normalize_text @@ -165,7 +165,12 @@ async def search_word_list(query_word: SearchRequest, user=Depends(get_current_u @dict_search.post("/search/proverb/list") -async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get_current_user)): - lang: Literal['fr', 'zh'] = 'zh' if service.contains_chinese(query_word.query) else 'fr' - suggest_proverbs = await service.suggest_proverb(query=query_word, lang=lang) +async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get_current_user)): + lang = service.detect_language(text=query_word.query) + suggest_proverbs = await service.suggest_proverb( + query=query_word.query, + lang=lang, + model=ProverbFr, + ) + # TODO 使用法语词典时是否存在用英语输入的情况 + return {"list": suggest_proverbs} diff --git a/app/api/search_dict/search_schemas.py b/app/api/search_dict/search_schemas.py index 4ec5272..0f6c1fa 100644 --- a/app/api/search_dict/search_schemas.py +++ b/app/api/search_dict/search_schemas.py @@ -13,7 +13,7 @@ class SearchRequest(BaseModel): class ProverbSearchRequest(BaseModel): query: str - language: Literal['fr', 'jp'] = "fr" + dict_language: Literal['fr', 'jp'] = "fr" class SearchItemJp(BaseModel): diff --git a/app/api/search_dict/service.py b/app/api/search_dict/service.py index 6fadeb0..184efaa 100644 --- a/app/api/search_dict/service.py +++ 
b/app/api/search_dict/service.py @@ -1,9 +1,8 @@ -import asyncio import re -from typing import List, Tuple, Dict, Literal +from typing import List, Tuple, Dict, Literal, Type from fastapi import HTTPException -from tortoise import Tortoise +from tortoise import Tortoise, Model from tortoise.expressions import Q from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchResponse, ProverbSearchRequest @@ -14,75 +13,95 @@ from app.utils.textnorm import normalize_text from settings import TORTOISE_ORM -def contains_chinese(text: str) -> bool: - """判断字符串中是否包含至少一个中文字符""" - return bool(re.search(r'[\u4e00-\u9fff]', text)) +def detect_language(text: str) -> Literal["fr", "zh", "jp", "other"]: + """ + 自动检测输入语言: + 返回 'zh' / 'jp' / 'fr' / 'other' + """ + if re.search(r"[\u4e00-\u9fff]", text): + return "zh" + elif re.search(r"[\u3040-\u30ff\u31f0-\u31ff]", text): # 日文假名范围 + return "jp" + elif re.search(r"[a-zA-ZÀ-ÿ]", text): + return "fr" + return "other" async def accurate_proverb(proverb_id: int) -> ProverbSearchResponse: + """对于查询法语谚语的精准查询,返回详细信息""" proverb = await ProverbFr.get_or_none(id=proverb_id) if not proverb: raise HTTPException(status_code=404, detail="Proverb not found") return ProverbSearchResponse( - proverb_text=proverb.proverb, + proverb_text=proverb.text, chi_exp=proverb.chi_exp, ) -async def suggest_proverb(query: ProverbSearchRequest, lang: Literal['fr', 'zh']) -> List[Dict[str, str]]: +async def suggest_proverb( + query: str, + lang: Literal["fr", "zh", "jp", "other"], + model: Type[Model], + proverb_field: str = "text", + chi_exp_field: str = "chi_exp", + limit: int = 10, +) -> List[Dict[str, str]]: """ - 对法语谚语表进行搜索建议。 + 通用搜索建议函数,用于多语言谚语表。 参数: - query.query: 搜索关键词 + query: 搜索关键词 lang: 'fr' 或 'zh' - 逻辑: - 1. 若 lang='fr',按谚语字段 (proverb) 搜索; - 2. 若 lang='zh',按中文释义字段 (chi_exp) 搜索; - 3. 优先以输入开头的匹配; - 4. 其次为包含输入但不以其开头的匹配(按 freq 排序)。 - :return: [{'id': 1, 'proverb': 'xxx'}, ...] 
+ model: Tortoise ORM 模型类,例如 ProverbFr + proverb_field: 外语谚语字段名 + chi_exp_field: 中文释义字段名 + limit: 每类匹配的最大返回数量 + + 搜索逻辑: + 1. 根据语言选择搜索字段; + 2. 优先匹配以输入开头的结果; + 3. 其次匹配包含输入但非开头的结果; + 4. 合并去重后返回。 """ - keyword = query.query.strip() - results: List[Dict[str, str]] = [] - + keyword = query.strip() if not keyword: - return results + return [] - # ✅ 根据语言决定搜索字段 + # ✅ 根据语言选择搜索字段 if lang == "zh": - startswith_field = "chi_exp__istartswith" - contains_field = "chi_exp__icontains" - else: # 默认法语 - startswith_field = "proverb__istartswith" - contains_field = "proverb__icontains" + startswith_field = f"{chi_exp_field}__istartswith" + contains_field = f"{chi_exp_field}__icontains" + else: + startswith_field = f"{proverb_field}__istartswith" + contains_field = f"{proverb_field}__icontains" # ✅ 1. 开头匹配 start_matches = await ( - ProverbFr.filter(**{startswith_field: keyword}) + model.filter(**{startswith_field: keyword}) .order_by("-freq") - .limit(10) - .values("id", "proverb", "chi_exp") + .limit(limit) + .values("id", proverb_field, chi_exp_field) ) - # ✅ 2. 包含匹配(但不是开头) + # ✅ 2. 包含匹配(非开头) contain_matches = await ( - ProverbFr.filter( + model.filter( Q(**{contains_field: keyword}) & ~Q(**{startswith_field: keyword}) ) .order_by("-freq") - .limit(10) - .values("id", "proverb", "chi_exp") + .limit(limit) + .values("id", proverb_field, chi_exp_field) ) - # ✅ 合并结果(去重并保持顺序) + # ✅ 3. 
合并去重并保持顺序 + results: List[Dict[str, str]] = [] seen_ids = set() for row in start_matches + contain_matches: if row["id"] not in seen_ids: seen_ids.add(row["id"]) results.append({ "id": row["id"], - "proverb": row["proverb"], - "chi_exp": row["chi_exp"] + "proverb": row[proverb_field], + "chi_exp": row[chi_exp_field] }) return results @@ -205,4 +224,5 @@ async def __main(): if __name__ == '__main__': - asyncio.run(__main()) + # asyncio.run(__main()) + print(detect_language(text="ahsjdasd")) \ No newline at end of file diff --git a/app/api/translator.py b/app/api/translator.py index d1979b1..23a26b5 100644 --- a/app/api/translator.py +++ b/app/api/translator.py @@ -8,8 +8,8 @@ from fastapi import APIRouter, Depends, HTTPException from app.models import User from app.schemas.trans_schemas import TransResponse, TransRequest +from app.utils.md5 import make_md5 from app.utils.security import is_admin_user, get_current_user -from scripts.md5 import make_md5 from settings import settings translator_router = APIRouter() diff --git a/app/models/fr.py b/app/models/fr.py index b3c959d..422c745 100644 --- a/app/models/fr.py +++ b/app/models/fr.py @@ -45,11 +45,10 @@ class DefinitionFr(Model): class ProverbFr(Model): id = fields.IntField(pk=True) - proverb = fields.TextField(description="法语谚语及常用表达") + text = fields.TextField(description="法语谚语及常用表达") chi_exp = fields.TextField(description="中文释义") freq = fields.IntField(default=0) created_at = fields.DatetimeField(auto_now_add=True) - updated_at = fields.DatetimeField(auto_now=True) class Meta: table = "proverb_fr" diff --git a/app/models/jp.py b/app/models/jp.py index 68a207c..40d1b99 100644 --- a/app/models/jp.py +++ b/app/models/jp.py @@ -87,3 +87,14 @@ class PronunciationTestJp(Model): class Meta: table = "pronunciationtest_jp" + +class IdiomJp(Model): + id = fields.IntField(pk=True) + text = fields.TextField(null=False) + chi_exp = fields.TextField(null=False) + example = fields.TextField(null=False) + search_text = 
fields.TextField(null=False) + created_at = fields.DatetimeField(auto_now_add=True) + + class Meta: + table = "idiom_jp" diff --git a/scripts/md5.py b/app/utils/md5.py similarity index 100% rename from scripts/md5.py rename to app/utils/md5.py diff --git a/main.py b/main.py index 8b6d2f6..ec10dc8 100644 --- a/main.py +++ b/main.py @@ -18,7 +18,7 @@ from app.api.user.routes import users_router from app.api.word_comment.routes import word_comment_router from app.core.redis import init_redis, close_redis from app.utils.phone_encrypt import PhoneEncrypt -from settings import ONLINE_SETTINGS +from settings import TORTOISE_ORM @asynccontextmanager @@ -46,7 +46,7 @@ app.add_middleware( register_tortoise( app=app, - config=ONLINE_SETTINGS, + config=TORTOISE_ORM, ) app.include_router(users_router, tags=["User API"], prefix="/users") diff --git a/scripts/update_jp.py b/scripts/update_jp.py index 5e588ae..7b5253c 100644 --- a/scripts/update_jp.py +++ b/scripts/update_jp.py @@ -1,21 +1,21 @@ import asyncio import re import unicodedata -import jaconv +from importlib import resources from pathlib import Path +import jaconv import pandas as pd from fugashi import Tagger -import unidic_lite -from importlib import resources from pykakasi import kakasi from tortoise import Tortoise from tortoise.exceptions import MultipleObjectsReturned from app.models import WordlistJp, DefinitionJp, AttachmentJp, PosType +from app.models.jp import IdiomJp from settings import TORTOISE_ORM -xlsx_name = "./DictTable-20250823.xlsx" +xlsx_name = "./DictTable_20251029.xlsx" xlsx_path = Path(xlsx_name) @@ -228,6 +228,24 @@ async def set_hiragana(xlsx_path: Path = xlsx_path, sheet_name : str="日汉释 await WordlistJp.filter(text=word).update(hiragana=hiragana) +async def import_idiom(): + path = xlsx_path + df = pd.read_excel(path, sheet_name="日语惯用语") + df.columns = [col.strip() for col in df.columns] + + for row in df.itertuples(): + sentence = str(row[1]).strip() + search_text = str(row[2]).strip() + 
chi_exp = str(row[3]).strip() + example = str(row[4]).strip() + + await IdiomJp.create( + text=sentence, + chi_exp=chi_exp, + example=example, + search_text=search_text, + ) + async def main(): await Tortoise.init(config=TORTOISE_ORM) @@ -237,8 +255,8 @@ async def main(): # await import_wordlist_jp() # await import_def_jp() # await import_attachment() - await set_hiragana() - + # await set_hiragana() + await import_idiom() if __name__ == '__main__': asyncio.run(main())