更新谚语搜索,调整搜索函数
This commit is contained in:
parent
11ff892653
commit
b16917215a
|
|
@ -8,7 +8,7 @@ from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse
|
|||
from app.api.search_dict.service import suggest_autocomplete
|
||||
from app.api.word_comment.word_comment_schemas import CommentSet
|
||||
from app.models import DefinitionJp, CommentFr, CommentJp
|
||||
from app.models.fr import DefinitionFr
|
||||
from app.models.fr import DefinitionFr, ProverbFr
|
||||
from app.utils.all_kana import all_in_kana
|
||||
from app.utils.security import get_current_user
|
||||
from app.utils.textnorm import normalize_text
|
||||
|
|
@ -165,7 +165,12 @@ async def search_word_list(query_word: SearchRequest, user=Depends(get_current_u
|
|||
|
||||
|
||||
@dict_search.post("/search/proverb/list")
async def search_proverb_list(query_word: ProverbSearchRequest):
    """Return proverb suggestions for the text in ``query_word.query``.

    The input language is detected with ``service.detect_language`` and the
    lookup is delegated to ``service.suggest_proverb`` against the
    ``ProverbFr`` model.

    Returns:
        A dict ``{"list": ...}`` wrapping the value returned by
        ``service.suggest_proverb``.
    """
    # NOTE(review): the sibling route search_word_list declares
    # ``user=Depends(get_current_user)``; this route has no such dependency,
    # so it is reachable without authentication -- confirm this is intended.
    # NOTE(review): ``query_word.dict_language`` is not consulted here; the
    # model is always ProverbFr.
    lang = service.detect_language(text=query_word.query)
    suggest_proverbs = await service.suggest_proverb(
        query=query_word.query,
        lang=lang,
        model=ProverbFr,
    )
    # TODO: decide whether English input can occur when the French dictionary is in use.
    return {"list": suggest_proverbs}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ class SearchRequest(BaseModel):
|
|||
|
||||
class ProverbSearchRequest(BaseModel):
    """Request body for proverb search."""

    # Raw search text entered by the client.
    query: str
    # Presumably selects which dictionary to search ('fr' or 'jp'); defaults
    # to French -- verify against the route that consumes it.
    dict_language: Literal['fr', 'jp'] = "fr"
|
||||
|
||||
|
||||
class SearchItemJp(BaseModel):
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
import asyncio
|
||||
import re
|
||||
from typing import List, Tuple, Dict, Literal
|
||||
from typing import List, Tuple, Dict, Literal, Type
|
||||
|
||||
from fastapi import HTTPException
|
||||
from tortoise import Tortoise
|
||||
from tortoise import Tortoise, Model
|
||||
from tortoise.expressions import Q
|
||||
|
||||
from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchResponse, ProverbSearchRequest
|
||||
|
|
@ -14,75 +13,95 @@ from app.utils.textnorm import normalize_text
|
|||
from settings import TORTOISE_ORM
|
||||
|
||||
|
||||
def contains_chinese(text: str) -> bool:
    """Return True when ``text`` has at least one character in U+4E00..U+9FFF."""
    return re.search(r'[\u4e00-\u9fff]', text) is not None
|
||||
# Hiragana, Katakana, Katakana Phonetic Extensions and half-width katakana.
_KANA_PATTERN = re.compile(r"[\u3040-\u30ff\u31f0-\u31ff\uff66-\uff9f]")
# CJK Unified Ideographs (shared by Chinese hanzi and Japanese kanji).
_HAN_PATTERN = re.compile(r"[\u4e00-\u9fff]")
# ASCII letters plus Latin-1 accented letters (excluding the math signs
# U+00D7 and U+00F7) and the French ligature/diaeresis letters outside Latin-1.
_LATIN_PATTERN = re.compile(r"[a-zA-ZÀ-ÖØ-öø-ÿŒœŸ]")


def detect_language(text: str) -> Literal["fr", "zh", "jp", "other"]:
    """Guess the input language of ``text`` from the scripts it contains.

    Checks are applied in priority order:

    1. Any kana character -> ``"jp"``. This is tested first because Japanese
       text routinely mixes kana with kanji, and kanji live in the same
       Unicode block as Chinese characters.
    2. Any CJK ideograph (with no kana present) -> ``"zh"``.
    3. Any Latin letter, including French accented letters -> ``"fr"``.
    4. Otherwise (digits, punctuation, empty string, ...) -> ``"other"``.

    Args:
        text: The raw user input to classify.

    Returns:
        One of ``"jp"``, ``"zh"``, ``"fr"`` or ``"other"``.
    """
    if _KANA_PATTERN.search(text):
        return "jp"
    if _HAN_PATTERN.search(text):
        return "zh"
    if _LATIN_PATTERN.search(text):
        return "fr"
    return "other"
|
||||
|
||||
|
||||
async def accurate_proverb(proverb_id: int) -> ProverbSearchResponse:
    """Look up a single French proverb by primary key and return its details.

    Args:
        proverb_id: Primary key of the ``ProverbFr`` row.

    Returns:
        A ``ProverbSearchResponse`` carrying the proverb text and its Chinese
        explanation.

    Raises:
        HTTPException: 404 when no row with ``proverb_id`` exists.
    """
    proverb = await ProverbFr.get_or_none(id=proverb_id)
    if proverb is None:
        raise HTTPException(status_code=404, detail="Proverb not found")
    return ProverbSearchResponse(
        # The proverb column on ProverbFr is named ``text``.
        proverb_text=proverb.text,
        chi_exp=proverb.chi_exp,
    )
|
||||
|
||||
|
||||
async def suggest_proverb(
    query: str,
    lang: Literal["fr", "zh", "jp"],
    model: Type[Model],
    proverb_field: str = "text",
    chi_exp_field: str = "chi_exp",
    limit: int = 10,
) -> List[Dict[str, object]]:
    """Generic search-suggestion helper for multi-language proverb tables.

    Args:
        query: Search keyword; surrounding whitespace is ignored.
        lang: Detected input language. ``"zh"`` searches the Chinese
            explanation column; any other value searches the proverb column.
        model: Tortoise ORM model class to query, e.g. ``ProverbFr``.
        proverb_field: Name of the foreign-language proverb column.
        chi_exp_field: Name of the Chinese explanation column.
        limit: Maximum number of rows fetched for EACH match category
            (prefix matches and non-prefix substring matches), so up to
            ``2 * limit`` rows can be returned.

    Returns:
        A list of ``{"id": ..., "proverb": ..., "chi_exp": ...}`` dicts:
        prefix matches first, then substring matches that are not prefix
        matches, de-duplicated by ``id`` with order preserved. An empty or
        whitespace-only ``query`` yields ``[]`` without touching the database.
    """
    keyword = query.strip()
    if not keyword:
        return []

    # Pick the column to search from the input language.
    search_field = chi_exp_field if lang == "zh" else proverb_field
    startswith_lookup = {f"{search_field}__istartswith": keyword}
    contains_lookup = {f"{search_field}__icontains": keyword}
    columns = ("id", proverb_field, chi_exp_field)

    # Not every model passed in is guaranteed to have a ``freq`` column;
    # ordering by a missing field would make the query fail, so fall back to
    # a stable ordering by primary key in that case.
    ordering = "-freq" if "freq" in model._meta.fields_map else "id"

    def _ranked(queryset):
        """Apply the shared ordering, limit and column projection."""
        return queryset.order_by(ordering).limit(limit).values(*columns)

    # 1. Rows whose searched column starts with the keyword.
    start_matches = await _ranked(model.filter(**startswith_lookup))

    # 2. Rows that contain the keyword but do not start with it.
    contain_matches = await _ranked(
        model.filter(Q(**contains_lookup) & ~Q(**startswith_lookup))
    )

    # 3. Merge, dropping duplicate ids while keeping the ranking order.
    results: List[Dict[str, object]] = []
    seen_ids = set()
    for row in start_matches + contain_matches:
        if row["id"] in seen_ids:
            continue
        seen_ids.add(row["id"])
        results.append({
            "id": row["id"],
            "proverb": row[proverb_field],
            "chi_exp": row[chi_exp_field],
        })

    return results
|
||||
|
|
@ -205,4 +224,5 @@ async def __main():
|
|||
|
||||
|
||||
if __name__ == '__main__':
    # Ad-hoc manual check of the language detector when this module is run
    # directly; the async entry point below is currently disabled.
    # asyncio.run(__main())
    print(detect_language(text="ahsjdasd"))
|
||||
|
|
@ -8,8 +8,8 @@ from fastapi import APIRouter, Depends, HTTPException
|
|||
|
||||
from app.models import User
|
||||
from app.schemas.trans_schemas import TransResponse, TransRequest
|
||||
from app.utils.md5 import make_md5
|
||||
from app.utils.security import is_admin_user, get_current_user
|
||||
from scripts.md5 import make_md5
|
||||
from settings import settings
|
||||
|
||||
translator_router = APIRouter()
|
||||
|
|
|
|||
|
|
@ -45,11 +45,10 @@ class DefinitionFr(Model):
|
|||
|
||||
class ProverbFr(Model):
    """French proverb or common expression with its Chinese explanation."""

    id = fields.IntField(pk=True)
    # The proverb itself; the search service reads this column as ``text``.
    text = fields.TextField(description="法语谚语及常用表达")
    # Chinese explanation of the proverb.
    chi_exp = fields.TextField(description="中文释义")
    # Integer counter defaulting to 0; proverb suggestions are ordered by it
    # in descending order.
    freq = fields.IntField(default=0)
    # Set once on insert / refreshed on every save.
    created_at = fields.DatetimeField(auto_now_add=True)
    updated_at = fields.DatetimeField(auto_now=True)

    class Meta:
        table = "proverb_fr"
|
||||
|
|
|
|||
|
|
@ -87,3 +87,14 @@ class PronunciationTestJp(Model):
|
|||
|
||||
class Meta:
|
||||
table = "pronunciationtest_jp"
|
||||
|
||||
class IdiomJp(Model):
    """Japanese idiom row stored in the ``idiom_jp`` table.

    All text columns are required (Tortoise fields are NOT NULL by default).
    """

    id = fields.IntField(pk=True)
    # The idiom itself.
    text = fields.TextField()
    # Chinese explanation.
    chi_exp = fields.TextField()
    # Example usage.
    example = fields.TextField()
    # Presumably a normalised form used for matching user input -- confirm
    # against the importer and search code.
    search_text = fields.TextField()
    # Set once when the row is inserted.
    created_at = fields.DatetimeField(auto_now_add=True)

    class Meta:
        table = "idiom_jp"
|
||||
|
|
|
|||
4
main.py
4
main.py
|
|
@ -18,7 +18,7 @@ from app.api.user.routes import users_router
|
|||
from app.api.word_comment.routes import word_comment_router
|
||||
from app.core.redis import init_redis, close_redis
|
||||
from app.utils.phone_encrypt import PhoneEncrypt
|
||||
from settings import ONLINE_SETTINGS
|
||||
from settings import TORTOISE_ORM
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
|
|
@ -46,7 +46,7 @@ app.add_middleware(
|
|||
|
||||
register_tortoise(
|
||||
app=app,
|
||||
config=ONLINE_SETTINGS,
|
||||
config=TORTOISE_ORM,
|
||||
)
|
||||
|
||||
app.include_router(users_router, tags=["User API"], prefix="/users")
|
||||
|
|
|
|||
|
|
@ -1,21 +1,21 @@
|
|||
import asyncio
|
||||
import re
|
||||
import unicodedata
|
||||
import jaconv
|
||||
from importlib import resources
|
||||
from pathlib import Path
|
||||
|
||||
import jaconv
|
||||
import pandas as pd
|
||||
from fugashi import Tagger
|
||||
import unidic_lite
|
||||
from importlib import resources
|
||||
from pykakasi import kakasi
|
||||
from tortoise import Tortoise
|
||||
from tortoise.exceptions import MultipleObjectsReturned
|
||||
|
||||
from app.models import WordlistJp, DefinitionJp, AttachmentJp, PosType
|
||||
from app.models.jp import IdiomJp
|
||||
from settings import TORTOISE_ORM
|
||||
|
||||
# Default workbook read by the import helpers, resolved relative to the
# current working directory.
xlsx_name = "./DictTable_20251029.xlsx"
xlsx_path = Path(xlsx_name)
|
||||
|
||||
|
||||
|
|
@ -228,6 +228,24 @@ async def set_hiragana(xlsx_path: Path = xlsx_path, sheet_name : str="日汉释
|
|||
|
||||
await WordlistJp.filter(text=word).update(hiragana=hiragana)
|
||||
|
||||
async def import_idiom(path: "Path | None" = None, sheet_name: str = "日语惯用语"):
    """Import Japanese idioms from an Excel sheet into the ``IdiomJp`` table.

    The first four columns of the sheet are read positionally as: idiom text,
    search text, Chinese explanation, example.

    Args:
        path: Workbook to read; defaults to the module-level ``xlsx_path``.
        sheet_name: Name of the worksheet holding the idioms.

    Notes:
        * Empty cells are stored as empty strings rather than the literal
          text ``"nan"`` that ``str()`` produces for pandas' missing value.
        * Rows without idiom text are skipped.
        * All rows are written with a single ``bulk_create`` call instead of
          one INSERT per row.
    """
    workbook = xlsx_path if path is None else path
    df = pd.read_excel(workbook, sheet_name=sheet_name)
    # Headers may be non-string (e.g. numbers), so coerce before stripping.
    df.columns = [str(col).strip() for col in df.columns]

    def _cell(value) -> str:
        """Convert one cell to a stripped string, mapping missing values to ''."""
        return "" if pd.isna(value) else str(value).strip()

    idioms = []
    for row in df.itertuples(index=False):
        sentence, search_text, chi_exp, example = (_cell(v) for v in row[:4])
        if not sentence:
            continue
        idioms.append(
            IdiomJp(
                text=sentence,
                chi_exp=chi_exp,
                example=example,
                search_text=search_text,
            )
        )

    if idioms:
        await IdiomJp.bulk_create(idioms)
|
||||
|
||||
|
||||
async def main():
|
||||
await Tortoise.init(config=TORTOISE_ORM)
|
||||
|
|
@ -237,8 +255,8 @@ async def main():
|
|||
# await import_wordlist_jp()
|
||||
# await import_def_jp()
|
||||
# await import_attachment()
|
||||
await set_hiragana()
|
||||
|
||||
# await set_hiragana()
|
||||
await import_idiom()
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
|
|
|
|||
Loading…
Reference in New Issue