更新谚语搜索,调整搜索函数
This commit is contained in:
parent
11ff892653
commit
b16917215a
|
|
@ -8,7 +8,7 @@ from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse
|
||||||
from app.api.search_dict.service import suggest_autocomplete
|
from app.api.search_dict.service import suggest_autocomplete
|
||||||
from app.api.word_comment.word_comment_schemas import CommentSet
|
from app.api.word_comment.word_comment_schemas import CommentSet
|
||||||
from app.models import DefinitionJp, CommentFr, CommentJp
|
from app.models import DefinitionJp, CommentFr, CommentJp
|
||||||
from app.models.fr import DefinitionFr
|
from app.models.fr import DefinitionFr, ProverbFr
|
||||||
from app.utils.all_kana import all_in_kana
|
from app.utils.all_kana import all_in_kana
|
||||||
from app.utils.security import get_current_user
|
from app.utils.security import get_current_user
|
||||||
from app.utils.textnorm import normalize_text
|
from app.utils.textnorm import normalize_text
|
||||||
|
|
@ -165,7 +165,12 @@ async def search_word_list(query_word: SearchRequest, user=Depends(get_current_u
|
||||||
|
|
||||||
|
|
||||||
@dict_search.post("/search/proverb/list")
async def search_proverb_list(query_word: ProverbSearchRequest):
    """Return proverb suggestions for the submitted search text.

    The input language is detected from the query text itself, and the
    lookup is run against the ``ProverbFr`` model.

    Returns:
        A dict with a single ``"list"`` key holding the suggestions.
    """
    # NOTE(review): this handler declares no authentication dependency;
    # confirm the endpoint is meant to be publicly reachable.
    raw_query = query_word.query
    detected = service.detect_language(text=raw_query)
    # TODO: decide whether English input can occur while the French
    # dictionary is in use, and how it should be handled.
    matches = await service.suggest_proverb(
        query=raw_query,
        lang=detected,
        model=ProverbFr,
    )
    return {"list": matches}
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ class SearchRequest(BaseModel):
|
||||||
|
|
||||||
class ProverbSearchRequest(BaseModel):
    """Request body for a proverb search."""

    # Search keyword.
    query: str
    # Presumably selects the dictionary to search (verify against callers);
    # only "fr" and "jp" are accepted, "fr" when omitted.
    dict_language: Literal["fr", "jp"] = "fr"
|
||||||
|
|
||||||
|
|
||||||
class SearchItemJp(BaseModel):
|
class SearchItemJp(BaseModel):
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,8 @@
|
||||||
import asyncio
|
|
||||||
import re
|
import re
|
||||||
from typing import List, Tuple, Dict, Literal
|
from typing import List, Tuple, Dict, Literal, Type
|
||||||
|
|
||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
from tortoise import Tortoise
|
from tortoise import Tortoise, Model
|
||||||
from tortoise.expressions import Q
|
from tortoise.expressions import Q
|
||||||
|
|
||||||
from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchResponse, ProverbSearchRequest
|
from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchResponse, ProverbSearchRequest
|
||||||
|
|
@ -14,75 +13,95 @@ from app.utils.textnorm import normalize_text
|
||||||
from settings import TORTOISE_ORM
|
from settings import TORTOISE_ORM
|
||||||
|
|
||||||
|
|
||||||
def contains_chinese(text: str) -> bool:
    """Return True if *text* has at least one character in U+4E00–U+9FFF."""
    return bool(re.search(r'[\u4e00-\u9fff]', text))


def detect_language(text: str) -> Literal["fr", "zh", "jp", "other"]:
    """Guess the language of *text* from the scripts it contains.

    Checks run in this order and the first hit wins:

    1. any hiragana/katakana letter      -> ``"jp"``
    2. any Han ideograph (U+4E00–U+9FFF) -> ``"zh"``
    3. any Latin letter                  -> ``"fr"``
    4. nothing of the above              -> ``"other"``

    Kana is tested before Han because Japanese text normally mixes kanji
    with kana; testing Han first would label such text as Chinese. Text made
    of Han characters only cannot be told apart and is reported as ``"zh"``.

    Args:
        text: The string to classify; may be empty.

    Returns:
        One of ``"fr"``, ``"zh"``, ``"jp"`` or ``"other"``.
    """
    # Hiragana, katakana and katakana phonetic extensions. The punctuation
    # marks U+30A0 (double hyphen) and U+30FB (middle dot) are left out so
    # that they do not mark otherwise non-Japanese text as Japanese.
    if re.search(r"[\u3041-\u309f\u30a1-\u30fa\u30fc-\u30ff\u31f0-\u31ff]", text):
        return "jp"
    if re.search(r"[\u4e00-\u9fff]", text):
        return "zh"
    # ASCII letters plus the accented Latin-1 letters. U+00D7 (×) and
    # U+00F7 (÷) sit inside the À-ÿ range but are symbols, so they are
    # skipped; the French letters Œ, œ and Ÿ live outside Latin-1 and are
    # listed explicitly.
    if re.search(r"[A-Za-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u00ff\u0152\u0153\u0178]", text):
        return "fr"
    return "other"
|
||||||
|
|
||||||
|
|
||||||
async def accurate_proverb(proverb_id: int) -> ProverbSearchResponse:
    """Look up one French proverb by id and return its details.

    Args:
        proverb_id: Primary key of the ``ProverbFr`` row.

    Returns:
        A ``ProverbSearchResponse`` with the proverb text and its Chinese
        explanation.

    Raises:
        HTTPException: 404 when no row with that id exists.
    """
    record = await ProverbFr.get_or_none(id=proverb_id)
    if not record:
        raise HTTPException(status_code=404, detail="Proverb not found")
    return ProverbSearchResponse(proverb_text=record.text, chi_exp=record.chi_exp)
|
||||||
|
|
||||||
|
|
||||||
async def suggest_proverb(
        query: str,
        lang: Literal["fr", "zh", "jp"],
        model: Type[Model],
        proverb_field: str = "text",
        chi_exp_field: str = "chi_exp",
        limit: int = 10,
) -> List[Dict[str, str]]:
    """Suggest proverbs from *model* matching *query*.

    Generic suggestion helper usable with any proverb table.

    Args:
        query: Search keyword; surrounding whitespace is ignored.
        lang: Language of the keyword. ``"zh"`` searches the Chinese
            explanation column; any other value searches the proverb column.
        model: Tortoise ORM model class to query, e.g. ``ProverbFr``.
        proverb_field: Name of the foreign-language proverb column.
        chi_exp_field: Name of the Chinese explanation column.
        limit: Maximum number of rows fetched for each match category.

    Returns:
        Dicts with the keys ``"id"``, ``"proverb"`` and ``"chi_exp"``:
        prefix matches first, then rows that contain the keyword without
        starting with it. Both groups are ordered by descending ``freq`` and
        duplicates (by id) are dropped. An empty keyword yields ``[]``.
    """
    needle = query.strip()
    if not needle:
        return []

    # Pick the column to search from the keyword language.
    target = chi_exp_field if lang == "zh" else proverb_field
    prefix_lookup = f"{target}__istartswith"
    infix_lookup = f"{target}__icontains"
    columns = ("id", proverb_field, chi_exp_field)

    # 1. Rows whose target column starts with the keyword.
    prefix_rows = await (
        model.filter(**{prefix_lookup: needle})
        .order_by("-freq")
        .limit(limit)
        .values(*columns)
    )

    # 2. Rows that contain the keyword but do not start with it.
    infix_only = Q(**{infix_lookup: needle}) & ~Q(**{prefix_lookup: needle})
    infix_rows = await (
        model.filter(infix_only)
        .order_by("-freq")
        .limit(limit)
        .values(*columns)
    )

    # 3. Merge both groups; the dict keeps first-seen order and drops any
    #    repeated id.
    merged = {}
    for row in [*prefix_rows, *infix_rows]:
        row_id = row["id"]
        if row_id in merged:
            continue
        merged[row_id] = {
            "id": row_id,
            "proverb": row[proverb_field],
            "chi_exp": row[chi_exp_field],
        }
    return list(merged.values())
|
||||||
|
|
@ -205,4 +224,5 @@ async def __main():
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
asyncio.run(__main())
|
# asyncio.run(__main())
|
||||||
|
print(detect_language(text="ahsjdasd"))
|
||||||
|
|
@ -8,8 +8,8 @@ from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
|
||||||
from app.models import User
|
from app.models import User
|
||||||
from app.schemas.trans_schemas import TransResponse, TransRequest
|
from app.schemas.trans_schemas import TransResponse, TransRequest
|
||||||
|
from app.utils.md5 import make_md5
|
||||||
from app.utils.security import is_admin_user, get_current_user
|
from app.utils.security import is_admin_user, get_current_user
|
||||||
from scripts.md5 import make_md5
|
|
||||||
from settings import settings
|
from settings import settings
|
||||||
|
|
||||||
translator_router = APIRouter()
|
translator_router = APIRouter()
|
||||||
|
|
|
||||||
|
|
@ -45,11 +45,10 @@ class DefinitionFr(Model):
|
||||||
|
|
||||||
class ProverbFr(Model):
|
class ProverbFr(Model):
|
||||||
id = fields.IntField(pk=True)
|
id = fields.IntField(pk=True)
|
||||||
proverb = fields.TextField(description="法语谚语及常用表达")
|
text = fields.TextField(description="法语谚语及常用表达")
|
||||||
chi_exp = fields.TextField(description="中文释义")
|
chi_exp = fields.TextField(description="中文释义")
|
||||||
freq = fields.IntField(default=0)
|
freq = fields.IntField(default=0)
|
||||||
created_at = fields.DatetimeField(auto_now_add=True)
|
created_at = fields.DatetimeField(auto_now_add=True)
|
||||||
updated_at = fields.DatetimeField(auto_now=True)
|
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table = "proverb_fr"
|
table = "proverb_fr"
|
||||||
|
|
|
||||||
|
|
@ -87,3 +87,14 @@ class PronunciationTestJp(Model):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table = "pronunciationtest_jp"
|
table = "pronunciationtest_jp"
|
||||||
|
|
||||||
|
class IdiomJp(Model):
    """Tortoise ORM model stored in the ``idiom_jp`` table."""

    id = fields.IntField(pk=True)
    # The four text columns are mandatory. Tortoise fields are non-nullable
    # unless ``null=True`` is passed, so the default is relied upon here.
    text = fields.TextField()
    chi_exp = fields.TextField()
    example = fields.TextField()
    search_text = fields.TextField()
    # Set automatically when the row is first created.
    created_at = fields.DatetimeField(auto_now_add=True)

    class Meta:
        table = "idiom_jp"
|
||||||
|
|
|
||||||
4
main.py
4
main.py
|
|
@ -18,7 +18,7 @@ from app.api.user.routes import users_router
|
||||||
from app.api.word_comment.routes import word_comment_router
|
from app.api.word_comment.routes import word_comment_router
|
||||||
from app.core.redis import init_redis, close_redis
|
from app.core.redis import init_redis, close_redis
|
||||||
from app.utils.phone_encrypt import PhoneEncrypt
|
from app.utils.phone_encrypt import PhoneEncrypt
|
||||||
from settings import ONLINE_SETTINGS
|
from settings import TORTOISE_ORM
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
|
|
@ -46,7 +46,7 @@ app.add_middleware(
|
||||||
|
|
||||||
register_tortoise(
|
register_tortoise(
|
||||||
app=app,
|
app=app,
|
||||||
config=ONLINE_SETTINGS,
|
config=TORTOISE_ORM,
|
||||||
)
|
)
|
||||||
|
|
||||||
app.include_router(users_router, tags=["User API"], prefix="/users")
|
app.include_router(users_router, tags=["User API"], prefix="/users")
|
||||||
|
|
|
||||||
|
|
@ -1,21 +1,21 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import re
|
import re
|
||||||
import unicodedata
|
import unicodedata
|
||||||
import jaconv
|
from importlib import resources
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import jaconv
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from fugashi import Tagger
|
from fugashi import Tagger
|
||||||
import unidic_lite
|
|
||||||
from importlib import resources
|
|
||||||
from pykakasi import kakasi
|
from pykakasi import kakasi
|
||||||
from tortoise import Tortoise
|
from tortoise import Tortoise
|
||||||
from tortoise.exceptions import MultipleObjectsReturned
|
from tortoise.exceptions import MultipleObjectsReturned
|
||||||
|
|
||||||
from app.models import WordlistJp, DefinitionJp, AttachmentJp, PosType
|
from app.models import WordlistJp, DefinitionJp, AttachmentJp, PosType
|
||||||
|
from app.models.jp import IdiomJp
|
||||||
from settings import TORTOISE_ORM
|
from settings import TORTOISE_ORM
|
||||||
|
|
||||||
xlsx_name = "./DictTable-20250823.xlsx"
|
xlsx_name = "./DictTable_20251029.xlsx"
|
||||||
xlsx_path = Path(xlsx_name)
|
xlsx_path = Path(xlsx_name)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -228,6 +228,24 @@ async def set_hiragana(xlsx_path: Path = xlsx_path, sheet_name : str="日汉释
|
||||||
|
|
||||||
await WordlistJp.filter(text=word).update(hiragana=hiragana)
|
await WordlistJp.filter(text=word).update(hiragana=hiragana)
|
||||||
|
|
||||||
|
async def import_idiom():
    """Load the Japanese idiom sheet of the workbook into ``IdiomJp``.

    The sheet is read from ``xlsx_path``. Columns are taken by position:
    idiom text, search text, Chinese explanation, example.

    Empty cells are stored as empty strings. pandas reads them as NaN, and
    ``str(NaN)`` would otherwise persist the literal text ``"nan"``. Rows
    without idiom text are skipped.
    """
    df = pd.read_excel(xlsx_path, sheet_name="日语惯用语")
    # Header cells are not guaranteed to be strings (a blank or numeric
    # header would have no ``.strip``), so coerce before trimming.
    df.columns = [str(col).strip() for col in df.columns]

    def _cell(value) -> str:
        """Return the trimmed text of a cell, or "" for a missing cell."""
        return "" if pd.isna(value) else str(value).strip()

    for row in df.itertuples():
        # row[0] is the DataFrame index; the data columns start at row[1].
        sentence = _cell(row[1])
        if not sentence:
            continue

        await IdiomJp.create(
            text=sentence,
            chi_exp=_cell(row[3]),
            example=_cell(row[4]),
            search_text=_cell(row[2]),
        )
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
await Tortoise.init(config=TORTOISE_ORM)
|
await Tortoise.init(config=TORTOISE_ORM)
|
||||||
|
|
@ -237,8 +255,8 @@ async def main():
|
||||||
# await import_wordlist_jp()
|
# await import_wordlist_jp()
|
||||||
# await import_def_jp()
|
# await import_def_jp()
|
||||||
# await import_attachment()
|
# await import_attachment()
|
||||||
await set_hiragana()
|
# await set_hiragana()
|
||||||
|
await import_idiom()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
asyncio.run(main())
|
asyncio.run(main())
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue