Compare commits

..

2 Commits

Author SHA1 Message Date
Miyamizu-MitsuhaSang fe6c84e310 更新谚语搜索,调整搜索函数 2025-11-03 00:12:37 +08:00
Miyamizu-MitsuhaSang b16917215a 更新谚语搜索,调整搜索函数 2025-11-02 23:56:14 +08:00
11 changed files with 142 additions and 73 deletions

View File

@@ -334,16 +334,19 @@ Authorization: Bearer <your_jwt_token>
#### 2.2 法语谚语详情 #### 2.2 法语谚语详情
- **接口**: `POST /search/proverb` - **接口**: `POST /search/proverb`
- **描述**: 通过谚语ID获取法语谚语原文与中文解释 - **描述**: 根据谚语ID返回法语谚语全文与中文释义
- **需要认证**: 是 - **需要认证**: 是
- **查询参数**: - **请求类型**: `application/x-www-form-urlencoded`
- `proverb_id`: 谚语ID (integer) - **表单字段**:
- `proverb_id`: 谚语ID (integer,必填)
- **响应**: - **响应**:
```json ```json
{ {
"proverb_text": "Petit à petit, l'oiseau fait son nid.", "result": {
"chi_exp": "循序渐进才能取得成功。" "proverb_text": "Petit à petit, l'oiseau fait son nid.",
"chi_exp": "循序渐进才能取得成功。"
}
} }
``` ```
@@ -380,14 +383,13 @@ Authorization: Bearer <your_jwt_token>
#### 2.4 谚语联想建议 #### 2.4 谚语联想建议
- **接口**: `POST /search/proverb/list` - **接口**: `POST /search/proverb/list`
- **描述**: 按输入内容(自动识别法语或中文)返回谚语候选列表 - **描述**: 按输入内容返回谚语候选列表,后端会自动检测输入语言(中文/日文假名/拉丁字母),无法识别时退回法语字段搜索
- **需要认证**: 是 - **需要认证**: 是
- **请求体**: - **请求体**:
```json ```json
{ {
"query": "慢", "query": "慢"
"language": "fr"
} }
``` ```
@@ -405,6 +407,9 @@ Authorization: Bearer <your_jwt_token>
} }
``` ```
- **状态码**:
- `200`: 查询成功
--- ---
### 3. 翻译模块 (`/translate`) ### 3. 翻译模块 (`/translate`)
@@ -437,6 +442,7 @@ Authorization: Bearer <your_jwt_token>
} }
``` ```
- **限制**: 依赖 Redis 计数器做限流,同一用户每秒最多 2 次请求(超出返回 `429`)
- **状态码**: - **状态码**:
- `200`: 翻译成功 - `200`: 翻译成功
- `401`: 未授权 - `401`: 未授权
@@ -452,7 +458,7 @@ Authorization: Bearer <your_jwt_token>
- `from_lang`: 源语言,默认为 `auto` - `from_lang`: 源语言,默认为 `auto`
- `to_lang`: 目标语言,默认为 `zh` - `to_lang`: 目标语言,默认为 `zh`
- **限制**: 每秒最多2次请求 - **限制**: 与标准翻译接口共享限流计数,同一用户每秒最多2次请求
- **状态码**: - **状态码**:
- `200`: 翻译成功 - `200`: 翻译成功
- `429`: 请求频率过高 - `429`: 请求频率过高
@@ -770,7 +776,8 @@ Authorization: Bearer <your_jwt_token>
- **需要认证**: 是 - **需要认证**: 是
- **查询参数**: - **查询参数**:
- `count`: 抽题数量 (integer,默认 `20`) - `count`: 抽题数量 (integer,默认 `20`)
- `lang`: 语种代码,支持 `fr-FR`(法语)、`ja-JP`(日语),默认 `fr-FR` - **表单字段**:
- `lang`: 语种代码(`fr-FR` 或 `ja-JP`,默认 `fr-FR`)。由于实现方式,FastAPI 将其视为 form-data 字段,GET 请求需通过 form 提交或在调试文档中直接填写。
- **响应**: - **响应**:
```json ```json

View File

@@ -1,6 +1,6 @@
from typing import Literal, List from typing import Literal, List
from fastapi import APIRouter, Depends, HTTPException, Request from fastapi import APIRouter, Depends, HTTPException, Request, Form
from app.api.search_dict import service from app.api.search_dict import service
from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse, SearchItemFr, SearchItemJp, \ from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse, SearchItemFr, SearchItemJp, \
@@ -8,7 +8,7 @@ from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse
from app.api.search_dict.service import suggest_autocomplete from app.api.search_dict.service import suggest_autocomplete
from app.api.word_comment.word_comment_schemas import CommentSet from app.api.word_comment.word_comment_schemas import CommentSet
from app.models import DefinitionJp, CommentFr, CommentJp from app.models import DefinitionJp, CommentFr, CommentJp
from app.models.fr import DefinitionFr from app.models.fr import DefinitionFr, ProverbFr
from app.utils.all_kana import all_in_kana from app.utils.all_kana import all_in_kana
from app.utils.security import get_current_user from app.utils.security import get_current_user
from app.utils.textnorm import normalize_text from app.utils.textnorm import normalize_text
@@ -165,7 +165,19 @@ async def search_word_list(query_word: SearchRequest, user=Depends(get_current_u
@dict_search.post("/search/proverb/list") @dict_search.post("/search/proverb/list")
async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get_current_user)): async def search_proverb_list(query_word: ProverbSearchRequest):
lang: Literal['fr', 'zh'] = 'zh' if service.contains_chinese(query_word.query) else 'fr' lang = service.detect_language(text=query_word.query)
suggest_proverbs = await service.suggest_proverb(query=query_word, lang=lang) query = normalize_text(query_word.query) if lang == "fr" else query_word.query
suggest_proverbs = await service.suggest_proverb(
query=query_word.query,
lang=lang,
model=ProverbFr,
search_field="search_text",
)
# TODO 使用法语词典时是否存在用英语输入的情况
return {"list": suggest_proverbs} return {"list": suggest_proverbs}
@dict_search.post("/search/proverb")
async def search_proverb(proverb_id:int = Form(...), user=Depends(get_current_user)):
result = await service.accurate_proverb(proverb_id=proverb_id)
return {"result": result}

View File

@@ -13,7 +13,7 @@ class SearchRequest(BaseModel):
class ProverbSearchRequest(BaseModel): class ProverbSearchRequest(BaseModel):
query: str query: str
language: Literal['fr', 'jp'] = "fr" dict_language: Literal['fr', 'jp'] = "fr"
class SearchItemJp(BaseModel): class SearchItemJp(BaseModel):

View File

@@ -1,9 +1,8 @@
import asyncio
import re import re
from typing import List, Tuple, Dict, Literal from typing import List, Tuple, Dict, Literal, Type
from fastapi import HTTPException from fastapi import HTTPException
from tortoise import Tortoise from tortoise import Tortoise, Model
from tortoise.expressions import Q from tortoise.expressions import Q
from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchResponse, ProverbSearchRequest from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchResponse, ProverbSearchRequest
@@ -14,75 +13,97 @@ from app.utils.textnorm import normalize_text
from settings import TORTOISE_ORM from settings import TORTOISE_ORM
def contains_chinese(text: str) -> bool: def detect_language(text: str) -> Literal["fr", "zh", "jp", "other"]:
"""判断字符串中是否包含至少一个中文字符""" """
return bool(re.search(r'[\u4e00-\u9fff]', text)) 自动检测输入语言:
返回 'zh' / 'jp' / 'fr' / 'other'
"""
if re.search(r"[\u4e00-\u9fff]", text):
return "zh"
elif re.search(r"[\u3040-\u30ff\u31f0-\u31ff]", text): # 日文假名范围
return "jp"
elif re.search(r"[a-zA-ZÀ-ÿ]", text):
return "fr"
return "other"
async def accurate_proverb(proverb_id: int) -> ProverbSearchResponse: async def accurate_proverb(proverb_id: int) -> ProverbSearchResponse:
"""对于查询法语谚语的精准查询,返回详细信息"""
proverb = await ProverbFr.get_or_none(id=proverb_id) proverb = await ProverbFr.get_or_none(id=proverb_id)
if not proverb: if not proverb:
raise HTTPException(status_code=404, detail="Proverb not found") raise HTTPException(status_code=404, detail="Proverb not found")
return ProverbSearchResponse( return ProverbSearchResponse(
proverb_text=proverb.proverb, proverb_text=proverb.text,
chi_exp=proverb.chi_exp, chi_exp=proverb.chi_exp,
) )
async def suggest_proverb(query: ProverbSearchRequest, lang: Literal['fr', 'zh']) -> List[Dict[str, str]]: async def suggest_proverb(
query: str,
lang: Literal["fr", "zh", "jp"],
model: Type[Model],
search_field: str = "search_text",
target_field: str = "text",
chi_exp_field: str = "chi_exp",
limit: int = 10,
) -> List[Dict[str, str]]:
""" """
对法语谚语表进行搜索建议 通用搜索建议函数用于多语言谚语表
参数: 参数:
query.query: 搜索关键词 query: 搜索关键词
lang: 'fr' 'zh' lang: 'fr' 'zh'
逻辑: model: Tortoise ORM 模型类例如 ProverbFr
1. lang='fr'按谚语字段 (proverb) 搜索 proverb_field: 外语谚语字段名
2. lang='zh'按中文释义字段 (chi_exp) 搜索 chi_exp_field: 中文释义字段名
3. 优先以输入开头的匹配 limit: 每类匹配的最大返回数量
4. 其次为包含输入但不以其开头的匹配 freq 排序
:return: [{'id': 1, 'proverb': 'xxx'}, ...] 搜索逻辑:
1. 根据语言选择搜索字段
2. 优先匹配以输入开头的结果
3. 其次匹配包含输入但非开头的结果
4. 合并去重后返回
""" """
keyword = query.query.strip() keyword = query.strip()
results: List[Dict[str, str]] = []
if not keyword: if not keyword:
return results return []
# ✅ 根据语言决定搜索字段 # ✅ 根据语言选择搜索字段
if lang == "zh": if lang == "zh":
startswith_field = "chi_exp__istartswith" startswith_field = f"{chi_exp_field}__istartswith"
contains_field = "chi_exp__icontains" contains_field = f"{chi_exp_field}__icontains"
else: # 默认法语 else:
startswith_field = "proverb__istartswith" startswith_field = f"{search_field}__istartswith"
contains_field = "proverb__icontains" contains_field = f"{search_field}__icontains"
# ✅ 1. 开头匹配 # ✅ 1. 开头匹配
start_matches = await ( start_matches = await (
ProverbFr.filter(**{startswith_field: keyword}) model.filter(**{startswith_field: keyword})
.order_by("-freq") .order_by("-freq")
.limit(10) .limit(limit)
.values("id", "proverb", "chi_exp") .values("id", target_field, search_field, chi_exp_field)
) )
# ✅ 2. 包含匹配(但不是开头) # ✅ 2. 包含匹配(但不是开头)
contain_matches = await ( contain_matches = await (
ProverbFr.filter( model.filter(
Q(**{contains_field: keyword}) & ~Q(**{startswith_field: keyword}) Q(**{contains_field: keyword}) & ~Q(**{startswith_field: keyword})
) )
.order_by("-freq") .order_by("-freq")
.limit(10) .limit(limit)
.values("id", "proverb", "chi_exp") .values("id", target_field, search_field, chi_exp_field)
) )
# ✅ 合并结果(去重并保持顺序) # ✅ 3. 合并去重并保持顺序
results: List[Dict[str, str]] = []
seen_ids = set() seen_ids = set()
for row in start_matches + contain_matches: for row in start_matches + contain_matches:
if row["id"] not in seen_ids: if row["id"] not in seen_ids:
seen_ids.add(row["id"]) seen_ids.add(row["id"])
results.append({ results.append({
"id": row["id"], "id": row["id"],
"proverb": row["proverb"], "proverb": row[target_field],
"chi_exp": row["chi_exp"] "search_text": row[search_field],
"chi_exp": row[chi_exp_field]
}) })
return results return results
@@ -205,4 +226,5 @@ async def __main():
if __name__ == '__main__': if __name__ == '__main__':
asyncio.run(__main()) # asyncio.run(__main())
print(detect_language(text="ahsjdasd"))

View File

@@ -8,8 +8,8 @@ from fastapi import APIRouter, Depends, HTTPException
from app.models import User from app.models import User
from app.schemas.trans_schemas import TransResponse, TransRequest from app.schemas.trans_schemas import TransResponse, TransRequest
from app.utils.md5 import make_md5
from app.utils.security import is_admin_user, get_current_user from app.utils.security import is_admin_user, get_current_user
from scripts.md5 import make_md5
from settings import settings from settings import settings
translator_router = APIRouter() translator_router = APIRouter()

View File

@@ -45,11 +45,11 @@ class DefinitionFr(Model):
class ProverbFr(Model): class ProverbFr(Model):
id = fields.IntField(pk=True) id = fields.IntField(pk=True)
proverb = fields.TextField(description="法语谚语及常用表达") text = fields.TextField(description="法语谚语及常用表达")
chi_exp = fields.TextField(description="中文释义") chi_exp = fields.TextField(description="中文释义")
freq = fields.IntField(default=0) freq = fields.IntField(default=0)
search_text = fields.TextField()
created_at = fields.DatetimeField(auto_now_add=True) created_at = fields.DatetimeField(auto_now_add=True)
updated_at = fields.DatetimeField(auto_now=True)
class Meta: class Meta:
table = "proverb_fr" table = "proverb_fr"

View File

@@ -87,3 +87,14 @@ class PronunciationTestJp(Model):
class Meta: class Meta:
table = "pronunciationtest_jp" table = "pronunciationtest_jp"
class IdiomJp(Model):
id = fields.IntField(pk=True)
text = fields.TextField(null=False)
chi_exp = fields.TextField(null=False)
example = fields.TextField(null=False)
search_text = fields.TextField(null=False)
created_at = fields.DatetimeField(auto_now_add=True)
class Meta:
table = "idiom_jp"

View File

@@ -18,7 +18,7 @@ from app.api.user.routes import users_router
from app.api.word_comment.routes import word_comment_router from app.api.word_comment.routes import word_comment_router
from app.core.redis import init_redis, close_redis from app.core.redis import init_redis, close_redis
from app.utils.phone_encrypt import PhoneEncrypt from app.utils.phone_encrypt import PhoneEncrypt
from settings import ONLINE_SETTINGS from settings import TORTOISE_ORM
@asynccontextmanager @asynccontextmanager
@@ -46,7 +46,7 @@
register_tortoise( register_tortoise(
app=app, app=app,
config=ONLINE_SETTINGS, config=TORTOISE_ORM,
) )
app.include_router(users_router, tags=["User API"], prefix="/users") app.include_router(users_router, tags=["User API"], prefix="/users")

View File

@@ -2,7 +2,7 @@ import asyncio
from pathlib import Path from pathlib import Path
import pandas as pd import pandas as pd
from tortoise import Tortoise, connections from tortoise import Tortoise
from tortoise.exceptions import MultipleObjectsReturned from tortoise.exceptions import MultipleObjectsReturned
from app.models.fr import DefinitionFr, WordlistFr from app.models.fr import DefinitionFr, WordlistFr
@@ -101,14 +101,13 @@ async def varification_eg():
async def main(): async def main():
await Tortoise.init(config=TORTOISE_ORM) await Tortoise.init(config=TORTOISE_ORM)
await DefinitionFr.all().delete() # TRUNCATE TABLE definitions_fr; # await DefinitionFr.all().delete() # TRUNCATE TABLE definitions_fr;
conn = connections.get("default") # conn = connections.get("default")
await conn.execute_script(""" # await conn.execute_script("""
ALTER TABLE definitions_fr AUTO_INCREMENT = 1; # ALTER TABLE definitions_fr AUTO_INCREMENT = 1;
""") # """)
await import_def_fr() # await import_def_fr()
# await import_wordlist_fr() # # await import_wordlist_fr()
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(main()) asyncio.run(main())

View File

@@ -1,21 +1,21 @@
import asyncio import asyncio
import re import re
import unicodedata import unicodedata
import jaconv from importlib import resources
from pathlib import Path from pathlib import Path
import jaconv
import pandas as pd import pandas as pd
from fugashi import Tagger from fugashi import Tagger
import unidic_lite
from importlib import resources
from pykakasi import kakasi from pykakasi import kakasi
from tortoise import Tortoise from tortoise import Tortoise
from tortoise.exceptions import MultipleObjectsReturned from tortoise.exceptions import MultipleObjectsReturned
from app.models import WordlistJp, DefinitionJp, AttachmentJp, PosType from app.models import WordlistJp, DefinitionJp, AttachmentJp, PosType
from app.models.jp import IdiomJp
from settings import TORTOISE_ORM from settings import TORTOISE_ORM
xlsx_name = "./DictTable-20250823.xlsx" xlsx_name = "./DictTable_20251029.xlsx"
xlsx_path = Path(xlsx_name) xlsx_path = Path(xlsx_name)
@@ -228,6 +228,24 @@ async def set_hiragana(xlsx_path: Path = xlsx_path, sheet_name : str="日汉释
await WordlistJp.filter(text=word).update(hiragana=hiragana) await WordlistJp.filter(text=word).update(hiragana=hiragana)
async def import_idiom():
path = xlsx_path
df = pd.read_excel(path, sheet_name="日语惯用语")
df.columns = [col.strip() for col in df.columns]
for row in df.itertuples():
sentence = str(row[1]).strip()
search_text = str(row[2]).strip()
chi_exp = str(row[3]).strip()
example = str(row[4]).strip()
await IdiomJp.create(
text=sentence,
chi_exp=chi_exp,
example=example,
search_text=search_text,
)
async def main(): async def main():
await Tortoise.init(config=TORTOISE_ORM) await Tortoise.init(config=TORTOISE_ORM)
@@ -237,8 +255,8 @@ async def main():
# await import_wordlist_jp() # await import_wordlist_jp()
# await import_def_jp() # await import_def_jp()
# await import_attachment() # await import_attachment()
await set_hiragana() # await set_hiragana()
await import_idiom()
if __name__ == '__main__': if __name__ == '__main__':
asyncio.run(main()) asyncio.run(main())