Compare commits

..

2 Commits

Author SHA1 Message Date
Miyamizu-MitsuhaSang fe6c84e310 更新谚语搜索,调整搜索函数 2025-11-03 00:12:37 +08:00
Miyamizu-MitsuhaSang b16917215a 更新谚语搜索,调整搜索函数 2025-11-02 23:56:14 +08:00
11 changed files with 142 additions and 73 deletions

View File

@ -334,16 +334,19 @@ Authorization: Bearer <your_jwt_token>
#### 2.2 法语谚语详情
- **接口**: `POST /search/proverb`
- **描述**: 通过谚语ID获取法语谚语原文与中文解释
- **描述**: 根据谚语ID返回法语谚语全文与中文释义
- **需要认证**: 是
- **查询参数**:
- `proverb_id`: 谚语ID (integer)
- **请求类型**: `application/x-www-form-urlencoded`
- **表单字段**:
- `proverb_id`: 谚语ID (integer必填)
- **响应**:
```json
{
"proverb_text": "Petit à petit, l'oiseau fait son nid.",
"chi_exp": "循序渐进才能取得成功。"
"result": {
"proverb_text": "Petit à petit, l'oiseau fait son nid.",
"chi_exp": "循序渐进才能取得成功。"
}
}
```
@ -380,14 +383,13 @@ Authorization: Bearer <your_jwt_token>
#### 2.4 谚语联想建议
- **接口**: `POST /search/proverb/list`
- **描述**: 按输入内容(自动识别法语或中文)返回谚语候选列表
- **描述**: 按输入内容返回谚语候选列表,后端会自动检测输入语言(中文/日文假名/拉丁字母),无法识别时退回法语字段搜索
- **需要认证**: 是
- **请求体**:
```json
{
"query": "慢",
"language": "fr"
"query": "慢"
}
```
@ -405,6 +407,9 @@ Authorization: Bearer <your_jwt_token>
}
```
- **状态码**:
- `200`: 查询成功
---
### 3. 翻译模块 (`/translate`)
@ -437,6 +442,7 @@ Authorization: Bearer <your_jwt_token>
}
```
- **限制**: 依赖 Redis 计数器做限流,同一用户每秒最多 2 次请求(超出返回 `429`)
- **状态码**:
- `200`: 翻译成功
- `401`: 未授权
@ -452,7 +458,7 @@ Authorization: Bearer <your_jwt_token>
- `from_lang`: 源语言,默认为 `auto`
- `to_lang`: 目标语言,默认为 `zh`
- **限制**: 每秒最多2次请求
- **限制**: 与标准翻译接口共享限流计数,同一用户每秒最多2次请求
- **状态码**:
- `200`: 翻译成功
- `429`: 请求频率过高
@ -770,7 +776,8 @@ Authorization: Bearer <your_jwt_token>
- **需要认证**: 是
- **查询参数**:
- `count`: 抽题数量 (integer默认 `20`)
- `lang`: 语种代码,支持 `fr-FR`(法语)、`ja-JP`(日语),默认 `fr-FR`
- **表单字段**:
- `lang`: 语种代码(`fr-FR` 或 `ja-JP`,默认 `fr-FR`)。由于实现方式,FastAPI 将其视为 form-data 字段,GET 请求需通过 form 提交或在调试文档中直接填写。
- **响应**:
```json

View File

@ -1,6 +1,6 @@
from typing import Literal, List
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi import APIRouter, Depends, HTTPException, Request, Form
from app.api.search_dict import service
from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse, SearchItemFr, SearchItemJp, \
@ -8,7 +8,7 @@ from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse
from app.api.search_dict.service import suggest_autocomplete
from app.api.word_comment.word_comment_schemas import CommentSet
from app.models import DefinitionJp, CommentFr, CommentJp
from app.models.fr import DefinitionFr
from app.models.fr import DefinitionFr, ProverbFr
from app.utils.all_kana import all_in_kana
from app.utils.security import get_current_user
from app.utils.textnorm import normalize_text
@ -165,7 +165,19 @@ async def search_word_list(query_word: SearchRequest, user=Depends(get_current_u
@dict_search.post("/search/proverb/list")
async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get_current_user)):
lang: Literal['fr', 'zh'] = 'zh' if service.contains_chinese(query_word.query) else 'fr'
suggest_proverbs = await service.suggest_proverb(query=query_word, lang=lang)
async def search_proverb_list(query_word: ProverbSearchRequest):
    """Return proverb suggestion candidates for the given input.

    The input language is auto-detected by ``service.detect_language``
    (zh / jp / fr / other); Latin-script input is normalized before the
    lookup, CJK input is searched as-is.
    """
    lang = service.detect_language(text=query_word.query)
    # Normalize only Latin-script ("fr") input; CJK text must stay untouched.
    query = normalize_text(query_word.query) if lang == "fr" else query_word.query
    suggest_proverbs = await service.suggest_proverb(
        # BUG FIX: previously passed the raw query_word.query here, silently
        # discarding the normalized `query` computed above.
        query=query,
        lang=lang,
        model=ProverbFr,
        search_field="search_text",
    )
    # TODO: when using the French dictionary, can input arrive in English?
    return {"list": suggest_proverbs}
@dict_search.post("/search/proverb")
async def search_proverb(proverb_id:int = Form(...), user=Depends(get_current_user)):
    """Return the full text and Chinese explanation for one proverb.

    `proverb_id` arrives as a form-encoded field (application/x-www-form-urlencoded);
    `service.accurate_proverb` raises HTTPException(404) when the id does not exist.
    """
    result = await service.accurate_proverb(proverb_id=proverb_id)
    return {"result": result}

View File

@ -13,7 +13,7 @@ class SearchRequest(BaseModel):
class ProverbSearchRequest(BaseModel):
query: str
language: Literal['fr', 'jp'] = "fr"
dict_language: Literal['fr', 'jp'] = "fr"
class SearchItemJp(BaseModel):

View File

@ -1,9 +1,8 @@
import asyncio
import re
from typing import List, Tuple, Dict, Literal
from typing import List, Tuple, Dict, Literal, Type
from fastapi import HTTPException
from tortoise import Tortoise
from tortoise import Tortoise, Model
from tortoise.expressions import Q
from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchResponse, ProverbSearchRequest
@ -14,75 +13,97 @@ from app.utils.textnorm import normalize_text
from settings import TORTOISE_ORM
def contains_chinese(text: str) -> bool:
    """Return True if *text* holds at least one CJK ideograph."""
    found = re.search(r'[\u4e00-\u9fff]', text)
    return found is not None
def detect_language(text: str) -> Literal["fr", "zh", "jp", "other"]:
    """Auto-detect the language of *text*.

    Returns one of 'jp', 'zh', 'fr' or 'other'.

    BUG FIX: kana is now checked BEFORE the CJK-ideograph range. Japanese
    text normally mixes kanji with kana, and the previous ordering
    (ideographs first) misclassified any kanji-containing Japanese input
    as 'zh'. Chinese text never contains kana, so kana-first is safe for
    both languages.
    """
    if re.search(r"[\u3040-\u30ff\u31f0-\u31ff]", text):  # hiragana / katakana / kana extensions
        return "jp"
    if re.search(r"[\u4e00-\u9fff]", text):  # CJK unified ideographs
        return "zh"
    if re.search(r"[a-zA-ZÀ-ÿ]", text):  # Latin incl. accented French letters
        return "fr"
    return "other"
async def accurate_proverb(proverb_id: int) -> ProverbSearchResponse:
"""对于查询法语谚语的精准查询,返回详细信息"""
proverb = await ProverbFr.get_or_none(id=proverb_id)
if not proverb:
raise HTTPException(status_code=404, detail="Proverb not found")
return ProverbSearchResponse(
proverb_text=proverb.proverb,
proverb_text=proverb.text,
chi_exp=proverb.chi_exp,
)
async def suggest_proverb(query: ProverbSearchRequest, lang: Literal['fr', 'zh']) -> List[Dict[str, str]]:
async def suggest_proverb(
query: str,
lang: Literal["fr", "zh", "jp"],
model: Type[Model],
search_field: str = "search_text",
target_field: str = "text",
chi_exp_field: str = "chi_exp",
limit: int = 10,
) -> List[Dict[str, str]]:
"""
对法语谚语表进行搜索建议
通用搜索建议函数用于多语言谚语表
参数:
query.query: 搜索关键词
query: 搜索关键词
        lang: 'fr'、'zh' 或 'jp'
逻辑:
1. lang='fr'按谚语字段 (proverb) 搜索
2. lang='zh'按中文释义字段 (chi_exp) 搜索
3. 优先以输入开头的匹配
4. 其次为包含输入但不以其开头的匹配 freq 排序
:return: [{'id': 1, 'proverb': 'xxx'}, ...]
model: Tortoise ORM 模型类例如 ProverbFr
        target_field: 外语谚语字段名;search_field: 检索用字段名
chi_exp_field: 中文释义字段名
limit: 每类匹配的最大返回数量
搜索逻辑:
1. 根据语言选择搜索字段
2. 优先匹配以输入开头的结果
3. 其次匹配包含输入但非开头的结果
4. 合并去重后返回
"""
keyword = query.query.strip()
results: List[Dict[str, str]] = []
keyword = query.strip()
if not keyword:
return results
return []
# ✅ 根据语言决定搜索字段
# ✅ 根据语言选择搜索字段
if lang == "zh":
startswith_field = "chi_exp__istartswith"
contains_field = "chi_exp__icontains"
else: # 默认法语
startswith_field = "proverb__istartswith"
contains_field = "proverb__icontains"
startswith_field = f"{chi_exp_field}__istartswith"
contains_field = f"{chi_exp_field}__icontains"
else:
startswith_field = f"{search_field}__istartswith"
contains_field = f"{search_field}__icontains"
# ✅ 1. 开头匹配
start_matches = await (
ProverbFr.filter(**{startswith_field: keyword})
model.filter(**{startswith_field: keyword})
.order_by("-freq")
.limit(10)
.values("id", "proverb", "chi_exp")
.limit(limit)
.values("id", target_field, search_field, chi_exp_field)
)
# ✅ 2. 包含匹配(但不是开头)
    # ✅ 2. 包含匹配(非开头)
contain_matches = await (
ProverbFr.filter(
model.filter(
Q(**{contains_field: keyword}) & ~Q(**{startswith_field: keyword})
)
.order_by("-freq")
.limit(10)
.values("id", "proverb", "chi_exp")
.limit(limit)
.values("id", target_field, search_field, chi_exp_field)
)
# ✅ 合并结果(去重并保持顺序)
# ✅ 3. 合并去重并保持顺序
results: List[Dict[str, str]] = []
seen_ids = set()
for row in start_matches + contain_matches:
if row["id"] not in seen_ids:
seen_ids.add(row["id"])
results.append({
"id": row["id"],
"proverb": row["proverb"],
"chi_exp": row["chi_exp"]
"proverb": row[target_field],
"search_text": row[search_field],
"chi_exp": row[chi_exp_field]
})
return results
@ -205,4 +226,5 @@ async def __main():
if __name__ == '__main__':
asyncio.run(__main())
# asyncio.run(__main())
print(detect_language(text="ahsjdasd"))

View File

@ -8,8 +8,8 @@ from fastapi import APIRouter, Depends, HTTPException
from app.models import User
from app.schemas.trans_schemas import TransResponse, TransRequest
from app.utils.md5 import make_md5
from app.utils.security import is_admin_user, get_current_user
from scripts.md5 import make_md5
from settings import settings
translator_router = APIRouter()

View File

@ -45,11 +45,11 @@ class DefinitionFr(Model):
class ProverbFr(Model):
id = fields.IntField(pk=True)
proverb = fields.TextField(description="法语谚语及常用表达")
text = fields.TextField(description="法语谚语及常用表达")
chi_exp = fields.TextField(description="中文释义")
freq = fields.IntField(default=0)
search_text = fields.TextField()
created_at = fields.DatetimeField(auto_now_add=True)
updated_at = fields.DatetimeField(auto_now=True)
class Meta:
table = "proverb_fr"

View File

@ -87,3 +87,14 @@ class PronunciationTestJp(Model):
class Meta:
table = "pronunciationtest_jp"
class IdiomJp(Model):
    # Japanese idiom (慣用語) table; mirrors the text / search_text / chi_exp
    # column layout used by the French proverb table.
    id = fields.IntField(pk=True)
    text = fields.TextField(null=False)  # the idiom itself
    chi_exp = fields.TextField(null=False)  # Chinese explanation
    example = fields.TextField(null=False)  # example sentence
    search_text = fields.TextField(null=False)  # field used for suggestion lookups — presumably a normalized reading; TODO confirm
    created_at = fields.DatetimeField(auto_now_add=True)

    class Meta:
        table = "idiom_jp"

View File

@ -18,7 +18,7 @@ from app.api.user.routes import users_router
from app.api.word_comment.routes import word_comment_router
from app.core.redis import init_redis, close_redis
from app.utils.phone_encrypt import PhoneEncrypt
from settings import ONLINE_SETTINGS
from settings import TORTOISE_ORM
@asynccontextmanager
@ -46,7 +46,7 @@ app.add_middleware(
register_tortoise(
app=app,
config=ONLINE_SETTINGS,
config=TORTOISE_ORM,
)
app.include_router(users_router, tags=["User API"], prefix="/users")

View File

@ -2,7 +2,7 @@ import asyncio
from pathlib import Path
import pandas as pd
from tortoise import Tortoise, connections
from tortoise import Tortoise
from tortoise.exceptions import MultipleObjectsReturned
from app.models.fr import DefinitionFr, WordlistFr
@ -101,14 +101,13 @@ async def varification_eg():
async def main():
await Tortoise.init(config=TORTOISE_ORM)
await DefinitionFr.all().delete() # TRUNCATE TABLE definitions_fr;
conn = connections.get("default")
await conn.execute_script("""
ALTER TABLE definitions_fr AUTO_INCREMENT = 1;
""")
await import_def_fr()
# await import_wordlist_fr()
# await DefinitionFr.all().delete() # TRUNCATE TABLE definitions_fr;
# conn = connections.get("default")
# await conn.execute_script("""
# ALTER TABLE definitions_fr AUTO_INCREMENT = 1;
# """)
# await import_def_fr()
# # await import_wordlist_fr()
if __name__ == "__main__":
asyncio.run(main())

View File

@ -1,21 +1,21 @@
import asyncio
import re
import unicodedata
import jaconv
from importlib import resources
from pathlib import Path
import jaconv
import pandas as pd
from fugashi import Tagger
import unidic_lite
from importlib import resources
from pykakasi import kakasi
from tortoise import Tortoise
from tortoise.exceptions import MultipleObjectsReturned
from app.models import WordlistJp, DefinitionJp, AttachmentJp, PosType
from app.models.jp import IdiomJp
from settings import TORTOISE_ORM
xlsx_name = "./DictTable-20250823.xlsx"
xlsx_name = "./DictTable_20251029.xlsx"
xlsx_path = Path(xlsx_name)
@ -228,6 +228,24 @@ async def set_hiragana(xlsx_path: Path = xlsx_path, sheet_name : str="日汉释
await WordlistJp.filter(text=word).update(hiragana=hiragana)
async def import_idiom():
    """Import Japanese idioms from the '日语惯用语' sheet into IdiomJp.

    Cells are read positionally: col 1 = idiom text, col 2 = search_text,
    col 3 = Chinese explanation, col 4 = example — assumes the sheet keeps
    this column order; TODO confirm against the workbook.
    """
    df = pd.read_excel(xlsx_path, sheet_name="日语惯用语")
    df.columns = [col.strip() for col in df.columns]
    for row in df.itertuples():
        cells = row[1:5]
        # BUG FIX: skip rows with missing cells — str(NaN) would otherwise
        # store the literal string "nan" in the database.
        if any(pd.isna(c) for c in cells):
            continue
        sentence, search_text, chi_exp, example = (str(c).strip() for c in cells)
        await IdiomJp.create(
            text=sentence,
            chi_exp=chi_exp,
            example=example,
            search_text=search_text,
        )
async def main():
await Tortoise.init(config=TORTOISE_ORM)
@ -237,8 +255,8 @@ async def main():
# await import_wordlist_jp()
# await import_def_jp()
# await import_attachment()
await set_hiragana()
# await set_hiragana()
await import_idiom()
if __name__ == '__main__':
asyncio.run(main())