Compare commits
3 Commits
2edd3e7a56
...
8897680d24
| Author | SHA1 | Date |
|---|---|---|
|
|
8897680d24 | |
|
|
2608a27abc | |
|
|
d7658db3e8 |
75
README.md
75
README.md
|
|
@ -344,8 +344,9 @@ Authorization: Bearer <your_jwt_token>
|
|||
```json
|
||||
{
|
||||
"result": {
|
||||
"proverb_text": "Petit à petit, l'oiseau fait son nid.",
|
||||
"chi_exp": "循序渐进才能取得成功。"
|
||||
"text": "Petit à petit, l'oiseau fait son nid.",
|
||||
"chi_exp": "循序渐进才能取得成功。",
|
||||
"freq": 128
|
||||
}
|
||||
}
|
||||
```
|
||||
|
|
@ -356,7 +357,7 @@ Authorization: Bearer <your_jwt_token>
|
|||
|
||||
#### 2.3 单词联想建议
|
||||
|
||||
- **接口**: `POST /search/word/list`
|
||||
- **接口**: `POST /search/list/word`
|
||||
- **描述**: 根据用户输入返回单词联想列表,含前缀匹配与包含匹配。
|
||||
- **需要认证**: 是
|
||||
- **请求体**:
|
||||
|
|
@ -380,16 +381,20 @@ Authorization: Bearer <your_jwt_token>
|
|||
|
||||
> **说明**: `language = "jp"` 时返回形如 `[["愛", "あい"], ["愛する", "あいする"]]` 的二维数组,第二列为假名读音。
|
||||
|
||||
- **状态码**:
|
||||
- `200`: 查询成功
|
||||
|
||||
#### 2.4 谚语联想建议
|
||||
|
||||
- **接口**: `POST /search/proverb/list`
|
||||
- **接口**: `POST /search/list/proverb`
|
||||
- **描述**: 按输入内容返回谚语候选列表,后端会自动检测输入语言(中文/日文假名/拉丁字母),无法识别时退回法语字段搜索。
|
||||
- **需要认证**: 是
|
||||
- **请求体**:
|
||||
|
||||
```json
|
||||
{
|
||||
"query": "慢"
|
||||
"query": "慢",
|
||||
"dict_language": "fr"
|
||||
}
|
||||
```
|
||||
|
||||
|
|
@ -410,6 +415,64 @@ Authorization: Bearer <your_jwt_token>
|
|||
- **状态码**:
|
||||
- `200`: 查询成功
|
||||
|
||||
#### 2.5 日语惯用语联想建议
|
||||
|
||||
- **接口**: `POST /search/list/idiom`
|
||||
- **描述**: 针对日语惯用语返回联想候选,支持输入日文假名或中文汉字;若输入匹配汉字映射表,会并发查询假名结果并合并输出。
|
||||
- **需要认证**: 是
|
||||
- **请求体**:
|
||||
|
||||
```json
|
||||
{
|
||||
"query": "愛してる",
|
||||
"dict_language": "jp"
|
||||
}
|
||||
```
|
||||
|
||||
- **响应示例**:
|
||||
|
||||
```json
|
||||
{
|
||||
"list": [
|
||||
{
|
||||
"id": 21,
|
||||
"proverb": "愛してる",
|
||||
"chi_exp": "我爱你"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
- **状态码**:
|
||||
- `200`: 查询成功
|
||||
- `400`: 当 `dict_language` 不是 `jp` 时返回错误信息
|
||||
|
||||
#### 2.6 日语惯用语详情
|
||||
|
||||
- **接口**: `POST /search/idiom`
|
||||
- **描述**: 根据惯用语 ID 返回详细信息并增加访问频次。
|
||||
- **需要认证**: 是
|
||||
- **查询参数**:
|
||||
- `query_id`: 惯用语 ID (integer)
|
||||
- **响应示例**:
|
||||
|
||||
```json
|
||||
{
|
||||
"result": {
|
||||
"id": 21,
|
||||
"text": "愛してる",
|
||||
"search_text": "あいしてる",
|
||||
"chi_exp": "我爱你",
|
||||
"example": "私はあなたを愛してる。",
|
||||
"freq": 57
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- **状态码**:
|
||||
- `200`: 查询成功
|
||||
- `404`: 惯用语不存在
|
||||
|
||||
---
|
||||
|
||||
### 3. 翻译模块 (`/translate`)
|
||||
|
|
@ -1061,7 +1124,7 @@ curl -X POST "http://127.0.0.1:8000/search/word" \
|
|||
}'
|
||||
|
||||
# 4. 获取单词联想列表
|
||||
curl -X POST "http://127.0.0.1:8000/search/word/list" \
|
||||
curl -X POST "http://127.0.0.1:8000/search/list/word" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer <your_token_here>" \
|
||||
-d '{
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import asyncio
|
||||
from typing import Literal, List
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, Form
|
||||
|
|
@ -5,7 +6,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request, Form
|
|||
from app.api.search_dict import service
|
||||
from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse, SearchItemFr, SearchItemJp, \
|
||||
ProverbSearchRequest
|
||||
from app.api.search_dict.service import suggest_autocomplete, accurate_proverb
|
||||
from app.api.search_dict.service import suggest_autocomplete
|
||||
from app.api.word_comment.word_comment_schemas import CommentSet
|
||||
from app.models import DefinitionJp, CommentFr, CommentJp
|
||||
from app.models.fr import DefinitionFr, ProverbFr
|
||||
|
|
@ -136,23 +137,10 @@ async def search(request: Request, body: SearchRequest, user=Depends(get_current
|
|||
)
|
||||
|
||||
|
||||
@dict_search.post("/search/proverb")
|
||||
async def proverb(request: Request, proverb_id: int, user=Depends(get_current_user)):
|
||||
"""
|
||||
用于法语谚语搜索
|
||||
:param request:
|
||||
:param body: 要求用户输入的内容必须为法语
|
||||
:param user:
|
||||
:return:
|
||||
"""
|
||||
content = await service.accurate_proverb(proverb_id=proverb_id)
|
||||
return content
|
||||
|
||||
|
||||
# TODO 相关度排序(转换为模糊匹配)
|
||||
# TODO 输入搜索框时反馈内容
|
||||
|
||||
@dict_search.post("/search/word/list")
|
||||
@dict_search.post("/search/list/word")
|
||||
async def search_word_list(query_word: SearchRequest, user=Depends(get_current_user)):
|
||||
"""
|
||||
检索时的提示接口
|
||||
|
|
@ -165,9 +153,9 @@ async def search_word_list(query_word: SearchRequest, user=Depends(get_current_u
|
|||
return {"list": word_contents}
|
||||
|
||||
|
||||
@dict_search.post("/search/proverb/list")
|
||||
@dict_search.post("/search/list/proverb")
|
||||
async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get_current_user)):
|
||||
lang = service.detect_language(text=query_word.query)[1]
|
||||
query, lang, _ = service.detect_language(text=query_word.query)
|
||||
query = normalize_text(query_word.query) if lang == "fr" else query_word.query
|
||||
suggest_proverbs = await service.suggest_proverb(
|
||||
query=query_word.query,
|
||||
|
|
@ -180,35 +168,53 @@ async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get
|
|||
|
||||
@dict_search.post("/search/proverb")
|
||||
async def search_proverb(proverb_id: int = Form(...), user=Depends(get_current_user)):
|
||||
result = await service.accurate_proverb(proverb_id=proverb_id)
|
||||
result = await service.accurate_idiom_proverb(search_id=proverb_id, model=ProverbFr, only_fields=["text", "chi_exp"])
|
||||
|
||||
return {"result": result}
|
||||
|
||||
|
||||
@dict_search.post("/search/idiom/list")
|
||||
async def search_idiom_list(query_idiom: ProverbSearchRequest):
|
||||
@dict_search.post("/search/list/idiom")
|
||||
async def search_idiom_list(query_idiom: ProverbSearchRequest, user=Depends(get_current_user)):
|
||||
if query_idiom.dict_language == "fr":
|
||||
raise HTTPException(status_code=400, detail="Dict language Error")
|
||||
trad_query, lang = service.detect_language(text=query_idiom.query)
|
||||
|
||||
mapping_query, lang, is_kangji = await service.detect_language(text=query_idiom.query)
|
||||
query = all_in_kana(text=query_idiom.query) if lang == "jp" else query_idiom.query
|
||||
result = await service.suggest_proverb(
|
||||
|
||||
# ✅ 并发任务列表
|
||||
tasks = [
|
||||
service.suggest_proverb(
|
||||
query=query,
|
||||
lang=lang,
|
||||
model=IdiomJp,
|
||||
search_field="search_text",
|
||||
target_field="text",
|
||||
)
|
||||
if lang == "zh":
|
||||
trad_query = all_in_kana(text=query_idiom.query)
|
||||
search_idioms_from_chi = await service.suggest_proverb(
|
||||
query=trad_query,
|
||||
]
|
||||
|
||||
if lang == "zh" and is_kangji:
|
||||
# jp_query = all_in_kana(text=query_idiom.query)
|
||||
tasks.append(
|
||||
service.suggest_proverb(
|
||||
query=mapping_query,
|
||||
lang="jp",
|
||||
model=IdiomJp,
|
||||
search_field="text",
|
||||
)
|
||||
result[:0] = search_idioms_from_chi
|
||||
)
|
||||
|
||||
# ✅ 并发执行(返回结果顺序与任务顺序一致)
|
||||
results = await asyncio.gather(*tasks)
|
||||
|
||||
# ✅ 合并结果
|
||||
result = results[0]
|
||||
if len(results) > 1:
|
||||
result[:0] = results[1] # 将中文映射查询结果插到最前面
|
||||
|
||||
return {"list": result}
|
||||
|
||||
|
||||
@dict_search.post("/search/idiom")
|
||||
async def search_idiom(query_id: int):
|
||||
result = await accurate_proverb(proverb_id=query_id)
|
||||
async def search_idiom(query_id: int, user=Depends(get_current_user)):
|
||||
result = await service.accurate_idiom_proverb(search_id=query_id, model=IdiomJp, only_fields=["id", "text", "search_text", "chi_exp", "example"])
|
||||
return {"result": result}
|
||||
|
|
|
|||
|
|
@ -2,83 +2,70 @@ import re
|
|||
from typing import List, Tuple, Dict, Literal, Type
|
||||
|
||||
from fastapi import HTTPException
|
||||
from opencc import OpenCC
|
||||
from tortoise import Tortoise, Model
|
||||
from tortoise.expressions import Q
|
||||
|
||||
from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchResponse, ProverbSearchRequest
|
||||
from app.models import WordlistFr, WordlistJp
|
||||
from app.models.fr import ProverbFr
|
||||
from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchRequest
|
||||
from app.models import WordlistFr, WordlistJp, KangjiMapping
|
||||
from app.utils.all_kana import all_in_kana
|
||||
from app.utils.textnorm import normalize_text
|
||||
from settings import TORTOISE_ORM
|
||||
|
||||
|
||||
def detect_language(text: str) -> Tuple[str, Literal["fr", "zh", "jp", "other"]]:
|
||||
async def detect_language(text: str) -> Tuple[str, str, bool]:
|
||||
"""
|
||||
自动检测输入语言:
|
||||
- zh: 简体中文
|
||||
- jp: 日语(含假名或繁体/旧体字)
|
||||
- jp: 日语(含假名或旧字体)
|
||||
- fr: 拉丁字母(法语等)
|
||||
- other: 其他
|
||||
"""
|
||||
cc_s2t = OpenCC('s2t') # 简体 → 繁体
|
||||
cc_t2s = OpenCC('t2s') # 繁体 → 简体
|
||||
|
||||
返回:
|
||||
(映射或原文本, 语言代码, 是否为“含汉字且命中映射表”的情况)
|
||||
"""
|
||||
JAPANESE_HIRAGANA = r"[\u3040-\u309F]"
|
||||
JAPANESE_KATAKANA = r"[\u30A0-\u30FF\u31F0-\u31FF]"
|
||||
|
||||
text = text.strip()
|
||||
if not text:
|
||||
return "", "other"
|
||||
return "", "other", False
|
||||
|
||||
# ✅ Step 1: 假名检测
|
||||
if re.search(JAPANESE_HIRAGANA, text) or re.search(JAPANESE_KATAKANA, text):
|
||||
return text, "jp"
|
||||
# ✅ Step 1: 全部假名(无汉字)
|
||||
if re.fullmatch(f"(?:{JAPANESE_HIRAGANA}|{JAPANESE_KATAKANA})+", text):
|
||||
return text, "jp", False
|
||||
|
||||
# ✅ Step 2: 汉字检测
|
||||
if re.search(r"[\u4e00-\u9fff]", text):
|
||||
# 简繁互转对比
|
||||
to_trad = cc_s2t.convert(text)
|
||||
to_simp = cc_t2s.convert(text)
|
||||
# 优先判断是否为日语汉字
|
||||
jp_match = await KangjiMapping.get_or_none(kangji=text).only("kangji")
|
||||
if jp_match:
|
||||
return text, "jp", True # 含汉字且命中日语列
|
||||
|
||||
# 如果输入等于繁体转换结果 → 繁体或日文汉字
|
||||
if text == to_trad and text != to_simp:
|
||||
return text, "jp"
|
||||
# 如果输入等于简体转换结果 → 简体中文
|
||||
elif text == to_simp and text != to_trad:
|
||||
return to_trad, "zh" # 注意返回的是繁体形式用于补充搜索
|
||||
# 否则混合(既有简体又有繁体)
|
||||
else:
|
||||
# 混合时可优先认定为繁体(日语)
|
||||
return to_trad, "jp"
|
||||
# 再检查是否为中文汉字
|
||||
zh_match = await KangjiMapping.get_or_none(hanzi=text).only("hanzi", "kangji")
|
||||
if zh_match:
|
||||
return zh_match.kangji, "zh", True # 含汉字且命中中文列
|
||||
|
||||
# ✅ Step 3: 拉丁字母检测
|
||||
# 若都不在映射表中,则为未映射的中文
|
||||
return text, "zh", False
|
||||
|
||||
# ✅ Step 3: 拉丁字母检测(如法语)
|
||||
if re.search(r"[a-zA-ZÀ-ÿ]", text):
|
||||
return text, "fr"
|
||||
return text, "fr", False
|
||||
|
||||
return text, "other"
|
||||
# ✅ Step 4: 其他情况(符号、空格等)
|
||||
return text, "other", False
|
||||
|
||||
|
||||
async def accurate_proverb(proverb_id: int) -> ProverbSearchResponse:
|
||||
"""对于查询法语谚语的精准查询,返回详细信息"""
|
||||
proverb = await ProverbFr.get_or_none(id=proverb_id)
|
||||
if not proverb:
|
||||
raise HTTPException(status_code=404, detail="Proverb not found")
|
||||
proverb.freq = proverb.freq + 1
|
||||
await proverb.save()
|
||||
return ProverbSearchResponse(
|
||||
proverb_text=proverb.text,
|
||||
chi_exp=proverb.chi_exp,
|
||||
)
|
||||
|
||||
async def accurate_idiom(idiom_id: int):
|
||||
proverb = await ProverbFr.get_or_none(id=idiom_id)
|
||||
if not proverb:
|
||||
raise HTTPException(status_code=404, detail="Proverb not found")
|
||||
proverb.freq = proverb.freq + 1
|
||||
await proverb.save()
|
||||
return proverb
|
||||
async def accurate_idiom_proverb(search_id: int, model: Type[Model], only_fields: List[str] = None):
    """Fetch one idiom/proverb row by ID, increment its ``freq`` counter and return it.

    Args:
        search_id: Value matched against the model's ``id`` column.
        model: Tortoise ORM model class to query (e.g. ``ProverbFr`` or ``IdiomJp``).
        only_fields: Names of the columns to load. ``None`` (the default) loads
            only the bookkeeping columns added below. The caller's list is
            never modified.

    Returns:
        The partially loaded model instance, with ``freq`` already incremented
        and persisted.

    Raises:
        HTTPException: 404 when no row with ``search_id`` exists.
    """
    # Copy instead of appending in place: the original mutated the caller's
    # list and crashed with TypeError when ``only_fields`` was left as None.
    selected = list(only_fields) if only_fields is not None else []

    # "freq" must be loaded to be incremented. "id" must be loaded as well:
    # Tortoise refuses save(update_fields=...) on a partial model whose primary
    # key was not part of .only(...).
    # NOTE(review): assumes the primary key column is named "id", which is
    # what the lookup below already filters on -- confirm for every model used.
    for required in ("id", "freq"):
        if required not in selected:
            selected.append(required)

    result = await model.get_or_none(id=search_id).only(*selected)
    if not result:
        raise HTTPException(status_code=404, detail="Target not found")

    result.freq = result.freq + 1
    # Restrict the UPDATE to the counter; the other columns are not loaded.
    await result.save(update_fields=["freq"])
    return result
|
||||
|
||||
|
||||
async def suggest_proverb(
|
||||
|
|
@ -90,54 +77,37 @@ async def suggest_proverb(
|
|||
chi_exp_field: str = "chi_exp",
|
||||
limit: int = 10,
|
||||
) -> List[Dict[str, str]]:
|
||||
"""
|
||||
通用搜索建议函数,用于多语言谚语表。
|
||||
参数:
|
||||
query: 搜索关键词
|
||||
lang: 'fr' 或 'zh'
|
||||
model: Tortoise ORM 模型类,例如 ProverbFr
|
||||
proverb_field: 外语谚语字段名
|
||||
chi_exp_field: 中文释义字段名
|
||||
limit: 每类匹配的最大返回数量
|
||||
|
||||
搜索逻辑:
|
||||
1. 根据语言选择搜索字段;
|
||||
2. 优先匹配以输入开头的结果;
|
||||
3. 其次匹配包含输入但非开头的结果;
|
||||
4. 合并去重后返回。
|
||||
"""
|
||||
keyword = query.strip()
|
||||
if not keyword:
|
||||
return []
|
||||
|
||||
# ✅ 根据语言选择搜索字段
|
||||
# ✅ 搜索条件:中文时双字段联合匹配
|
||||
if lang == "zh":
|
||||
startswith_field = f"{chi_exp_field}__istartswith"
|
||||
contains_field = f"{chi_exp_field}__icontains"
|
||||
start_condition = Q(**{f"{chi_exp_field}__istartswith": keyword}) | Q(
|
||||
**{f"{search_field}__istartswith": keyword})
|
||||
contain_condition = Q(**{f"{chi_exp_field}__icontains": keyword}) | Q(**{f"{search_field}__icontains": keyword})
|
||||
else:
|
||||
startswith_field = f"{search_field}__istartswith"
|
||||
contains_field = f"{search_field}__icontains"
|
||||
start_condition = Q(**{f"{search_field}__istartswith": keyword})
|
||||
contain_condition = Q(**{f"{search_field}__icontains": keyword})
|
||||
|
||||
# ✅ 1. 开头匹配
|
||||
start_matches = await (
|
||||
model.filter(**{startswith_field: keyword})
|
||||
.order_by("-freq")
|
||||
model.filter(start_condition)
|
||||
.order_by("-freq", "id")
|
||||
.limit(limit)
|
||||
.values("id", target_field, search_field, chi_exp_field)
|
||||
.values("id", target_field, chi_exp_field, "search_text")
|
||||
)
|
||||
|
||||
# ✅ 2. 包含匹配(非开头)
|
||||
# ✅ 2. 包含匹配(但不是开头)
|
||||
contain_matches = await (
|
||||
model.filter(
|
||||
Q(**{contains_field: keyword}) & ~Q(**{startswith_field: keyword})
|
||||
)
|
||||
.order_by("-freq")
|
||||
model.filter(contain_condition & ~start_condition)
|
||||
.order_by("-freq", "id")
|
||||
.limit(limit)
|
||||
.values("id", target_field, search_field, chi_exp_field)
|
||||
.values("id", target_field, chi_exp_field, "search_text")
|
||||
)
|
||||
|
||||
# ✅ 3. 合并去重并保持顺序
|
||||
results: List[Dict[str, str]] = []
|
||||
# ✅ 3. 合并去重保持顺序
|
||||
results = []
|
||||
seen_ids = set()
|
||||
for row in start_matches + contain_matches:
|
||||
if row["id"] not in seen_ids:
|
||||
|
|
@ -145,11 +115,12 @@ async def suggest_proverb(
|
|||
results.append({
|
||||
"id": row["id"],
|
||||
"proverb": row[target_field],
|
||||
"search_text": row[search_field],
|
||||
"chi_exp": row[chi_exp_field]
|
||||
"search_text": row["search_text"],
|
||||
"chi_exp": row[chi_exp_field],
|
||||
})
|
||||
|
||||
return results
|
||||
# ✅ 截断最终返回数量
|
||||
return results[:limit]
|
||||
|
||||
|
||||
async def suggest_autocomplete(query: SearchRequest, limit: int = 10):
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Tuple, TypeVar
|
||||
from typing import Tuple, TypeVar, Optional
|
||||
|
||||
import pandas as pd
|
||||
from tortoise import fields
|
||||
|
|
@ -74,6 +74,7 @@ class DefinitionJp(Model):
|
|||
class Meta:
|
||||
table = "definitions_jp"
|
||||
|
||||
|
||||
class PosType(Model):
|
||||
id = fields.IntField(pk=True)
|
||||
pos_type = fields.CharEnumField(PosEnumJp, max_length=30, null=False)
|
||||
|
|
@ -81,6 +82,7 @@ class PosType(Model):
|
|||
class Meta:
|
||||
table = "pos_type"
|
||||
|
||||
|
||||
class PronunciationTestJp(Model):
|
||||
id = fields.IntField(pk=True)
|
||||
text = fields.TextField(description="朗读文段")
|
||||
|
|
@ -88,6 +90,7 @@ class PronunciationTestJp(Model):
|
|||
class Meta:
|
||||
table = "pronunciationtest_jp"
|
||||
|
||||
|
||||
class IdiomJp(Model):
|
||||
id = fields.IntField(pk=True)
|
||||
text = fields.TextField(null=False)
|
||||
|
|
@ -100,6 +103,7 @@ class IdiomJp(Model):
|
|||
class Meta:
|
||||
table = "idiom_jp"
|
||||
|
||||
|
||||
class KangjiMapping(Model):
|
||||
id = fields.IntField(pk=True)
|
||||
hanzi = fields.TextField(null=False)
|
||||
|
|
@ -107,5 +111,12 @@ class KangjiMapping(Model):
|
|||
note = fields.TextField(null=False)
|
||||
created_at = fields.DatetimeField(auto_now_add=True)
|
||||
|
||||
@classmethod
async def chi2kangji(cls, text_chi: str) -> Optional[str]:
    """Look up the ``kangji`` value stored for a given ``hanzi`` value.

    Args:
        text_chi: Text matched exactly against the ``hanzi`` column.

    Returns:
        The ``kangji`` column of the matching row, or ``None`` when no row
        matches.
    """
    # ``cls`` was missing from the original signature, so the class itself was
    # bound to ``text_chi`` and any call passing an argument raised TypeError.
    mapping = await cls.get_or_none(hanzi=text_chi)
    if not mapping:
        return None
    return mapping.kangji
|
||||
|
||||
class Meta:
|
||||
table = "kangji_mapping_zh_jp"
|
||||
|
|
|
|||
4
main.py
4
main.py
|
|
@ -18,7 +18,7 @@ from app.api.user.routes import users_router
|
|||
from app.api.word_comment.routes import word_comment_router
|
||||
from app.core.redis import init_redis, close_redis
|
||||
from app.utils.phone_encrypt import PhoneEncrypt
|
||||
from settings import TORTOISE_ORM
|
||||
from settings import ONLINE_SETTINGS
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
|
|
@ -46,7 +46,7 @@ app.add_middleware(
|
|||
|
||||
register_tortoise(
|
||||
app=app,
|
||||
config=TORTOISE_ORM,
|
||||
config=ONLINE_SETTINGS,
|
||||
)
|
||||
|
||||
app.include_router(users_router, tags=["User API"], prefix="/users")
|
||||
|
|
|
|||
Loading…
Reference in New Issue