Compare commits
No commits in common. "fe6c84e310f3c621a5db58761a3b779b29fedcac" and "11ff8926537434e5fd5562ed93d2059f7f9ba6f0" have entirely different histories.
fe6c84e310
...
11ff892653
27
README.md
27
README.md
|
|
@ -334,19 +334,16 @@ Authorization: Bearer <your_jwt_token>
|
|||
#### 2.2 法语谚语详情
|
||||
|
||||
- **接口**: `POST /search/proverb`
|
||||
- **描述**: 根据谚语ID返回法语谚语全文与中文释义。
|
||||
- **描述**: 通过谚语ID获取法语谚语原文与中文解释。
|
||||
- **需要认证**: 是
|
||||
- **请求类型**: `application/x-www-form-urlencoded`
|
||||
- **表单字段**:
|
||||
- `proverb_id`: 谚语ID (integer,必填)
|
||||
- **查询参数**:
|
||||
- `proverb_id`: 谚语ID (integer)
|
||||
- **响应**:
|
||||
|
||||
```json
|
||||
{
|
||||
"result": {
|
||||
"proverb_text": "Petit à petit, l'oiseau fait son nid.",
|
||||
"chi_exp": "循序渐进才能取得成功。"
|
||||
}
|
||||
"proverb_text": "Petit à petit, l'oiseau fait son nid.",
|
||||
"chi_exp": "循序渐进才能取得成功。"
|
||||
}
|
||||
```
|
||||
|
||||
|
|
@ -383,13 +380,14 @@ Authorization: Bearer <your_jwt_token>
|
|||
#### 2.4 谚语联想建议
|
||||
|
||||
- **接口**: `POST /search/proverb/list`
|
||||
- **描述**: 按输入内容返回谚语候选列表,后端会自动检测输入语言(中文/日文假名/拉丁字母),无法识别时退回法语字段搜索。
|
||||
- **描述**: 按输入内容(自动识别法语或中文)返回谚语候选列表。
|
||||
- **需要认证**: 是
|
||||
- **请求体**:
|
||||
|
||||
```json
|
||||
{
|
||||
"query": "慢"
|
||||
"query": "慢",
|
||||
"language": "fr"
|
||||
}
|
||||
```
|
||||
|
||||
|
|
@ -407,9 +405,6 @@ Authorization: Bearer <your_jwt_token>
|
|||
}
|
||||
```
|
||||
|
||||
- **状态码**:
|
||||
- `200`: 查询成功
|
||||
|
||||
---
|
||||
|
||||
### 3. 翻译模块 (`/translate`)
|
||||
|
|
@ -442,7 +437,6 @@ Authorization: Bearer <your_jwt_token>
|
|||
}
|
||||
```
|
||||
|
||||
- **限制**: 依赖 Redis 计数器做限流,同一用户每秒最多 2 次请求(超出返回 `429`)
|
||||
- **状态码**:
|
||||
- `200`: 翻译成功
|
||||
- `401`: 未授权
|
||||
|
|
@ -458,7 +452,7 @@ Authorization: Bearer <your_jwt_token>
|
|||
- `from_lang`: 源语言,默认为 `auto`
|
||||
- `to_lang`: 目标语言,默认为 `zh`
|
||||
|
||||
- **限制**: 与标准翻译接口共享限流计数,同一用户每秒最多2次请求
|
||||
- **限制**: 每秒最多2次请求
|
||||
- **状态码**:
|
||||
- `200`: 翻译成功
|
||||
- `429`: 请求频率过高
|
||||
|
|
@ -776,8 +770,7 @@ Authorization: Bearer <your_jwt_token>
|
|||
- **需要认证**: 是
|
||||
- **查询参数**:
|
||||
- `count`: 抽题数量 (integer,默认 `20`)
|
||||
- **表单字段**:
|
||||
- `lang`: 语种代码(`fr-FR` 或 `ja-JP`,默认 `fr-FR`)。由于实现方式,FastAPI 将其视为 form-data 字段,GET 请求需通过 form 提交或在调试文档中直接填写。
|
||||
- `lang`: 语种代码,支持 `fr-FR`(法语)、`ja-JP`(日语),默认 `fr-FR`
|
||||
- **响应**:
|
||||
|
||||
```json
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from typing import Literal, List
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, Form
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
|
||||
from app.api.search_dict import service
|
||||
from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse, SearchItemFr, SearchItemJp, \
|
||||
|
|
@ -8,7 +8,7 @@ from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse
|
|||
from app.api.search_dict.service import suggest_autocomplete
|
||||
from app.api.word_comment.word_comment_schemas import CommentSet
|
||||
from app.models import DefinitionJp, CommentFr, CommentJp
|
||||
from app.models.fr import DefinitionFr, ProverbFr
|
||||
from app.models.fr import DefinitionFr
|
||||
from app.utils.all_kana import all_in_kana
|
||||
from app.utils.security import get_current_user
|
||||
from app.utils.textnorm import normalize_text
|
||||
|
|
@ -165,19 +165,7 @@ async def search_word_list(query_word: SearchRequest, user=Depends(get_current_u
|
|||
|
||||
|
||||
@dict_search.post("/search/proverb/list")
|
||||
async def search_proverb_list(query_word: ProverbSearchRequest):
|
||||
lang = service.detect_language(text=query_word.query)
|
||||
query = normalize_text(query_word.query) if lang == "fr" else query_word.query
|
||||
suggest_proverbs = await service.suggest_proverb(
|
||||
query=query_word.query,
|
||||
lang=lang,
|
||||
model=ProverbFr,
|
||||
search_field="search_text",
|
||||
)
|
||||
# TODO 使用法语词典时是否存在用英语输入的情况
|
||||
async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get_current_user)):
|
||||
lang: Literal['fr', 'zh'] = 'zh' if service.contains_chinese(query_word.query) else 'fr'
|
||||
suggest_proverbs = await service.suggest_proverb(query=query_word, lang=lang)
|
||||
return {"list": suggest_proverbs}
|
||||
|
||||
@dict_search.post("/search/proverb")
|
||||
async def search_proverb(proverb_id:int = Form(...), user=Depends(get_current_user)):
|
||||
result = await service.accurate_proverb(proverb_id=proverb_id)
|
||||
return {"result": result}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ class SearchRequest(BaseModel):
|
|||
|
||||
class ProverbSearchRequest(BaseModel):
|
||||
query: str
|
||||
dict_language: Literal['fr', 'jp'] = "fr"
|
||||
language: Literal['fr', 'jp'] = "fr"
|
||||
|
||||
|
||||
class SearchItemJp(BaseModel):
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
import asyncio
|
||||
import re
|
||||
from typing import List, Tuple, Dict, Literal, Type
|
||||
from typing import List, Tuple, Dict, Literal
|
||||
|
||||
from fastapi import HTTPException
|
||||
from tortoise import Tortoise, Model
|
||||
from tortoise import Tortoise
|
||||
from tortoise.expressions import Q
|
||||
|
||||
from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchResponse, ProverbSearchRequest
|
||||
|
|
@ -13,97 +14,75 @@ from app.utils.textnorm import normalize_text
|
|||
from settings import TORTOISE_ORM
|
||||
|
||||
|
||||
def detect_language(text: str) -> Literal["fr", "zh", "jp", "other"]:
|
||||
"""
|
||||
自动检测输入语言:
|
||||
返回 'zh' / 'jp' / 'fr' / 'other'
|
||||
"""
|
||||
if re.search(r"[\u4e00-\u9fff]", text):
|
||||
return "zh"
|
||||
elif re.search(r"[\u3040-\u30ff\u31f0-\u31ff]", text): # 日文假名范围
|
||||
return "jp"
|
||||
elif re.search(r"[a-zA-ZÀ-ÿ]", text):
|
||||
return "fr"
|
||||
return "other"
|
||||
def contains_chinese(text: str) -> bool:
|
||||
"""判断字符串中是否包含至少一个中文字符"""
|
||||
return bool(re.search(r'[\u4e00-\u9fff]', text))
|
||||
|
||||
|
||||
async def accurate_proverb(proverb_id: int) -> ProverbSearchResponse:
|
||||
"""对于查询法语谚语的精准查询,返回详细信息"""
|
||||
proverb = await ProverbFr.get_or_none(id=proverb_id)
|
||||
if not proverb:
|
||||
raise HTTPException(status_code=404, detail="Proverb not found")
|
||||
return ProverbSearchResponse(
|
||||
proverb_text=proverb.text,
|
||||
proverb_text=proverb.proverb,
|
||||
chi_exp=proverb.chi_exp,
|
||||
)
|
||||
|
||||
|
||||
async def suggest_proverb(
|
||||
query: str,
|
||||
lang: Literal["fr", "zh", "jp"],
|
||||
model: Type[Model],
|
||||
search_field: str = "search_text",
|
||||
target_field: str = "text",
|
||||
chi_exp_field: str = "chi_exp",
|
||||
limit: int = 10,
|
||||
) -> List[Dict[str, str]]:
|
||||
async def suggest_proverb(query: ProverbSearchRequest, lang: Literal['fr', 'zh']) -> List[Dict[str, str]]:
|
||||
"""
|
||||
通用搜索建议函数,用于多语言谚语表。
|
||||
对法语谚语表进行搜索建议。
|
||||
参数:
|
||||
query: 搜索关键词
|
||||
query.query: 搜索关键词
|
||||
lang: 'fr' 或 'zh'
|
||||
model: Tortoise ORM 模型类,例如 ProverbFr
|
||||
proverb_field: 外语谚语字段名
|
||||
chi_exp_field: 中文释义字段名
|
||||
limit: 每类匹配的最大返回数量
|
||||
|
||||
搜索逻辑:
|
||||
1. 根据语言选择搜索字段;
|
||||
2. 优先匹配以输入开头的结果;
|
||||
3. 其次匹配包含输入但非开头的结果;
|
||||
4. 合并去重后返回。
|
||||
逻辑:
|
||||
1. 若 lang='fr',按谚语字段 (proverb) 搜索;
|
||||
2. 若 lang='zh',按中文释义字段 (chi_exp) 搜索;
|
||||
3. 优先以输入开头的匹配;
|
||||
4. 其次为包含输入但不以其开头的匹配(按 freq 排序)。
|
||||
:return: [{'id': 1, 'proverb': 'xxx'}, ...]
|
||||
"""
|
||||
keyword = query.strip()
|
||||
if not keyword:
|
||||
return []
|
||||
keyword = query.query.strip()
|
||||
results: List[Dict[str, str]] = []
|
||||
|
||||
# ✅ 根据语言选择搜索字段
|
||||
if not keyword:
|
||||
return results
|
||||
|
||||
# ✅ 根据语言决定搜索字段
|
||||
if lang == "zh":
|
||||
startswith_field = f"{chi_exp_field}__istartswith"
|
||||
contains_field = f"{chi_exp_field}__icontains"
|
||||
else:
|
||||
startswith_field = f"{search_field}__istartswith"
|
||||
contains_field = f"{search_field}__icontains"
|
||||
startswith_field = "chi_exp__istartswith"
|
||||
contains_field = "chi_exp__icontains"
|
||||
else: # 默认法语
|
||||
startswith_field = "proverb__istartswith"
|
||||
contains_field = "proverb__icontains"
|
||||
|
||||
# ✅ 1. 开头匹配
|
||||
start_matches = await (
|
||||
model.filter(**{startswith_field: keyword})
|
||||
ProverbFr.filter(**{startswith_field: keyword})
|
||||
.order_by("-freq")
|
||||
.limit(limit)
|
||||
.values("id", target_field, search_field, chi_exp_field)
|
||||
.limit(10)
|
||||
.values("id", "proverb", "chi_exp")
|
||||
)
|
||||
|
||||
# ✅ 2. 包含匹配(非开头)
|
||||
# ✅ 2. 包含匹配(但不是开头)
|
||||
contain_matches = await (
|
||||
model.filter(
|
||||
ProverbFr.filter(
|
||||
Q(**{contains_field: keyword}) & ~Q(**{startswith_field: keyword})
|
||||
)
|
||||
.order_by("-freq")
|
||||
.limit(limit)
|
||||
.values("id", target_field, search_field, chi_exp_field)
|
||||
.limit(10)
|
||||
.values("id", "proverb", "chi_exp")
|
||||
)
|
||||
|
||||
# ✅ 3. 合并去重并保持顺序
|
||||
results: List[Dict[str, str]] = []
|
||||
# ✅ 合并结果(去重并保持顺序)
|
||||
seen_ids = set()
|
||||
for row in start_matches + contain_matches:
|
||||
if row["id"] not in seen_ids:
|
||||
seen_ids.add(row["id"])
|
||||
results.append({
|
||||
"id": row["id"],
|
||||
"proverb": row[target_field],
|
||||
"search_text": row[search_field],
|
||||
"chi_exp": row[chi_exp_field]
|
||||
"proverb": row["proverb"],
|
||||
"chi_exp": row["chi_exp"]
|
||||
})
|
||||
|
||||
return results
|
||||
|
|
@ -226,5 +205,4 @@ async def __main():
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# asyncio.run(__main())
|
||||
print(detect_language(text="ahsjdasd"))
|
||||
asyncio.run(__main())
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@ from fastapi import APIRouter, Depends, HTTPException
|
|||
|
||||
from app.models import User
|
||||
from app.schemas.trans_schemas import TransResponse, TransRequest
|
||||
from app.utils.md5 import make_md5
|
||||
from app.utils.security import is_admin_user, get_current_user
|
||||
from scripts.md5 import make_md5
|
||||
from settings import settings
|
||||
|
||||
translator_router = APIRouter()
|
||||
|
|
|
|||
|
|
@ -45,11 +45,11 @@ class DefinitionFr(Model):
|
|||
|
||||
class ProverbFr(Model):
|
||||
id = fields.IntField(pk=True)
|
||||
text = fields.TextField(description="法语谚语及常用表达")
|
||||
proverb = fields.TextField(description="法语谚语及常用表达")
|
||||
chi_exp = fields.TextField(description="中文释义")
|
||||
freq = fields.IntField(default=0)
|
||||
search_text = fields.TextField()
|
||||
created_at = fields.DatetimeField(auto_now_add=True)
|
||||
updated_at = fields.DatetimeField(auto_now=True)
|
||||
|
||||
class Meta:
|
||||
table = "proverb_fr"
|
||||
|
|
|
|||
|
|
@ -87,14 +87,3 @@ class PronunciationTestJp(Model):
|
|||
|
||||
class Meta:
|
||||
table = "pronunciationtest_jp"
|
||||
|
||||
class IdiomJp(Model):
|
||||
id = fields.IntField(pk=True)
|
||||
text = fields.TextField(null=False)
|
||||
chi_exp = fields.TextField(null=False)
|
||||
example = fields.TextField(null=False)
|
||||
search_text = fields.TextField(null=False)
|
||||
created_at = fields.DatetimeField(auto_now_add=True)
|
||||
|
||||
class Meta:
|
||||
table = "idiom_jp"
|
||||
|
|
|
|||
4
main.py
4
main.py
|
|
@ -18,7 +18,7 @@ from app.api.user.routes import users_router
|
|||
from app.api.word_comment.routes import word_comment_router
|
||||
from app.core.redis import init_redis, close_redis
|
||||
from app.utils.phone_encrypt import PhoneEncrypt
|
||||
from settings import TORTOISE_ORM
|
||||
from settings import ONLINE_SETTINGS
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
|
|
@ -46,7 +46,7 @@ app.add_middleware(
|
|||
|
||||
register_tortoise(
|
||||
app=app,
|
||||
config=TORTOISE_ORM,
|
||||
config=ONLINE_SETTINGS,
|
||||
)
|
||||
|
||||
app.include_router(users_router, tags=["User API"], prefix="/users")
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import asyncio
|
|||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
from tortoise import Tortoise
|
||||
from tortoise import Tortoise, connections
|
||||
from tortoise.exceptions import MultipleObjectsReturned
|
||||
|
||||
from app.models.fr import DefinitionFr, WordlistFr
|
||||
|
|
@ -101,13 +101,14 @@ async def varification_eg():
|
|||
|
||||
async def main():
|
||||
await Tortoise.init(config=TORTOISE_ORM)
|
||||
# await DefinitionFr.all().delete() # TRUNCATE TABLE definitions_fr;
|
||||
# conn = connections.get("default")
|
||||
# await conn.execute_script("""
|
||||
# ALTER TABLE definitions_fr AUTO_INCREMENT = 1;
|
||||
# """)
|
||||
# await import_def_fr()
|
||||
# # await import_wordlist_fr()
|
||||
await DefinitionFr.all().delete() # TRUNCATE TABLE definitions_fr;
|
||||
conn = connections.get("default")
|
||||
await conn.execute_script("""
|
||||
ALTER TABLE definitions_fr AUTO_INCREMENT = 1;
|
||||
""")
|
||||
await import_def_fr()
|
||||
# await import_wordlist_fr()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
|
|
|
|||
|
|
@ -1,21 +1,21 @@
|
|||
import asyncio
|
||||
import re
|
||||
import unicodedata
|
||||
from importlib import resources
|
||||
import jaconv
|
||||
from pathlib import Path
|
||||
|
||||
import jaconv
|
||||
import pandas as pd
|
||||
from fugashi import Tagger
|
||||
import unidic_lite
|
||||
from importlib import resources
|
||||
from pykakasi import kakasi
|
||||
from tortoise import Tortoise
|
||||
from tortoise.exceptions import MultipleObjectsReturned
|
||||
|
||||
from app.models import WordlistJp, DefinitionJp, AttachmentJp, PosType
|
||||
from app.models.jp import IdiomJp
|
||||
from settings import TORTOISE_ORM
|
||||
|
||||
xlsx_name = "./DictTable_20251029.xlsx"
|
||||
xlsx_name = "./DictTable-20250823.xlsx"
|
||||
xlsx_path = Path(xlsx_name)
|
||||
|
||||
|
||||
|
|
@ -228,24 +228,6 @@ async def set_hiragana(xlsx_path: Path = xlsx_path, sheet_name : str="日汉释
|
|||
|
||||
await WordlistJp.filter(text=word).update(hiragana=hiragana)
|
||||
|
||||
async def import_idiom():
|
||||
path = xlsx_path
|
||||
df = pd.read_excel(path, sheet_name="日语惯用语")
|
||||
df.columns = [col.strip() for col in df.columns]
|
||||
|
||||
for row in df.itertuples():
|
||||
sentence = str(row[1]).strip()
|
||||
search_text = str(row[2]).strip()
|
||||
chi_exp = str(row[3]).strip()
|
||||
example = str(row[4]).strip()
|
||||
|
||||
await IdiomJp.create(
|
||||
text=sentence,
|
||||
chi_exp=chi_exp,
|
||||
example=example,
|
||||
search_text=search_text,
|
||||
)
|
||||
|
||||
|
||||
async def main():
|
||||
await Tortoise.init(config=TORTOISE_ORM)
|
||||
|
|
@ -255,8 +237,8 @@ async def main():
|
|||
# await import_wordlist_jp()
|
||||
# await import_def_jp()
|
||||
# await import_attachment()
|
||||
# await set_hiragana()
|
||||
await import_idiom()
|
||||
await set_hiragana()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
|
|
|
|||
Loading…
Reference in New Issue