Compare commits


No commits in common. "9f9f46264001ae121d1a4fa753b3e1c04512c453" and "1c8036cb8de6731f0b0385a6a08d94b20215ca34" have entirely different histories.

4 changed files with 120 additions and 367 deletions


@@ -362,73 +362,28 @@ Authorization: Bearer <your_jwt_token>
 #### 2.3 Word suggestions
 - **Endpoint**: `POST /api/search/list/word`
-- **Description**: Returns a list of smart suggestion candidates. The backend switches retrieval strategy based on `language` (the active dictionary) and the user input, combining the "prefix match" and "reverse lookup by definition" sources, then deduplicating results and merging their definitions
+- **Description**: Returns a word-suggestion list for the user input, combining prefix and substring matches
 - **Requires authentication**: yes
 - **Request body**:
   ```json
   {
     "query": "bon",
-    "language": "fr"
+    "language": "fr",
+    "sort": "relevance",
+    "order": "des"
   }
   ```
-- **Retrieval rules**:
-  - `language = "fr"`:
-    - French/Latin-script input: prefix + substring matching against `WordlistFr` first.
-    - Chinese input: falls back to a reverse lookup on the Chinese definition fields of the French entries.
-    - English input: reverse lookup on the English definition fields first, to support the "English → French" scenario.
-  - `language = "jp"`:
-    - Kana or kanji input: prefix + substring matching directly against `WordlistJp`, returning the kana field as well.
-    - Chinese input: reverse lookup on the Chinese definitions first; if the Chinese entry has a kanji mapping, the corresponding Japanese headwords are queried in parallel and placed at the front of the results.
-- **Response fields**:
-  - `word`: the headword (French or Japanese)
-  - `hiragana`: present only for Japanese results; `null` for French
-  - `meanings`: deduplicated array of Chinese definitions (present only when the result comes from a reverse lookup by definition)
-  - `english`: deduplicated array of English definitions (French dictionary only, and only when the reverse lookup used English definitions)
-- **Response example (French)**:
+- **Response example**:
   ```json
   {
-    "list": [
-      {
-        "word": "bonjour",
-        "hiragana": null,
-        "meanings": ["你好", "问候语"],
-        "english": ["hello"]
-      },
-      {
-        "word": "bonsoir",
-        "hiragana": null,
-        "meanings": [],
-        "english": []
-      }
-    ]
+    "list": ["bonjour", "bonsoir", "bonheur"]
   }
   ```
-- **Response example (Japanese, reverse lookup from Chinese)**:
-  ```json
-  {
-    "list": [
-      {
-        "word": "愛",
-        "hiragana": "あい",
-        "meanings": ["爱;爱意"],
-        "english": []
-      },
-      {
-        "word": "愛する",
-        "hiragana": "あいする",
-        "meanings": ["热爱;深爱"],
-        "english": []
-      }
-    ]
-  }
-  ```
+> **Note**: when `language = "jp"`, the list is a two-dimensional array such as `[["愛", "あい"], ["愛する", "あいする"]]`, where the second column is the kana reading.
 - **Status codes**:
   - `200`: query succeeded
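For reference, a minimal client call against the revised contract might look like the sketch below. The base URL, token, and `fetch_suggestions` helper are placeholders for illustration, not code from this repository:

```python
import asyncio
import httpx  # any async HTTP client works; httpx is assumed here

async def fetch_suggestions(query: str, language: str) -> list:
    """POST to the suggestion endpoint and return the raw `list` field."""
    headers = {"Authorization": "Bearer <your_jwt_token>"}  # placeholder JWT
    payload = {"query": query, "language": language,
               "sort": "relevance", "order": "des"}
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        resp = await client.post("/api/search/list/word",
                                 json=payload, headers=headers)
        resp.raise_for_status()
        # "fr" yields ["bonjour", ...]; "jp" yields [["愛", "あい"], ...]
        return resp.json()["list"]

if __name__ == "__main__":
    print(asyncio.run(fetch_suggestions("bon", "fr")))
```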


@@ -31,13 +31,7 @@ async def article_director(
     redis = request.app.state.redis
     # print(upload_article)
-    match lang:
-        case "en-US":
-            article_lang = "英语"
-        case "fr-FR":
-            article_lang = "法语"
-        case _:
-            article_lang = "日语"
+    article_lang = "法语" if lang == "fr-FR" else "日语"
     user_id = user[0].id
     article = upload_article.content
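Note that this is not a pure simplification: the `case "en-US"` arm is gone, so an English locale now falls through to the Japanese label. A standalone sketch of the behavioral difference, with both versions reimplemented for illustration only:

```python
def old_mapping(lang: str) -> str:
    match lang:  # Python 3.10+ structural pattern matching, as in the old code
        case "en-US":
            return "英语"
        case "fr-FR":
            return "法语"
        case _:
            return "日语"

def new_mapping(lang: str) -> str:
    return "法语" if lang == "fr-FR" else "日语"

assert old_mapping("en-US") == "英语"
assert new_mapping("en-US") == "日语"  # English articles are now labeled Japanese
```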


@ -6,10 +6,11 @@ from fastapi import APIRouter, Depends, HTTPException, Request, Form
from app.api.search_dict import service from app.api.search_dict import service
from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse, SearchItemFr, SearchItemJp, \ from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse, SearchItemFr, SearchItemJp, \
ProverbSearchRequest ProverbSearchRequest
from app.api.search_dict.service import suggest_autocomplete
from app.api.word_comment.word_comment_schemas import CommentSet from app.api.word_comment.word_comment_schemas import CommentSet
from app.models import DefinitionJp, CommentFr, CommentJp, WordlistFr from app.models import DefinitionJp, CommentFr, CommentJp
from app.models.fr import DefinitionFr, ProverbFr from app.models.fr import DefinitionFr, ProverbFr
from app.models.jp import IdiomJp, WordlistJp from app.models.jp import IdiomJp
from app.utils.all_kana import all_in_kana from app.utils.all_kana import all_in_kana
from app.utils.security import get_current_user from app.utils.security import get_current_user
from app.utils.textnorm import normalize_text from app.utils.textnorm import normalize_text
@@ -157,66 +158,13 @@ async def search_word_list(query_word: SearchRequest, user=Depends(get_current_u
     :return: the candidate list
     """
     # print(query_word.query, query_word.language, query_word.sort, query_word.order)
-    query = query_word.query
-    lang = query_word.language
-    query, search_lang, transable = await service.detect_language(text=query)
-    word_contents = []
-    if lang == "fr":
-        if search_lang == "fr":
-            word_contents = await service.suggest_autocomplete(
-                query=query,
-                dict_lang="fr",
-                model=WordlistFr,
-            )
-            if not transable:
-                word_contents.extend(
-                    await service.search_definition_by_meaning(
-                        query=query,
-                        model=DefinitionFr,
-                        lang="en",
-                    )
-                )
-        else:
-            word_contents = await service.search_definition_by_meaning(
-                query=query_word.query,
-                model=DefinitionFr,
-                lang="zh",
-            )
-    else:
-        if search_lang == "jp":
-            word_contents = await service.suggest_autocomplete(
-                query=query,
-                dict_lang="jp",
-                model=WordlistJp,
-            )
-        elif search_lang == "zh":
-            word_contents = []
-            word_contents.extend(
-                await service.search_definition_by_meaning(
-                    query=query_word.query,
-                    model=DefinitionJp,
-                    lang="zh",
-                )
-            )
-            if transable:
-                word_contents = await service.suggest_autocomplete(
-                    query=query,
-                    dict_lang="jp",
-                    model=WordlistJp,
-                )
-        else:
-            word_contents = await service.suggest_autocomplete(
-                query=query,
-                dict_lang="jp",
-                model=WordlistJp,
-            )
-    suggest_list = service.merge_word_results(word_contents)
-    return {"list": suggest_list}
+    word_contents = await suggest_autocomplete(query=query_word)
+    return {"list": word_contents}


 @dict_search.post("/search/list/proverb")
 async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get_current_user)):
-    query, lang, transable = await service.detect_language(text=query_word.query)
+    query, lang, _ = await service.detect_language(text=query_word.query)
     query = normalize_text(query_word.query) if lang == "fr" else query_word.query
     suggest_proverbs = await service.suggest_proverb(
         query=query_word.query,
@@ -229,8 +177,7 @@ async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get
 @dict_search.post("/search/proverb")
 async def search_proverb(proverb_id: int = Form(...), user=Depends(get_current_user)):
-    result = await service.accurate_idiom_proverb(search_id=proverb_id, model=ProverbFr,
-                                                  only_fields=["text", "chi_exp"])
+    result = await service.accurate_idiom_proverb(search_id=proverb_id, model=ProverbFr, only_fields=["text", "chi_exp"])
     return {"result": result}
@@ -278,6 +225,5 @@ async def search_idiom_list(query_idiom: ProverbSearchRequest, user=Depends(get_
 @dict_search.post("/search/idiom")
 async def search_idiom(query_id: int, user=Depends(get_current_user)):
-    result = await service.accurate_idiom_proverb(search_id=query_id, model=IdiomJp,
-                                                  only_fields=["id", "text", "search_text", "chi_exp", "example"])
+    result = await service.accurate_idiom_proverb(search_id=query_id, model=IdiomJp, only_fields=["id", "text", "search_text", "chi_exp", "example"])
     return {"result": result}
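One subtlety in the two handlers above: `/search/proverb` reads `proverb_id` from form data (`Form(...)`), while `/search/idiom` declares `query_id` as a bare `int`, which FastAPI binds as a query parameter. A hedged client sketch, assuming the same `/api` prefix as the documented endpoints, with a placeholder base URL, token, and IDs:

```python
import httpx

headers = {"Authorization": "Bearer <your_jwt_token>"}  # placeholder JWT
with httpx.Client(base_url="http://localhost:8000", headers=headers) as client:
    # form-encoded body, because the handler declares proverb_id: int = Form(...)
    r1 = client.post("/api/search/proverb", data={"proverb_id": 1})
    # a bare int parameter without Form/Body binds as a query parameter
    r2 = client.post("/api/search/idiom", params={"query_id": 1})
    print(r1.json()["result"], r2.json()["result"])
```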


@@ -1,13 +1,15 @@
 import re
-from typing import List, Tuple, Dict, Literal, Type, Any
+from typing import List, Tuple, Dict, Literal, Type
 from fastapi import HTTPException
 from redis.asyncio import Redis
 from tortoise import Tortoise, Model
 from tortoise.expressions import Q
-from app.models import KangjiMapping
+from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchRequest
+from app.models import WordlistFr, WordlistJp, KangjiMapping
 from app.utils.all_kana import all_in_kana
+from app.utils.textnorm import normalize_text
 from settings import TORTOISE_ORM
@@ -55,12 +57,8 @@ async def detect_language(text: str) -> Tuple[str, str, bool]:
         return text, "zh", False
     # ✅ Step 3: Latin-script detection (e.g. French)
-    if re.search(r"[À-ÿ]", text):
-        return text, "fr", True  # True → contains extended Latin (not English)
-    # the input is pure ASCII letters
-    elif re.fullmatch(r"[a-zA-Z]+", text):
-        return text, "fr", False  # False → an English word
+    if re.search(r"[a-zA-ZÀ-ÿ]", text):
+        return text, "fr", False
     # ✅ Step 4: everything else (symbols, whitespace, etc.)
     return text, "other", False
@@ -132,255 +130,115 @@
     return results[:limit]


-async def suggest_autocomplete(
-    query: str,
-    dict_lang: Literal["fr", "jp"],
-    model: Type[Model],
-    search_field: str = "search_text",
-    text_field: str = "text",
-    hira_field: str = "hiragana",
-    freq_field: str = "freq",
-    limit: int = 10,
-) -> List[Dict[str, str]]:
-    """
-    Generic autocomplete suggestion helper.
-    - French: search on search_text / text, plus reverse lookup of English/Chinese definitions in DefinitionFr
-    - Japanese: match the original text first, then kana, plus reverse lookup of Chinese definitions in DefinitionJp
-    Unified return shape:
-    [
-        {
-            "word": "étudier",
-            "hiragana": None,
-            "meanings": ["学习", "研究"],
-            "english": ["to study", "to learn"]
-        }
-    ]
-    """
-    keyword = query.strip()
-    if not keyword:
-        return []
-
-    # ========== French branch ==========
-    if dict_lang == "fr":
-        start_condition = (
-            Q(**{f"{search_field}__istartswith": keyword})
-            | Q(**{f"{text_field}__istartswith": keyword})
-        )
-        contain_condition = (
-            Q(**{f"{search_field}__icontains": keyword})
-            | Q(**{f"{text_field}__icontains": keyword})
-        )
-        value_fields = ["id", text_field, freq_field, search_field]
-
-    # ========== Japanese branch ==========
-    elif dict_lang == "jp":
-        kana_word = all_in_kana(keyword)
-        start_condition = Q(**{f"{text_field}__startswith": keyword})
-        contain_condition = Q(**{f"{text_field}__icontains": keyword})
-        kana_start = Q(**{f"{hira_field}__startswith": kana_word})
-        kana_contain = Q(**{f"{hira_field}__icontains": kana_word})
-        start_condition |= kana_start
-        contain_condition |= kana_contain
-        value_fields = ["id", text_field, hira_field, freq_field]
-    else:
-        return []
-
-    # ✅ fetch matching words
-    start_matches = await (
-        model.filter(start_condition)
-        .order_by(f"-{freq_field}", "id")
-        .limit(limit)
-        .values(*value_fields)
-    )
-    contain_matches = await (
-        model.filter(contain_condition & ~start_condition)
-        .order_by(f"-{freq_field}", "id")
-        .limit(limit)
-        .values(*value_fields)
-    )
-
-    results = []
-    seen_ids = set()
-    for row in start_matches + contain_matches:
-        if row["id"] not in seen_ids:
-            seen_ids.add(row["id"])
-            results.append({
-                "id": row["id"],
-                "word": row[text_field],
-                "hiragana": row.get(hira_field) if dict_lang == "jp" else None,
-                "meanings": [],
-                "english": [],
-            })
-
-    # ✅ batch reverse lookup against the Definition tables to avoid N+1 queries
-    if dict_lang == "fr":
-        from app.models import DefinitionFr  # avoid a circular import
-        word_ids = [r["id"] for r in results]
-        defs = await DefinitionFr.filter(word_id__in=word_ids).values("word_id", "meaning", "eng_explanation")
-        meaning_map: Dict[int, Dict[str, List[str]]] = {}
-        for d in defs:
-            meaning_map.setdefault(d["word_id"], {"meanings": [], "english": []})
-            if d["meaning"]:
-                meaning_map[d["word_id"]]["meanings"].append(d["meaning"].strip())
-            if d["eng_explanation"]:
-                meaning_map[d["word_id"]]["english"].append(d["eng_explanation"].strip())
-        for r in results:
-            if r["id"] in meaning_map:
-                r["meanings"] = list(set(meaning_map[r["id"]]["meanings"]))
-                r["english"] = list(set(meaning_map[r["id"]]["english"]))
-    elif dict_lang == "jp":
-        from app.models import DefinitionJp
-        word_ids = [r["id"] for r in results]
-        defs = await DefinitionJp.filter(word_id__in=word_ids).values("word_id", "meaning")
-        meaning_map: Dict[int, List[str]] = {}
-        for d in defs:
-            if d["meaning"]:
-                meaning_map.setdefault(d["word_id"], []).append(d["meaning"].strip())
-        for r in results:
-            if r["id"] in meaning_map:
-                r["meanings"] = list(set(meaning_map[r["id"]]))
-
-    # ✅ drop id and keep only the fields the client needs
-    for r in results:
-        r.pop("id", None)
-    return results[:limit]
+async def suggest_autocomplete(query: SearchRequest, limit: int = 10):
+    """
+    :param query: the current user input
+    :param limit: maximum length of the returned list
+    :return: the suggested words (bare words only, not full entries)
+    """
+    if query.language == 'fr':
+        query_word = normalize_text(query.query)
+        exact = await (
+            WordlistFr
+            .get_or_none(search_text=query.query)
+            .values("text", "freq")
+        )
+        if exact:
+            exact_word = [(exact.get("text"), exact.get("freq"))]
+        else:
+            exact_word = []
+        qs_prefix = (
+            WordlistFr
+            .filter(Q(search_text__startswith=query_word) | Q(text__startswith=query.query))
+            .exclude(search_text=query.query)
+            .only("text", "freq")
+        )
+        prefix_objs = await qs_prefix[:limit]
+        prefix: List[Tuple[str, int]] = [(o.text, o.freq) for o in prefix_objs]
+        need = max(0, limit - len(prefix))
+        contains: List[Tuple[str, int]] = []
+        if need > 0:
+            qs_contain = (
+                WordlistFr
+                .filter(Q(search_text__icontains=query_word) | Q(text__icontains=query.query))
+                .exclude(Q(search_text__startswith=query_word) | Q(text__startswith=query.query) | Q(text=query.query))
+                .only("text", "freq")
+            )
+            contains_objs = await qs_contain[:need * 2]
+            contains = [(o.text, o.freq) for o in contains_objs]
+        seen_text, out = set(), []
+        for text, freq in list(exact_word) + list(prefix) + list(contains):
+            key = text
+            if key not in seen_text:
+                seen_text.add(key)
+                out.append((text, freq))
+                if len(out) >= limit:
+                    break
+        # rank by frequency descending, then shorter words, then alphabetical
+        out = sorted(out, key=lambda w: (-w[1], len(w[0]), w[0]))
+        return [text for text, _ in out]
+    else:
+        query_word = all_in_kana(query.query)
+        exact = await (
+            WordlistJp
+            .get_or_none(text=query.query)
+            .only("text", "hiragana", "freq")
+        )
+        if exact:
+            exact_word = [(exact.text, exact.hiragana, exact.freq)]
+        else:
+            exact_word = []
+        qs_prefix = (
+            WordlistJp
+            .filter(Q(hiragana__startswith=query_word) | Q(text__startswith=query.query))
+            .exclude(text=query.query)
+            .only("text", "hiragana", "freq")
+        )
+        prefix_objs = await qs_prefix[:limit]
+        prefix: List[Tuple[str, str, int]] = [(o.text, o.hiragana, o.freq) for o in prefix_objs]
+        need = max(0, limit - len(prefix))
+        contains: List[Tuple[str, str, int]] = []
+        if need > 0:
+            qs_contain = await (
+                WordlistJp
+                .filter(Q(hiragana__icontains=query_word) | Q(text__icontains=query.query))
+                .exclude(Q(hiragana__startswith=query_word) | Q(text__startswith=query.query) | Q(text=query.query))
+                .only("text", "hiragana", "freq")
+            )
+            contains_objs = qs_contain[:need * 2]
+            contains = [(o.text, o.hiragana, o.freq) for o in contains_objs]
+        seen_text, out = set(), []
+        for text, hiragana, freq in list(exact_word) + list(prefix) + list(contains):
+            key = (text, hiragana)
+            if key not in seen_text:
+                seen_text.add(key)
+                out.append((text, hiragana, freq))
+                if len(out) >= limit:
+                    break
+        out = sorted(out, key=lambda w: (-w[2], len(w[0]), w[0]))
+        return [(text, hiragana) for text, hiragana, _ in out]


-# ===================================================
-# ✅ Reverse lookup by definition (unified return shape)
-# ===================================================
-async def search_definition_by_meaning(
-    query: str,
-    model: Type[Model],
-    meaning_field: str = "meaning",
-    eng_field: str = "eng_explanation",
-    hira_field: str = "hiragana",
-    limit: int = 20,
-    lang: Literal["zh", "en"] = "zh",
-) -> List[Dict[str, str]]:
-    """
-    Bilingual reverse lookup by definition (Chinese/English).
-    Unified return shape:
-    [
-        {
-            "word": "étudier",
-            "hiragana": None,
-            "meanings": ["学习", "研究"],
-            "english": ["to study"]
-        }
-    ]
-    """
-    keyword = query.strip()
-    if not keyword:
-        return []
-
-    if lang == "zh":
-        search_field = meaning_field
-    elif lang == "en":
-        search_field = eng_field
-    else:
-        raise ValueError("lang 参数必须为 'zh' 或 'en'")
-
-    contain_condition = Q(**{f"{search_field}__icontains": keyword})
-
-    matches = (
-        await model.filter(contain_condition)
-        .prefetch_related("word")
-        .order_by("id")
-    )
-
-    word_to_data: Dict[str, Dict[str, List[str] | str | None]] = {}
-
-    for entry in matches:
-        word_obj = await entry.word
-        word_text = getattr(word_obj, "text", None)
-        if not word_text:
-            continue
-
-        chi_mean = getattr(entry, meaning_field, "").strip() or None
-        eng_mean = getattr(entry, eng_field, "").strip() or None
-        hira_text = getattr(word_obj, hira_field, None) if hasattr(word_obj, hira_field) else None
-
-        if word_text not in word_to_data:
-            word_to_data[word_text] = {"hiragana": hira_text, "meanings": [], "english": []}
-
-        if chi_mean:
-            word_to_data[word_text]["meanings"].append(chi_mean)
-        if eng_mean:
-            word_to_data[word_text]["english"].append(eng_mean)
-
-    results = []
-    for word, data in word_to_data.items():
-        results.append({
-            "word": word,
-            "hiragana": data["hiragana"],
-            "meanings": list(set(data["meanings"])),
-            "english": list(set(data["english"]))
-        })
-    return results[:limit]
-
-
-def merge_word_results(*lists: List[Dict[str, Any]]) -> List[Dict[str, object]]:
-    """
-    Merge multiple result lists and deduplicate:
-    - dedupe on word (+ hiragana) uniqueness
-    - merge and dedupe meanings / english
-    - preserve first-seen order
-    """
-    merged: Dict[str, Dict[str, Any]] = {}
-    order: List[str] = []
-    for lst in lists:
-        for item in lst:
-            word = item.get("word")
-            hira = item.get("hiragana")
-            key = f"{word}:{hira or ''}"  # word+hiragana as the unique key
-            if key not in merged:
-                # first occurrence: add to the results
-                merged[key] = {
-                    "word": word,
-                    "hiragana": hira,
-                    "meanings": list(item.get("meanings", [])),
-                    "english": list(item.get("english", []))
-                }
-                order.append(key)
-            else:
-                # already present → merge meanings and English explanations
-                merged[key]["meanings"] = list(set(
-                    list(merged[key].get("meanings", [])) +
-                    list(item.get("meanings", []) or [])
-                ))
-                merged[key]["english"] = list(set(
-                    list(merged[key].get("english", [])) +
-                    list(item.get("english", []) or [])
-                ))
-    # emit in insertion order
-    return [merged[k] for k in order]
-
-
-# async def __test():
-#     query_word: str = '棋逢'
-#     return await (
-#         suggest_proverb(
-#             query=ProverbSearchRequest(query=query_word),
-#             lang='zh'
-#         )
-#     )
+async def __test():
+    query_word: str = '棋逢'
+    return await (
+        suggest_proverb(
+            query=ProverbSearchRequest(query=query_word),
+            lang='zh'
+        )
+    )


 async def __main():
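The dedupe-then-rank core of the new `suggest_autocomplete` is easy to verify in isolation. A standalone sketch with made-up candidate data (not repository code): candidates arrive in tier order (exact, then prefix, then contains), duplicates keep their first tier, and survivors are re-ranked by frequency, length, and spelling:

```python
from typing import List, Tuple

def rank(candidates: List[Tuple[str, int]], limit: int = 10) -> List[str]:
    seen, out = set(), []
    for text, freq in candidates:  # tier order: exact, prefix, contains
        if text not in seen:
            seen.add(text)
            out.append((text, freq))
            if len(out) >= limit:
                break
    # frequency descending, then shorter words first, then alphabetical
    out.sort(key=lambda w: (-w[1], len(w[0]), w[0]))
    return [text for text, _ in out]

# "bonjour" appears twice but is kept once; final order follows freq.
print(rank([("bon", 900), ("bonjour", 800), ("bonjour", 800), ("bonbon", 950)]))
# -> ['bonbon', 'bon', 'bonjour']
```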