Compare commits

...

7 Commits

Author SHA1 Message Date
Miyamizu-MitsuhaSang 9f9f462640 Frontend adaptation 2025-11-07 17:58:19 +08:00
Miyamizu-MitsuhaSang b3195f726d Frontend adaptation 2025-11-07 17:43:48 +08:00
Miyamizu-MitsuhaSang bf9d136112 Frontend adaptation 2025-11-07 17:35:30 +08:00
Miyamizu-MitsuhaSang 79b09c17f9 Frontend adaptation 2025-11-07 17:28:03 +08:00
Miyamizu-MitsuhaSang 9a099f46e8 Frontend adaptation 2025-11-07 17:24:52 +08:00
Miyamizu-MitsuhaSang af4139ce91 Frontend adaptation 2025-11-07 17:23:22 +08:00
Miyamizu-MitsuhaSang cf64bc71b3 Frontend adaptation 2025-11-07 17:17:30 +08:00
4 changed files with 367 additions and 120 deletions

View File

@@ -362,28 +362,73 @@ Authorization: Bearer <your_jwt_token>
#### 2.3 Word Suggestions (Autocomplete)
- **Endpoint**: `POST /api/search/list/word`
- **Description**: Returns a list of smart suggestion candidates. The backend switches its retrieval strategy based on `language` (the active dictionary) and the user's input, combining two sources, prefix matching and meaning reverse lookup, then deduplicates and merges the meanings.
- **Requires authentication**: Yes
- **Request body**:
```json
{
  "query": "bon",
  "language": "fr"
}
```
- **Retrieval rules** (see the client sketch after this section):
  - `language = "fr"`:
    - French / Latin-script input: prefix + contains matching against `WordlistFr` first.
    - Chinese input: falls back to a reverse lookup on the Chinese meaning fields of the French definitions.
    - English input: prefers a reverse lookup on the English definition fields, covering the "English → French" case.
  - `language = "jp"`:
    - Kana or kanji input: prefix + contains matching directly against `WordlistJp`; the kana reading is returned alongside.
    - Chinese input: reverse lookup on the Chinese meanings first; if the Chinese term has a kanji mapping, the corresponding Japanese headwords are queried in parallel and placed at the front of the results.
- **Response fields**:
  - `word`: the original headword (French or Japanese)
  - `hiragana`: present only for Japanese results; `null` for French
  - `meanings`: deduplicated array of Chinese meanings (only present when the result comes from a meaning reverse lookup)
  - `english`: deduplicated array of English explanations (French dictionary only, when matched via English meanings)
- **Response example (French)**:
```json
{
  "list": [
    {
      "word": "bonjour",
      "hiragana": null,
      "meanings": ["你好", "问候语"],
      "english": ["hello"]
    },
    {
      "word": "bonsoir",
      "hiragana": null,
      "meanings": [],
      "english": []
    }
  ]
}
```
- **Response example (Japanese, Chinese reverse lookup)**:
```json
{
  "list": [
    {
      "word": "愛",
      "hiragana": "あい",
      "meanings": ["爱;爱意"],
      "english": []
    },
    {
      "word": "愛する",
      "hiragana": "あいする",
      "meanings": ["热爱;深爱"],
      "english": []
    }
  ]
}
```
- **Status codes**:
  - `200`: query succeeded
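
A minimal client-side sketch of calling this endpoint. `httpx`, the base URL, and the token value are assumptions for illustration, not part of this repo:

```python
import asyncio

import httpx


async def fetch_suggestions() -> None:
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        resp = await client.post(
            "/api/search/list/word",
            json={"query": "bon", "language": "fr"},
            headers={"Authorization": "Bearer <your_jwt_token>"},
        )
        resp.raise_for_status()
        for item in resp.json()["list"]:
            # Every item follows the unified word/hiragana/meanings/english shape.
            print(item["word"], item["meanings"], item["english"])


asyncio.run(fetch_suggestions())
```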

View File

@@ -31,7 +31,13 @@ async def article_director(
    redis = request.app.state.redis
    # print(upload_article)
    match lang:
        case "en-US":
            article_lang = "英语"
        case "fr-FR":
            article_lang = "法语"
        case _:
            article_lang = "日语"
    user_id = user[0].id
    article = upload_article.content
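
The `match` statement works; a module-level dict is an equally valid alternative that keeps the locale table in one place and makes adding a language a one-line change. A hypothetical sketch (only the locales shown in this diff are covered):

```python
# Hypothetical alternative: locale → display-language lookup table.
ARTICLE_LANG_BY_LOCALE = {
    "en-US": "英语",
    "fr-FR": "法语",
}


def resolve_article_lang(lang: str) -> str:
    # Fall back to Japanese, mirroring the `case _` branch above.
    return ARTICLE_LANG_BY_LOCALE.get(lang, "日语")
```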

View File

@@ -6,11 +6,10 @@ from fastapi import APIRouter, Depends, HTTPException, Request, Form
from app.api.search_dict import service
from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse, SearchItemFr, SearchItemJp, \
    ProverbSearchRequest
from app.api.word_comment.word_comment_schemas import CommentSet
from app.models import DefinitionJp, CommentFr, CommentJp, WordlistFr
from app.models.fr import DefinitionFr, ProverbFr
from app.models.jp import IdiomJp, WordlistJp
from app.utils.all_kana import all_in_kana
from app.utils.security import get_current_user
from app.utils.textnorm import normalize_text
@@ -158,13 +157,66 @@ async def search_word_list(query_word: SearchRequest, user=Depends(get_current_u
    :return: list of candidate suggestions
    """
    query = query_word.query
    lang = query_word.language
    query, search_lang, transable = await service.detect_language(text=query)
    word_contents = []
    if lang == "fr":
        if search_lang == "fr":
            word_contents = await service.suggest_autocomplete(
                query=query,
                dict_lang="fr",
                model=WordlistFr,
            )
            if not transable:
                # Plain-ASCII input may be an English word: also reverse-look-up
                # the English definition fields ("English → French").
                word_contents.extend(
                    await service.search_definition_by_meaning(
                        query=query,
                        model=DefinitionFr,
                        lang="en",
                    )
                )
        else:
            # Non-Latin (e.g. Chinese) input: reverse lookup on Chinese meanings.
            word_contents = await service.search_definition_by_meaning(
                query=query_word.query,
                model=DefinitionFr,
                lang="zh",
            )
    else:
        if search_lang == "jp":
            word_contents = await service.suggest_autocomplete(
                query=query,
                dict_lang="jp",
                model=WordlistJp,
            )
        elif search_lang == "zh":
            word_contents = await service.search_definition_by_meaning(
                query=query_word.query,
                model=DefinitionJp,
                lang="zh",
            )
            if transable:
                # detect_language mapped the Chinese term to Japanese kanji:
                # fetch the matching headwords too and, per the documented
                # behaviour, put them in front of the meaning-lookup results.
                kanji_matches = await service.suggest_autocomplete(
                    query=query,
                    dict_lang="jp",
                    model=WordlistJp,
                )
                word_contents = kanji_matches + word_contents
        else:
            word_contents = await service.suggest_autocomplete(
                query=query,
                dict_lang="jp",
                model=WordlistJp,
            )
    suggest_list = service.merge_word_results(word_contents)
    return {"list": suggest_list}


@dict_search.post("/search/list/proverb")
async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get_current_user)):
    query, lang, transable = await service.detect_language(text=query_word.query)
    query = normalize_text(query_word.query) if lang == "fr" else query_word.query
    suggest_proverbs = await service.suggest_proverb(
        query=query_word.query,
@@ -177,7 +229,8 @@ async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get
@dict_search.post("/search/proverb")
async def search_proverb(proverb_id: int = Form(...), user=Depends(get_current_user)):
result = await service.accurate_idiom_proverb(search_id=proverb_id, model=ProverbFr, only_fields=["text", "chi_exp"])
result = await service.accurate_idiom_proverb(search_id=proverb_id, model=ProverbFr,
only_fields=["text", "chi_exp"])
return {"result": result}
@@ -225,5 +278,6 @@ async def search_idiom_list(query_idiom: ProverbSearchRequest, user=Depends(get_
@dict_search.post("/search/idiom")
async def search_idiom(query_id: int, user=Depends(get_current_user)):
result = await service.accurate_idiom_proverb(search_id=query_id, model=IdiomJp, only_fields=["id", "text", "search_text", "chi_exp", "example"])
result = await service.accurate_idiom_proverb(search_id=query_id, model=IdiomJp,
only_fields=["id", "text", "search_text", "chi_exp", "example"])
return {"result": result}

View File

@@ -1,15 +1,13 @@
import re
from typing import List, Tuple, Dict, Literal, Type, Any

from fastapi import HTTPException
from redis.asyncio import Redis
from tortoise import Tortoise, Model
from tortoise.expressions import Q

from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchRequest
from app.models import KangjiMapping
from app.utils.all_kana import all_in_kana
from app.utils.textnorm import normalize_text
from settings import TORTOISE_ORM
@@ -57,8 +55,12 @@ async def detect_language(text: str) -> Tuple[str, str, bool]:
return text, "zh", False
# ✅ Step 3: 拉丁字母检测(如法语)
if re.search(r"[a-zA-ZÀ-ÿ]", text):
return text, "fr", False
if re.search(r"[À-ÿ]", text):
return text, "fr", True # True → 含拉丁扩展(非英语)
# 全部为纯英文字符
elif re.fullmatch(r"[a-zA-Z]+", text):
return text, "fr", False # False → 英语单词
# ✅ Step 4: 其他情况(符号、空格等)
return text, "other", False
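
A quick sketch of what the Latin-script branches above should return, assuming the earlier kana/han steps (not shown in this hunk) pass Latin input through unchanged:

```python
async def _demo_detect() -> None:
    # Extended Latin character "é" → French, flagged as non-English (True).
    assert await detect_language("étude") == ("étude", "fr", True)
    # Plain ASCII letters → treated as an English word (False).
    assert await detect_language("study") == ("study", "fr", False)
    # Symbols only → "other".
    assert await detect_language("!!!") == ("!!!", "other", False)
```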
@@ -130,116 +132,256 @@
    return results[:limit]


async def suggest_autocomplete(
    query: str,
    dict_lang: Literal["fr", "jp"],
    model: Type[Model],
    search_field: str = "search_text",
    text_field: str = "text",
    hira_field: str = "hiragana",
    freq_field: str = "freq",
    limit: int = 10,
) -> List[Dict[str, str]]:
    """
    Generic autocomplete helper.

    - French: prefix/contains search on search_text / text, with English and
      Chinese meanings back-filled from DefinitionFr.
    - Japanese: match the headword text first, then the kana reading, with
      Chinese meanings back-filled from DefinitionJp.

    Unified return shape:
    [
        {
            "word": "étudier",
            "hiragana": None,
            "meanings": ["学习", "研究"],
            "english": ["to study", "to learn"]
        }
    ]
    """
    keyword = query.strip()
    if not keyword:
        return []

    # ========== French branch ==========
    if dict_lang == "fr":
        start_condition = (
            Q(**{f"{search_field}__istartswith": keyword})
            | Q(**{f"{text_field}__istartswith": keyword})
        )
        contain_condition = (
            Q(**{f"{search_field}__icontains": keyword})
            | Q(**{f"{text_field}__icontains": keyword})
        )
        value_fields = ["id", text_field, freq_field, search_field]
    # ========== Japanese branch ==========
    elif dict_lang == "jp":
        kana_word = all_in_kana(keyword)
        start_condition = Q(**{f"{text_field}__startswith": keyword})
        contain_condition = Q(**{f"{text_field}__icontains": keyword})
        kana_start = Q(**{f"{hira_field}__startswith": kana_word})
        kana_contain = Q(**{f"{hira_field}__icontains": kana_word})
        start_condition |= kana_start
        contain_condition |= kana_contain
        value_fields = ["id", text_field, hira_field, freq_field]
    else:
        return []

    # ✅ Fetch matching words: prefix hits first, then contains-only hits
    start_matches = await (
        model.filter(start_condition)
        .order_by(f"-{freq_field}", "id")
        .limit(limit)
        .values(*value_fields)
    )
    contain_matches = await (
        model.filter(contain_condition & ~start_condition)
        .order_by(f"-{freq_field}", "id")
        .limit(limit)
        .values(*value_fields)
    )

    results = []
    seen_ids = set()
    for row in start_matches + contain_matches:
        if row["id"] not in seen_ids:
            seen_ids.add(row["id"])
            results.append({
                "id": row["id"],
                "word": row[text_field],
                "hiragana": row.get(hira_field) if dict_lang == "jp" else None,
                "meanings": [],
                "english": [],
            })

    # ✅ Batch reverse lookup in the Definition tables to avoid N+1 queries
    if dict_lang == "fr":
        from app.models import DefinitionFr  # local import avoids a circular import
        word_ids = [r["id"] for r in results]
        defs = await DefinitionFr.filter(word_id__in=word_ids).values("word_id", "meaning", "eng_explanation")
        meaning_map: Dict[int, Dict[str, List[str]]] = {}
        for d in defs:
            meaning_map.setdefault(d["word_id"], {"meanings": [], "english": []})
            if d["meaning"]:
                meaning_map[d["word_id"]]["meanings"].append(d["meaning"].strip())
            if d["eng_explanation"]:
                meaning_map[d["word_id"]]["english"].append(d["eng_explanation"].strip())
        for r in results:
            if r["id"] in meaning_map:
                r["meanings"] = list(set(meaning_map[r["id"]]["meanings"]))
                r["english"] = list(set(meaning_map[r["id"]]["english"]))
    elif dict_lang == "jp":
        from app.models import DefinitionJp
        word_ids = [r["id"] for r in results]
        defs = await DefinitionJp.filter(word_id__in=word_ids).values("word_id", "meaning")
        meaning_map: Dict[int, List[str]] = {}
        for d in defs:
            if d["meaning"]:
                meaning_map.setdefault(d["word_id"], []).append(d["meaning"].strip())
        for r in results:
            if r["id"] in meaning_map:
                r["meanings"] = list(set(meaning_map[r["id"]]))

    # ✅ Drop the internal id; keep only the fields the client needs
    for r in results:
        r.pop("id", None)
    return results[:limit]
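
A hedged usage sketch for the helper above; it assumes an initialized Tortoise connection and imports `WordlistFr` from this repo's models:

```python
from app.models import WordlistFr


async def _demo_autocomplete() -> None:
    rows = await suggest_autocomplete(query="bon", dict_lang="fr", model=WordlistFr)
    for row in rows:
        # Unified shape: word / hiragana / meanings / english.
        print(row["word"], row["meanings"], row["english"])
```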

# ===================================================
# ✅ Meaning reverse lookup (returns the unified shape)
# ===================================================
async def search_definition_by_meaning(
    query: str,
    model: Type[Model],
    meaning_field: str = "meaning",
    eng_field: str = "eng_explanation",
    hira_field: str = "hiragana",
    limit: int = 20,
    lang: Literal["zh", "en"] = "zh",
) -> List[Dict[str, str]]:
    """
    Bilingual meaning reverse lookup (Chinese / English).

    Unified return shape:
    [
        {
            "word": "étudier",
            "hiragana": None,
            "meanings": ["学习", "研究"],
            "english": ["to study"]
        }
    ]
    """
    keyword = query.strip()
    if not keyword:
        return []
    if lang == "zh":
        search_field = meaning_field
    elif lang == "en":
        search_field = eng_field
    else:
        raise ValueError("lang must be 'zh' or 'en'")
    contain_condition = Q(**{f"{search_field}__icontains": keyword})
    matches = (
        await model.filter(contain_condition)
        .prefetch_related("word")
        .order_by("id")
    )
    word_to_data: Dict[str, Dict[str, List[str] | str | None]] = {}
    for entry in matches:
        word_obj = await entry.word
        word_text = getattr(word_obj, "text", None)
        if not word_text:
            continue
        # Guard against NULL columns before stripping.
        chi_mean = (getattr(entry, meaning_field, "") or "").strip() or None
        eng_mean = (getattr(entry, eng_field, "") or "").strip() or None
        hira_text = getattr(word_obj, hira_field, None) if hasattr(word_obj, hira_field) else None
        if word_text not in word_to_data:
            word_to_data[word_text] = {"hiragana": hira_text, "meanings": [], "english": []}
        if chi_mean:
            word_to_data[word_text]["meanings"].append(chi_mean)
        if eng_mean:
            word_to_data[word_text]["english"].append(eng_mean)
    results = []
    for word, data in word_to_data.items():
        results.append({
            "word": word,
            "hiragana": data["hiragana"],
            "meanings": list(set(data["meanings"])),
            "english": list(set(data["english"]))
        })
    return results[:limit]
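
A matching sketch for the reverse lookup; `DefinitionFr` and a live database connection are assumed. With `lang="en"` the keyword is matched against `eng_explanation`:

```python
from app.models.fr import DefinitionFr


async def _demo_reverse_lookup() -> None:
    hits = await search_definition_by_meaning(query="hello", model=DefinitionFr, lang="en")
    # e.g. [{"word": "bonjour", "hiragana": None, "meanings": [...], "english": ["hello"]}]
    print(hits)
```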

def merge_word_results(*lists: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Merge several result lists and deduplicate.
    - Uniqueness key: word (+ hiragana)
    - meanings / english are merged and deduplicated
    - Earliest-seen order is preserved
    """
    merged: Dict[str, Dict[str, Any]] = {}
    order: List[str] = []
    for lst in lists:
        for item in lst:
            word = item.get("word")
            hira = item.get("hiragana")
            key = f"{word}:{hira or ''}"  # word + hiragana as the unique key
            if key not in merged:
                # First occurrence: add to the result set
                merged[key] = {
                    "word": word,
                    "hiragana": hira,
                    "meanings": list(item.get("meanings", [])),
                    "english": list(item.get("english", []))
                }
                order.append(key)
            else:
                # Already present → merge meanings and English explanations
                merged[key]["meanings"] = list(set(
                    list(merged[key].get("meanings", [])) +
                    list(item.get("meanings", []) or [])
                ))
                merged[key]["english"] = list(set(
                    list(merged[key].get("english", [])) +
                    list(item.get("english", []) or [])
                ))
    # Emit in insertion order
    return [merged[k] for k in order]
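
A small illustration of the merge behaviour; note that the `set()` calls leave the order inside `meanings`/`english` unspecified:

```python
a = [{"word": "愛", "hiragana": "あい", "meanings": ["爱"], "english": []}]
b = [{"word": "愛", "hiragana": "あい", "meanings": ["爱", "爱意"], "english": []}]
print(merge_word_results(a, b))
# One entry keyed on 愛/あい, with meanings merged and deduplicated.
```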
# async def __test():
# query_word: str = '棋逢'
# return await (
# suggest_proverb(
# query=ProverbSearchRequest(query=query_word),
# lang='zh'
# )
# )

async def __main():
    await Tortoise.init(config=TORTOISE_ORM)