Compare commits
No commits in common. "9f9f46264001ae121d1a4fa753b3e1c04512c453" and "1c8036cb8de6731f0b0385a6a08d94b20215ca34" have entirely different histories.
9f9f462640
...
1c8036cb8d
59
README.md
59
README.md
|
|
@ -362,73 +362,28 @@ Authorization: Bearer <your_jwt_token>
|
||||||
#### 2.3 单词联想建议
|
#### 2.3 单词联想建议
|
||||||
|
|
||||||
- **接口**: `POST /api/search/list/word`
|
- **接口**: `POST /api/search/list/word`
|
||||||
- **描述**: 返回智能联想候选列表。后端会根据 `language`(当前词典)与用户输入自动切换检索策略,综合“前缀匹配”和“释义反查”两种来源,并对结果去重合并释义。
|
- **描述**: 根据用户输入返回单词联想列表,含前缀匹配与包含匹配。
|
||||||
- **需要认证**: 是
|
- **需要认证**: 是
|
||||||
- **请求体**:
|
- **请求体**:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"query": "bon",
|
"query": "bon",
|
||||||
"language": "fr"
|
"language": "fr",
|
||||||
|
"sort": "relevance",
|
||||||
|
"order": "des"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
- **检索规则**:
|
- **响应示例**:
|
||||||
- `language = "fr"`:
|
|
||||||
- 法语/拉丁字符输入:优先使用 `WordlistFr` 做前缀 + 包含匹配。
|
|
||||||
- 中文输入:回退到法语释义的中文字段做反查。
|
|
||||||
- 英文输入:会优先使用英文释义字段做反查,方便“英文 → 法语”场景。
|
|
||||||
- `language = "jp"`:
|
|
||||||
- 假名或日文汉字:直接在 `WordlistJp` 做前缀 + 包含匹配,同时返回假名字段。
|
|
||||||
- 中文输入:优先用中文释义反查;若该中文词条存在汉字映射,则并行检索对应的日语原词并放在结果前列。
|
|
||||||
|
|
||||||
- **响应字段**:
|
|
||||||
- `word`: 词条原文(法语或日语)
|
|
||||||
- `hiragana`: 仅日语结果携带;法语为 `null`
|
|
||||||
- `meanings`: 中文释义去重数组(当结果来自释义反查时才会出现)
|
|
||||||
- `english`: 英文释义去重数组(仅法语词典且按英文释义反查时出现)
|
|
||||||
|
|
||||||
- **响应示例(法语)**:
|
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"list": [
|
"list": ["bonjour", "bonsoir", "bonheur"]
|
||||||
{
|
|
||||||
"word": "bonjour",
|
|
||||||
"hiragana": null,
|
|
||||||
"meanings": ["你好", "问候语"],
|
|
||||||
"english": ["hello"]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"word": "bonsoir",
|
|
||||||
"hiragana": null,
|
|
||||||
"meanings": [],
|
|
||||||
"english": []
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
- **响应示例(日语,中文反查)**:
|
> **说明**: `language = "jp"` 时返回形如 `[["愛", "あい"], ["愛する", "あいする"]]` 的二维数组,第二列为假名读音。
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"list": [
|
|
||||||
{
|
|
||||||
"word": "愛",
|
|
||||||
"hiragana": "あい",
|
|
||||||
"meanings": ["爱;爱意"],
|
|
||||||
"english": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"word": "愛する",
|
|
||||||
"hiragana": "あいする",
|
|
||||||
"meanings": ["热爱;深爱"],
|
|
||||||
"english": []
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
- **状态码**:
|
- **状态码**:
|
||||||
- `200`: 查询成功
|
- `200`: 查询成功
|
||||||
|
|
|
||||||
|
|
@ -31,13 +31,7 @@ async def article_director(
|
||||||
redis = request.app.state.redis
|
redis = request.app.state.redis
|
||||||
# print(upload_article)
|
# print(upload_article)
|
||||||
|
|
||||||
match lang:
|
article_lang = "法语" if lang == "fr-FR" else "日语"
|
||||||
case "en-US":
|
|
||||||
article_lang = "英语"
|
|
||||||
case "fr-FR":
|
|
||||||
article_lang = "法语"
|
|
||||||
case _:
|
|
||||||
article_lang = "日语"
|
|
||||||
|
|
||||||
user_id = user[0].id
|
user_id = user[0].id
|
||||||
article = upload_article.content
|
article = upload_article.content
|
||||||
|
|
|
||||||
|
|
@ -6,10 +6,11 @@ from fastapi import APIRouter, Depends, HTTPException, Request, Form
|
||||||
from app.api.search_dict import service
|
from app.api.search_dict import service
|
||||||
from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse, SearchItemFr, SearchItemJp, \
|
from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse, SearchItemFr, SearchItemJp, \
|
||||||
ProverbSearchRequest
|
ProverbSearchRequest
|
||||||
|
from app.api.search_dict.service import suggest_autocomplete
|
||||||
from app.api.word_comment.word_comment_schemas import CommentSet
|
from app.api.word_comment.word_comment_schemas import CommentSet
|
||||||
from app.models import DefinitionJp, CommentFr, CommentJp, WordlistFr
|
from app.models import DefinitionJp, CommentFr, CommentJp
|
||||||
from app.models.fr import DefinitionFr, ProverbFr
|
from app.models.fr import DefinitionFr, ProverbFr
|
||||||
from app.models.jp import IdiomJp, WordlistJp
|
from app.models.jp import IdiomJp
|
||||||
from app.utils.all_kana import all_in_kana
|
from app.utils.all_kana import all_in_kana
|
||||||
from app.utils.security import get_current_user
|
from app.utils.security import get_current_user
|
||||||
from app.utils.textnorm import normalize_text
|
from app.utils.textnorm import normalize_text
|
||||||
|
|
@ -157,66 +158,13 @@ async def search_word_list(query_word: SearchRequest, user=Depends(get_current_u
|
||||||
:return: 待选列表
|
:return: 待选列表
|
||||||
"""
|
"""
|
||||||
# print(query_word.query, query_word.language, query_word.sort, query_word.order)
|
# print(query_word.query, query_word.language, query_word.sort, query_word.order)
|
||||||
query = query_word.query
|
word_contents = await suggest_autocomplete(query=query_word)
|
||||||
lang = query_word.language
|
return {"list": word_contents}
|
||||||
query, search_lang, transable = await service.detect_language(text=query)
|
|
||||||
word_contents = []
|
|
||||||
if lang == "fr":
|
|
||||||
if search_lang == "fr":
|
|
||||||
word_contents = await service.suggest_autocomplete(
|
|
||||||
query=query,
|
|
||||||
dict_lang="fr",
|
|
||||||
model=WordlistFr,
|
|
||||||
)
|
|
||||||
if not transable:
|
|
||||||
word_contents.extend(
|
|
||||||
await service.search_definition_by_meaning(
|
|
||||||
query=query,
|
|
||||||
model=DefinitionFr,
|
|
||||||
lang="en",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
word_contents = await service.search_definition_by_meaning(
|
|
||||||
query=query_word.query,
|
|
||||||
model=DefinitionFr,
|
|
||||||
lang="zh",
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
if search_lang == "jp":
|
|
||||||
word_contents = await service.suggest_autocomplete(
|
|
||||||
query=query,
|
|
||||||
dict_lang="jp",
|
|
||||||
model=WordlistJp,
|
|
||||||
)
|
|
||||||
elif search_lang == "zh":
|
|
||||||
word_contents = []
|
|
||||||
word_contents.extend(
|
|
||||||
await service.search_definition_by_meaning(
|
|
||||||
query=query_word.query,
|
|
||||||
model=DefinitionJp,
|
|
||||||
lang="zh",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
if transable:
|
|
||||||
word_contents = await service.suggest_autocomplete(
|
|
||||||
query=query,
|
|
||||||
dict_lang="jp",
|
|
||||||
model=WordlistJp,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
word_contents = await service.suggest_autocomplete(
|
|
||||||
query=query,
|
|
||||||
dict_lang="jp",
|
|
||||||
model=WordlistJp,
|
|
||||||
)
|
|
||||||
suggest_list = service.merge_word_results(word_contents)
|
|
||||||
return {"list": suggest_list}
|
|
||||||
|
|
||||||
|
|
||||||
@dict_search.post("/search/list/proverb")
|
@dict_search.post("/search/list/proverb")
|
||||||
async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get_current_user)):
|
async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get_current_user)):
|
||||||
query, lang, transable = await service.detect_language(text=query_word.query)
|
query, lang, _ = service.detect_language(text=query_word.query)
|
||||||
query = normalize_text(query_word.query) if lang == "fr" else query_word.query
|
query = normalize_text(query_word.query) if lang == "fr" else query_word.query
|
||||||
suggest_proverbs = await service.suggest_proverb(
|
suggest_proverbs = await service.suggest_proverb(
|
||||||
query=query_word.query,
|
query=query_word.query,
|
||||||
|
|
@ -229,8 +177,7 @@ async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get
|
||||||
|
|
||||||
@dict_search.post("/search/proverb")
|
@dict_search.post("/search/proverb")
|
||||||
async def search_proverb(proverb_id: int = Form(...), user=Depends(get_current_user)):
|
async def search_proverb(proverb_id: int = Form(...), user=Depends(get_current_user)):
|
||||||
result = await service.accurate_idiom_proverb(search_id=proverb_id, model=ProverbFr,
|
result = await service.accurate_idiom_proverb(search_id=proverb_id, model=ProverbFr, only_fields=["text", "chi_exp"])
|
||||||
only_fields=["text", "chi_exp"])
|
|
||||||
|
|
||||||
return {"result": result}
|
return {"result": result}
|
||||||
|
|
||||||
|
|
@ -278,6 +225,5 @@ async def search_idiom_list(query_idiom: ProverbSearchRequest, user=Depends(get_
|
||||||
|
|
||||||
@dict_search.post("/search/idiom")
|
@dict_search.post("/search/idiom")
|
||||||
async def search_idiom(query_id: int, user=Depends(get_current_user)):
|
async def search_idiom(query_id: int, user=Depends(get_current_user)):
|
||||||
result = await service.accurate_idiom_proverb(search_id=query_id, model=IdiomJp,
|
result = await service.accurate_idiom_proverb(search_id=query_id, model=IdiomJp, only_fields=["id", "text", "search_text", "chi_exp", "example"])
|
||||||
only_fields=["id", "text", "search_text", "chi_exp", "example"])
|
|
||||||
return {"result": result}
|
return {"result": result}
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,15 @@
|
||||||
import re
|
import re
|
||||||
from typing import List, Tuple, Dict, Literal, Type, Any
|
from typing import List, Tuple, Dict, Literal, Type
|
||||||
|
|
||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
from redis.asyncio import Redis
|
from redis.asyncio import Redis
|
||||||
from tortoise import Tortoise, Model
|
from tortoise import Tortoise, Model
|
||||||
from tortoise.expressions import Q
|
from tortoise.expressions import Q
|
||||||
|
|
||||||
from app.models import KangjiMapping
|
from app.api.search_dict.search_schemas import SearchRequest, ProverbSearchRequest
|
||||||
|
from app.models import WordlistFr, WordlistJp, KangjiMapping
|
||||||
from app.utils.all_kana import all_in_kana
|
from app.utils.all_kana import all_in_kana
|
||||||
|
from app.utils.textnorm import normalize_text
|
||||||
from settings import TORTOISE_ORM
|
from settings import TORTOISE_ORM
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -55,12 +57,8 @@ async def detect_language(text: str) -> Tuple[str, str, bool]:
|
||||||
return text, "zh", False
|
return text, "zh", False
|
||||||
|
|
||||||
# ✅ Step 3: 拉丁字母检测(如法语)
|
# ✅ Step 3: 拉丁字母检测(如法语)
|
||||||
if re.search(r"[À-ÿ]", text):
|
if re.search(r"[a-zA-ZÀ-ÿ]", text):
|
||||||
return text, "fr", True # True → 含拉丁扩展(非英语)
|
return text, "fr", False
|
||||||
|
|
||||||
# 全部为纯英文字符
|
|
||||||
elif re.fullmatch(r"[a-zA-Z]+", text):
|
|
||||||
return text, "fr", False # False → 英语单词
|
|
||||||
|
|
||||||
# ✅ Step 4: 其他情况(符号、空格等)
|
# ✅ Step 4: 其他情况(符号、空格等)
|
||||||
return text, "other", False
|
return text, "other", False
|
||||||
|
|
@ -132,255 +130,115 @@ async def suggest_proverb(
|
||||||
return results[:limit]
|
return results[:limit]
|
||||||
|
|
||||||
|
|
||||||
async def suggest_autocomplete(
|
async def suggest_autocomplete(query: SearchRequest, limit: int = 10):
|
||||||
query: str,
|
|
||||||
dict_lang: Literal["fr", "jp"],
|
|
||||||
model: Type[Model],
|
|
||||||
search_field: str = "search_text",
|
|
||||||
text_field: str = "text",
|
|
||||||
hira_field: str = "hiragana",
|
|
||||||
freq_field: str = "freq",
|
|
||||||
limit: int = 10,
|
|
||||||
) -> List[Dict[str, str]]:
|
|
||||||
"""
|
"""
|
||||||
通用自动补全建议接口:
|
|
||||||
- 法语: 按 search_text / text 搜索 + 反查 DefinitionFr 英/中释义
|
:param query: 当前用户输入的内容
|
||||||
- 日语: 先按原文 text 匹配,再按假名匹配 + 反查 DefinitionJp 中文释义
|
:param limit: 返回列表限制长度
|
||||||
统一返回结构:
|
:return: 联想的单词列表(非完整信息,单纯单词)
|
||||||
[
|
|
||||||
{
|
|
||||||
"word": "étudier",
|
|
||||||
"hiragana": None,
|
|
||||||
"meanings": ["学习", "研究"],
|
|
||||||
"english": ["to study", "to learn"]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
"""
|
"""
|
||||||
keyword = query.strip()
|
if query.language == 'fr':
|
||||||
if not keyword:
|
query_word = normalize_text(query.query)
|
||||||
return []
|
exact = await (
|
||||||
|
WordlistFr
|
||||||
# ========== 法语分支 ==========
|
.get_or_none(search_text=query.query)
|
||||||
if dict_lang == "fr":
|
.values("text", "freq")
|
||||||
start_condition = (
|
|
||||||
Q(**{f"{search_field}__istartswith": keyword})
|
|
||||||
| Q(**{f"{text_field}__istartswith": keyword})
|
|
||||||
)
|
)
|
||||||
contain_condition = (
|
if exact:
|
||||||
Q(**{f"{search_field}__icontains": keyword})
|
exact_word = [(exact.get("text"), exact.get("freq"))]
|
||||||
| Q(**{f"{text_field}__icontains": keyword})
|
else:
|
||||||
|
exact_word = []
|
||||||
|
|
||||||
|
qs_prefix = (
|
||||||
|
WordlistFr
|
||||||
|
.filter(Q(search_text__startswith=query_word) | Q(text__startswith=query.query))
|
||||||
|
.exclude(search_text=query.query)
|
||||||
|
.only("text", "freq")
|
||||||
)
|
)
|
||||||
value_fields = ["id", text_field, freq_field, search_field]
|
prefix_objs = await qs_prefix[:limit]
|
||||||
|
prefix: List[Tuple[str, int]] = [(o.text, o.freq) for o in prefix_objs]
|
||||||
|
|
||||||
# ========== 日语分支 ==========
|
need = max(0, limit - len(prefix))
|
||||||
elif dict_lang == "jp":
|
contains: List[Tuple[str, int]] = []
|
||||||
kana_word = all_in_kana(keyword)
|
|
||||||
start_condition = Q(**{f"{text_field}__startswith": keyword})
|
|
||||||
contain_condition = Q(**{f"{text_field}__icontains": keyword})
|
|
||||||
|
|
||||||
kana_start = Q(**{f"{hira_field}__startswith": kana_word})
|
if need > 0:
|
||||||
kana_contain = Q(**{f"{hira_field}__icontains": kana_word})
|
qs_contain = (
|
||||||
|
WordlistFr
|
||||||
|
.filter(Q(search_text__icontains=query_word) | Q(text__icontains=query.query))
|
||||||
|
.exclude(Q(search_text__startswith=query_word) | Q(text__startswith=query.query) | Q(text=query.query))
|
||||||
|
.only("text", "freq")
|
||||||
|
.only("text", "freq")
|
||||||
|
)
|
||||||
|
contains_objs = await qs_contain[: need * 2]
|
||||||
|
contains = [(o.text, o.freq) for o in contains_objs]
|
||||||
|
|
||||||
start_condition |= kana_start
|
seen_text, out = set(), []
|
||||||
contain_condition |= kana_contain
|
for text, freq in list(exact_word) + list(prefix) + list(contains):
|
||||||
value_fields = ["id", text_field, hira_field, freq_field]
|
key = text
|
||||||
|
if key not in seen_text:
|
||||||
|
seen_text.add(key)
|
||||||
|
out.append((text, freq))
|
||||||
|
if len(out) >= limit:
|
||||||
|
break
|
||||||
|
out = sorted(out, key=lambda w: (-w[2], len(w[0]), w[0]))
|
||||||
|
return [text for text, _ in out]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return []
|
query_word = all_in_kana(query.query)
|
||||||
|
exact = await (
|
||||||
# ✅ 获取匹配单词
|
WordlistJp
|
||||||
start_matches = await (
|
.get_or_none(
|
||||||
model.filter(start_condition)
|
text=query.query
|
||||||
.order_by(f"-{freq_field}", "id")
|
|
||||||
.limit(limit)
|
|
||||||
.values(*value_fields)
|
|
||||||
)
|
)
|
||||||
|
.only("text", "hiragana", "freq")
|
||||||
contain_matches = await (
|
|
||||||
model.filter(contain_condition & ~start_condition)
|
|
||||||
.order_by(f"-{freq_field}", "id")
|
|
||||||
.limit(limit)
|
|
||||||
.values(*value_fields)
|
|
||||||
)
|
)
|
||||||
|
if exact:
|
||||||
results = []
|
exact_word = [(exact.text, exact.hiragana, exact.freq)]
|
||||||
seen_ids = set()
|
|
||||||
for row in start_matches + contain_matches:
|
|
||||||
if row["id"] not in seen_ids:
|
|
||||||
seen_ids.add(row["id"])
|
|
||||||
results.append({
|
|
||||||
"id": row["id"],
|
|
||||||
"word": row[text_field],
|
|
||||||
"hiragana": row.get(hira_field) if dict_lang == "jp" else None,
|
|
||||||
"meanings": [],
|
|
||||||
"english": [],
|
|
||||||
})
|
|
||||||
|
|
||||||
# ✅ 批量反查 Definition 表,防止 N+1 查询
|
|
||||||
if dict_lang == "fr":
|
|
||||||
from app.models import DefinitionFr # 避免循环导入
|
|
||||||
word_ids = [r["id"] for r in results]
|
|
||||||
defs = await DefinitionFr.filter(word_id__in=word_ids).values("word_id", "meaning", "eng_explanation")
|
|
||||||
|
|
||||||
meaning_map: Dict[int, Dict[str, List[str]]] = {}
|
|
||||||
for d in defs:
|
|
||||||
meaning_map.setdefault(d["word_id"], {"meanings": [], "english": []})
|
|
||||||
if d["meaning"]:
|
|
||||||
meaning_map[d["word_id"]]["meanings"].append(d["meaning"].strip())
|
|
||||||
if d["eng_explanation"]:
|
|
||||||
meaning_map[d["word_id"]]["english"].append(d["eng_explanation"].strip())
|
|
||||||
|
|
||||||
for r in results:
|
|
||||||
if r["id"] in meaning_map:
|
|
||||||
r["meanings"] = list(set(meaning_map[r["id"]]["meanings"]))
|
|
||||||
r["english"] = list(set(meaning_map[r["id"]]["english"]))
|
|
||||||
|
|
||||||
elif dict_lang == "jp":
|
|
||||||
from app.models import DefinitionJp
|
|
||||||
word_ids = [r["id"] for r in results]
|
|
||||||
defs = await DefinitionJp.filter(word_id__in=word_ids).values("word_id", "meaning")
|
|
||||||
|
|
||||||
meaning_map: Dict[int, List[str]] = {}
|
|
||||||
for d in defs:
|
|
||||||
if d["meaning"]:
|
|
||||||
meaning_map.setdefault(d["word_id"], []).append(d["meaning"].strip())
|
|
||||||
|
|
||||||
for r in results:
|
|
||||||
if r["id"] in meaning_map:
|
|
||||||
r["meanings"] = list(set(meaning_map[r["id"]]))
|
|
||||||
|
|
||||||
# ✅ 删除 id,只保留用户需要字段
|
|
||||||
for r in results:
|
|
||||||
r.pop("id", None)
|
|
||||||
|
|
||||||
return results[:limit]
|
|
||||||
|
|
||||||
|
|
||||||
# ===================================================
|
|
||||||
# ✅ 释义反查接口(返回统一结构)
|
|
||||||
# ===================================================
|
|
||||||
|
|
||||||
async def search_definition_by_meaning(
|
|
||||||
query: str,
|
|
||||||
model: Type[Model],
|
|
||||||
meaning_field: str = "meaning",
|
|
||||||
eng_field: str = "eng_explanation",
|
|
||||||
hira_field: str = "hiragana",
|
|
||||||
limit: int = 20,
|
|
||||||
lang: Literal["zh", "en"] = "zh",
|
|
||||||
) -> List[Dict[str, str]]:
|
|
||||||
"""
|
|
||||||
双语释义反查接口(中文/英文):
|
|
||||||
统一返回结构:
|
|
||||||
[
|
|
||||||
{
|
|
||||||
"word": "étudier",
|
|
||||||
"hiragana": None,
|
|
||||||
"meanings": ["学习", "研究"],
|
|
||||||
"english": ["to study"]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
"""
|
|
||||||
|
|
||||||
keyword = query.strip()
|
|
||||||
if not keyword:
|
|
||||||
return []
|
|
||||||
|
|
||||||
if lang == "zh":
|
|
||||||
search_field = meaning_field
|
|
||||||
elif lang == "en":
|
|
||||||
search_field = eng_field
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("lang 参数必须为 'zh' 或 'en'")
|
exact_word = []
|
||||||
|
|
||||||
contain_condition = Q(**{f"{search_field}__icontains": keyword})
|
qs_prefix = (
|
||||||
|
WordlistJp
|
||||||
matches = (
|
.filter(Q(hiragana__startswith=query_word) | Q(text__startswith=query.query))
|
||||||
await model.filter(contain_condition)
|
.exclude(text=query.query)
|
||||||
.prefetch_related("word")
|
.only("text", "hiragana", "freq")
|
||||||
.order_by("id")
|
|
||||||
)
|
)
|
||||||
|
prefix_objs = await qs_prefix[:limit]
|
||||||
|
prefix: List[Tuple[str, str, int]] = [(o.text, o.hiragana, o.freq) for o in prefix_objs]
|
||||||
|
|
||||||
word_to_data: Dict[str, Dict[str, List[str] | str | None]] = {}
|
need = max(0, limit - len(prefix))
|
||||||
|
contains: List[Tuple[str, str, int]] = []
|
||||||
|
|
||||||
for entry in matches:
|
if need > 0:
|
||||||
word_obj = await entry.word
|
qs_contain = await (
|
||||||
word_text = getattr(word_obj, "text", None)
|
WordlistJp
|
||||||
if not word_text:
|
.filter(Q(hiragana__icontains=query_word) | Q(text__icontains=query.query))
|
||||||
continue
|
.exclude(Q(hiragana__startswith=query_word) | Q(text__startswith=query.query) | Q(text=query.query))
|
||||||
|
.only("text", "hiragana", "freq")
|
||||||
|
)
|
||||||
|
contains_objs = qs_contain[:need * 2]
|
||||||
|
contains: List[Tuple[str, str, int]] = [(o.text, o.hiragana, o.freq) for o in contains_objs]
|
||||||
|
|
||||||
chi_mean = getattr(entry, meaning_field, "").strip() or None
|
seen_text, out = set(), []
|
||||||
eng_mean = getattr(entry, eng_field, "").strip() or None
|
for text, hiragana, freq in list(exact_word) + list(prefix) + list(contains):
|
||||||
hira_text = getattr(word_obj, hira_field, None) if hasattr(word_obj, hira_field) else None
|
key = (text, hiragana)
|
||||||
|
if key not in seen_text:
|
||||||
if word_text not in word_to_data:
|
seen_text.add(key)
|
||||||
word_to_data[word_text] = {"hiragana": hira_text, "meanings": [], "english": []}
|
out.append((text, hiragana, freq))
|
||||||
|
if len(out) >= limit:
|
||||||
if chi_mean:
|
break
|
||||||
word_to_data[word_text]["meanings"].append(chi_mean)
|
out = sorted(out, key=lambda w: (-w[2], len(w[0]), w[0]))
|
||||||
if eng_mean:
|
return [(text, hiragana) for text, hiragana, _ in out]
|
||||||
word_to_data[word_text]["english"].append(eng_mean)
|
|
||||||
|
|
||||||
results = []
|
|
||||||
for word, data in word_to_data.items():
|
|
||||||
results.append({
|
|
||||||
"word": word,
|
|
||||||
"hiragana": data["hiragana"],
|
|
||||||
"meanings": list(set(data["meanings"])),
|
|
||||||
"english": list(set(data["english"]))
|
|
||||||
})
|
|
||||||
|
|
||||||
return results[:limit]
|
|
||||||
|
|
||||||
|
|
||||||
def merge_word_results(*lists: List[Dict[str, Any]]) -> List[Dict[str, object]]:
|
async def __test():
|
||||||
"""
|
query_word: str = '棋逢'
|
||||||
合并多个结果列表并去重:
|
return await (
|
||||||
- 依据 word(+ hiragana)唯一性去重
|
suggest_proverb(
|
||||||
- meanings / english 合并去重
|
query=ProverbSearchRequest(query=query_word),
|
||||||
- 保留最早出现的顺序
|
lang='zh'
|
||||||
"""
|
)
|
||||||
merged: Dict[str, Dict[str, Any]] = {}
|
)
|
||||||
order: List[str] = []
|
|
||||||
|
|
||||||
for lst in lists:
|
|
||||||
for item in lst:
|
|
||||||
word = item.get("word")
|
|
||||||
hira = item.get("hiragana")
|
|
||||||
key = f"{word}:{hira or ''}" # 以 word+hiragana 作为唯一标识
|
|
||||||
|
|
||||||
if key not in merged:
|
|
||||||
# 初次出现,加入结果集
|
|
||||||
merged[key] = {
|
|
||||||
"word": word,
|
|
||||||
"hiragana": hira,
|
|
||||||
"meanings": list(item.get("meanings", [])),
|
|
||||||
"english": list(item.get("english", []))
|
|
||||||
}
|
|
||||||
order.append(key)
|
|
||||||
else:
|
|
||||||
# 已存在 → 合并释义和英文解释
|
|
||||||
merged[key]["meanings"] = list(set(
|
|
||||||
list(merged[key].get("meanings", [])) +
|
|
||||||
list(item.get("meanings", []) or [])
|
|
||||||
))
|
|
||||||
merged[key]["english"] = list(set(
|
|
||||||
list(merged[key].get("english", [])) +
|
|
||||||
list(item.get("english", []) or [])
|
|
||||||
))
|
|
||||||
|
|
||||||
# 保持插入顺序输出
|
|
||||||
return [merged[k] for k in order]
|
|
||||||
|
|
||||||
|
|
||||||
# async def __test():
|
|
||||||
# query_word: str = '棋逢'
|
|
||||||
# return await (
|
|
||||||
# suggest_proverb(
|
|
||||||
# query=ProverbSearchRequest(query=query_word),
|
|
||||||
# lang='zh'
|
|
||||||
# )
|
|
||||||
# )
|
|
||||||
|
|
||||||
|
|
||||||
async def __main():
|
async def __main():
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue