diff --git a/app/api/search.py b/app/api/search.py
index a8369ec..3780fe5 100644
--- a/app/api/search.py
+++ b/app/api/search.py
@@ -1,16 +1,111 @@
+from typing import Literal, List
+
+import jaconv
+import pykakasi
 from fastapi import APIRouter, Depends, HTTPException, Request
+from app.models import DefinitionJp
 from app.models.fr import DefinitionFr
+from app.schemas.search_schemas import SearchRequest, SearchResponse, SearchItemFr, SearchItemJp
 from app.utils.security import get_current_user
+from app.utils.textnorm import normalize_text
+from scripts.update_jp import normalize_jp_text
 
 
 dict_search = APIRouter()
+kks = pykakasi.kakasi()
+kks.setMode("H", "a")  # hiragana -> ascii (romaji)
+kks.setMode("K", "a")  # katakana -> ascii
+kks.setMode("J", "a")  # kanji -> ascii
+kks.setMode("r", "Hepburn")  # use Hepburn romanization
+conv = kks.getConverter()
 
 
-@dict_search.get("/search")
-async def search(request: Request, lang_pref: str, query_word: str, user= Depends(get_current_user)):
-    word_content = await DefinitionFr.filter(
-        word__icontains=query_word, lang_pref=lang_pref
-    ).values("word", "part_of_speech", "meaning", "example")
-    if not word_content:
-        raise HTTPException(status_code=404, detail="Word not found")
-    return word_content
+
+def all_in_kana(text: str) -> str:
+    """
+    Convert the input to hiragana. Supported inputs:
+    - hiragana
+    - katakana
+    - romaji (Hepburn transliteration)
+
+    Returns: a hiragana string.
+    """
+    if not text:
+        return ""
+
+    # 1. katakana -> hiragana
+    normalized = jaconv.kata2hira(text)
+
+    # 2. if the text contains Latin (romaji) characters, convert them to kana first
+    if any("a" <= ch.lower() <= "z" for ch in normalized):
+        hira = conv.do(normalized)  # romaji -> hiragana
+        normalized = jaconv.kata2hira(hira)
+
+    # 3. katakana -> hiragana once more, as a safety net
+    normalized = jaconv.kata2hira(normalized)
+
+    return normalized
+
+
+@dict_search.post("/search", response_model=SearchResponse)
+async def search(request: Request, body: SearchRequest, user=Depends(get_current_user)):
+    query = body.query
+    if body.language == 'fr':
+        query = normalize_text(query)
+        word_contents = await (
+            DefinitionFr
+            .filter(word__text=query)
+            .prefetch_related("word")
+        )
+        if not word_contents:
+            raise HTTPException(status_code=404, detail="Word not found")
+        pos_seen = set()
+        pos_contents = []
+        contents: List[SearchItemFr] = []
+        for wc in word_contents:
+            if wc.pos not in pos_seen:
+                pos_seen.add(wc.pos)
+                pos_contents.append(wc.pos)
+
+            contents.append(
+                SearchItemFr(
+                    pos=wc.pos,
+                    chi_exp=wc.meaning,
+                    example=wc.example,
+                    eng_explanation=wc.eng_explanation,
+                )
+            )
+        return SearchResponse(
+            query=query,
+            pos=pos_contents,
+            contents=contents,
+        )
+    else:
+        query = all_in_kana(query)
+        print(query)
+        word_content = await DefinitionJp.filter(
+            word__text=query
+        ).prefetch_related("word", "pos")
+        if not word_content:
+            raise HTTPException(status_code=404, detail="Word not found")
+
+        first_def = word_content[0]
+        pos_list = await first_def.pos.all()
+        pos_contents = [p.pos_type for p in pos_list]
+
+        contents: List[SearchItemJp] = []
+        for wc in word_content:
+            contents.append(
+                SearchItemJp(
+                    chi_exp=wc.meaning,
+                    example=wc.example,
+                )
+            )
+        return SearchResponse(
+            query=query,
+            pos=pos_contents,
+            contents=contents,
+        )
+
+# TODO: relevance ranking (switch to fuzzy matching)
+# TODO: live suggestions while typing in the search box
diff --git a/app/models/fr.py b/app/models/fr.py
index d47987b..86c84ae 100644
--- a/app/models/fr.py
+++ b/app/models/fr.py
@@ -15,7 +15,7 @@ class WordlistFr(Model):
     text = fields.CharField(max_length=40, unique=True, description="单词")
     definitions: fields.ReverseRelation["DefinitionFr"]
     attachments: fields.ReverseRelation["AttachmentFr"]
-    freq = fields.IntField()  # for frequency-based sorting
+    freq = fields.IntField(default=0)  # for frequency-based sorting
     search_text = fields.CharField(max_length=255, index=True)  # search field
     # attachment = fields.ForeignKeyField("models.Attachment", related_name="wordlists", on_delete=fields.CASCADE)
@@ -44,4 +44,4 @@ class DefinitionFr(Model):
     eng_explanation = fields.TextField(null=True, description="English explanation")
     example_varification = fields.BooleanField(default=False, description="例句是否审核")
     class Meta:
-        table = "definition_fr"
+        table = "definitions_fr"
diff --git a/app/models/jp.py b/app/models/jp.py
index b3c27a2..15cf90f 100644
--- a/app/models/jp.py
+++ b/app/models/jp.py
@@ -16,6 +16,8 @@ sheet_name_jp = "日汉释义"
 class WordlistJp(Model):
     id = fields.IntField(pk=True)
     text = fields.CharField(max_length=40, description="单词")
+    hiragana = fields.CharField(max_length=60, description="假名", null=False)
+    freq = fields.IntField(default=0)
     definitions : fields.ReverseRelation["DefinitionJp"]
     attachments : fields.ReverseRelation["AttachmentJp"]
diff --git a/app/schemas/search_schemas.py b/app/schemas/search_schemas.py
new file mode 100644
index 0000000..889a5de
--- /dev/null
+++ b/app/schemas/search_schemas.py
@@ -0,0 +1,31 @@
+from typing import Literal, List, Union
+
+from pydantic import BaseModel
+
+from app.models import PosType
+from app.schemas.admin_schemas import PosEnumFr
+
+
+class SearchRequest(BaseModel):
+    query: str
+    language: Literal['fr', 'jp']
+    sort: Literal['relevance', 'date'] = 'date'
+    order: Literal['asc', 'des'] = 'des'
+
+
+class SearchItemJp(BaseModel):
+    chi_exp: str
+    example: str
+
+
+class SearchItemFr(BaseModel):
+    pos: PosEnumFr
+    chi_exp: str
+    eng_explanation: str
+    example: str
+
+
+class SearchResponse(BaseModel):
+    query: str
+    pos: list
+    contents: Union[List[SearchItemFr], List[SearchItemJp]]
diff --git a/app/utils/security.py b/app/utils/security.py
index 70495bd..5cf983f 100644
--- a/app/utils/security.py
+++ b/app/utils/security.py
@@ -130,29 +130,19 @@ async def get_current_user_with_oauth(
     return await _decode_and_load_user(token)
 
 
-async def get_current_user(*args, **kwargs) -> Tuple[User, Dict]:
+async def get_current_user(
+    request: Request,
+    token: Annotated[str, Depends(oauth2_scheme)] = None
+) -> Tuple[User, Dict]:
     if settings.USE_OAUTH:
-        return await get_current_user_with_oauth(*args, **kwargs)
-    return await get_current_user_with_oauth(*args, **kwargs)
+        return await get_current_user_with_oauth(token)
+    return await get_current_user_basic(request)
 
 
-async def is_admin_user_basic(user_payload: Tuple[User, Dict] = Depends(get_current_user)) -> Tuple[User, Dict]:
-    user, payload = user_payload
-    if not getattr(user, "is_admin", False):
-        raise HTTPException(status_code=403, detail="Access denied")
-    return user, payload
-
-
-async def is_admin_user_oauth(
-    user_payload: Tuple[User, Dict] = Depends(get_current_user_with_oauth)
+async def is_admin_user(
+    user_payload: Tuple[User, Dict] = Depends(get_current_user),
 ) -> Tuple[User, Dict]:
     user, payload = user_payload
     if not getattr(user, "is_admin", False):
         raise HTTPException(status_code=403, detail="Access denied")
     return user, payload
-
-
-async def is_admin_user(*args, **kwargs) -> Tuple[User, Dict]:
-    if settings.USE_OAUTH:
-        return await is_admin_user_basic(*args, **kwargs)
-    return await is_admin_user_oauth(*args, **kwargs)
diff --git a/debug/__init__.py b/debug/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/debug/httpdebugger.py b/debug/httpdebugger.py
new file mode 100644
index 0000000..dcf03af
--- /dev/null
+++ b/debug/httpdebugger.py
@@ -0,0 +1,12 @@
+# Temporary debug middleware (or rather, an exception handler)
+from fastapi.responses import JSONResponse
+from fastapi.requests import Request
+from fastapi.exceptions import RequestValidationError
+from fastapi import FastAPI
+from main import app
+
+
+@app.exception_handler(RequestValidationError)
+async def validation_exception_handler(request: Request, exc: RequestValidationError):
+    print("422 detail:", exc.errors())  # print to the console
+    return JSONResponse(status_code=422, content={"detail": exc.errors()})
diff --git a/main.py b/main.py
index b386793..19a972c 100644
--- a/main.py
+++ b/main.py
@@ -23,6 +23,8 @@ async def lifespan(app: FastAPI):
 
 app = FastAPI(lifespan=lifespan)
 
+import debug.httpdebugger
+
 # Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
diff --git a/scripts/update_jp.py b/scripts/update_jp.py
index 923e3af..db3a752 100644
--- a/scripts/update_jp.py
+++ b/scripts/update_jp.py
@@ -146,8 +146,10 @@ async def import_def_jp(path: Path = xlsx_path, sheet_name: str = "日汉释义"
             print(f"❌ 查找单词 {word} 出错: {e}")
             continue
 
+        if pd.isna(row[6]):
+            continue
         # Field processing
-        example = None if pd.isna(row.日语例句1) else normalize_jp_text(str(row.日语例句1))
+        example = None if pd.isna(row.日语例句2) else normalize_jp_text(str(row.日语例句2))
         if not pd.isna(row.词性):
             pos_obj, jump = await pos_process(str(row.词性))
             if jump:
@@ -155,7 +157,7 @@
         else:
             print(f"❌ {word} 的词性为空,跳过")
             continue
-        chi_exp = str(row[4]).strip()
+        chi_exp = str(row[6]).strip()  # read the second definition
         exists = await DefinitionJp.filter(
             word=cls_word,
@@ -209,6 +211,21 @@ async def import_attachment(path: Path = xlsx_path, sheet_name: str = "日汉释
         )
 
 
+
+async def set_hiragana(xlsx_path: Path = xlsx_path, sheet_name : str="日汉释义"):
+    df = pd.read_excel(xlsx_path)
+    df.columns = [col.strip() for col in df.columns]
+
+    for row in df.itertuples():
+        word = normalize_jp_text(str(row[1]).strip())
+        if pd.isna(word):
+            break
+
+        hiragana = normalize_jp_text(jaconv.kata2hira(str(row[1]))) if pd.isna(row[2]) else normalize_jp_text(str(row[2]))
+        romaji = row[3]
+
+        await WordlistJp.filter(text=word).update(hiragana=hiragana)
+
+
 async def main():
     await Tortoise.init(config=TORTOISE_ORM)
     # await DefinitionJp.all().delete()  # TRUNCATE TABLE definitions_fr;
@@ -216,7 +233,8 @@
     # await AttachmentJp.all().delete()
     # await import_wordlist_jp()
     # await import_def_jp()
-    await import_attachment()
+    # await import_attachment()
+    await set_hiragana()
 
 
 if __name__ == '__main__':
diff --git a/settings.py b/settings.py
index 999028d..368e79b 100644
--- a/settings.py
+++ b/settings.py
@@ -21,6 +21,26 @@ TORTOISE_ORM = {
     'timezone': 'Asia/Shanghai'
 }
 
+ONLINE_SETTINGS = {
+    'connections': {
+        'default': 'mysql://root:@124.221.145.135:3306/test_db',
+    },
+    'apps': {
+        'models': {
+            'models': [
+                'app.models.base',
+                'app.models.fr',
+                'app.models.jp',
+                'aerich.models'  # aerich's built-in models (must be included)
+            ],
+            'default_connection': 'default',
+
+        }
+    },
+    'use_tz': False,
+    'timezone': 'Asia/Shanghai'
+}
+
 
 class Settings(BaseSettings):
     USE_OAUTH = False
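
For reviewers, a minimal sketch of how the new POST /search endpoint above could be exercised once the service is running. The base URL, router prefix, bearer token, and sample words are assumptions rather than values taken from this diff; the request and response fields follow SearchRequest and SearchResponse.

# Sketch only: call the new POST /search endpoint with httpx.
# BASE_URL, TOKEN and the sample queries are placeholders; adjust them for the
# real deployment and for whatever prefix dict_search is mounted under.
import httpx

BASE_URL = "http://localhost:8000"
TOKEN = "<access token>"


def search(query: str, language: str) -> dict:
    # Request body mirrors SearchRequest: query, language ('fr' | 'jp'),
    # plus optional sort/order fields that default to 'date' / 'des'.
    resp = httpx.post(
        f"{BASE_URL}/search",
        json={"query": query, "language": language},
        headers={"Authorization": f"Bearer {TOKEN}"},
    )
    resp.raise_for_status()
    # Response mirrors SearchResponse: {"query": ..., "pos": [...], "contents": [...]}
    return resp.json()


if __name__ == "__main__":
    print(search("bonjour", "fr"))
    print(search("neko", "jp"))  # romaji input is normalized to kana by all_in_kana()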