diff --git a/.idea/dict_server.iml b/.idea/dict_server.iml
index 5305fe2..53b48ab 100644
--- a/.idea/dict_server.iml
+++ b/.idea/dict_server.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index dbda99f..1b75482 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,5 +3,5 @@
-
+
\ No newline at end of file
diff --git a/app/api/users.py b/app/api/users.py
index 7f34085..c99f7e8 100644
--- a/app/api/users.py
+++ b/app/api/users.py
@@ -6,7 +6,7 @@ import redis.asyncio as redis
from app.models.base import ReservedWords, User, Language
from app.utils.security import verify_password, hash_password, validate_password, validate_username, get_current_user
-from settings import SECRET_KEY
+from settings import settings
from app.core.redis import get_redis
from app.schemas.user_schemas import UserIn, UserOut, UpdateUserRequest, UserLoginRequest
@@ -67,7 +67,7 @@ async def user_login(user_in: UserLoginRequest):
"is_admin": user.is_admin,
}
- token = jwt.encode(payload, SECRET_KEY, algorithm="HS256")
+ token = jwt.encode(payload, settings.SECRET_KEY, algorithm="HS256")
return {
"access_token": token,
diff --git a/app/models/fr.py b/app/models/fr.py
index 42825ac..60e7eb3 100644
--- a/app/models/fr.py
+++ b/app/models/fr.py
@@ -12,42 +12,17 @@ sheet_name_fr = "法英中释义"
class WordlistFr(Model):
id = fields.IntField(pk=True)
- language = fields.CharField(max_length=20, description="单词语种")
text = fields.CharField(max_length=40, unique=True, description="单词")
- definitions = fields.ReverseRelation("DefinitionFr")
- attachments = fields.ReverseRelation("AttachmentsFr")
+ definitions: fields.ReverseRelation["DefinitionFr"]
+ attachments: fields.ReverseRelation["AttachmentFr"]
+ freq = fields.IntField() # 词频排序用
+ search_text = fields.CharField(max_length=255, index=True) # 检索字段
# attachment = fields.ForeignKeyField("models.Attachment", related_name="wordlists", on_delete=fields.CASCADE)
# source = fields.CharField(max_length=20, description="", null=True)
class Meta:
table = "wordlist_fr"
- T = TypeVar("T", bound=Model)
-
- @classmethod
- async def update_or_create(cls: Type[T], **kwargs) -> Tuple[T, bool]:
- print("传入参数为:", kwargs)
- if not kwargs:
- raise ValueError("必须提供至少一个字段作为参数")
-
- created: bool = False
-
- # 使用 kwargs 中第一个字段作为查找条件
- first_key = next(iter(kwargs))
- lookup = {first_key: kwargs[first_key]}
-
- word = await cls.filter(**lookup).first() # 参数展开语法
- if word:
- for k, v in kwargs.items():
- if k != first_key:
- setattr(word, k, v)
- await word.save()
- else:
- await cls.create(**kwargs)
- created = True
-
- return word, created
-
class AttachmentFr(Model):
id = fields.IntField(pk=True)
@@ -63,91 +38,10 @@ class AttachmentFr(Model):
class DefinitionFr(Model):
id = fields.IntField(pk=True)
word = fields.ForeignKeyField("models.WordlistFr", related_name="definitions", on_delete=fields.CASCADE)
- pos = fields.CharEnumField(PosEnumFr, max_length=30) # ✅ 把词性放在释义层面
+ pos = fields.CharEnumField(PosEnumFr, max_length=30, null=True) # ✅ 把词性放在释义层面
meaning = fields.TextField(description="单词释义") # 如:“学习”
example = fields.TextField(null=True, description="单词例句")
eng_explanation = fields.TextField(null=True, description="English explanation")
class Meta:
table = "definitions_fr"
-
- @classmethod
- async def init_from_xlsx(
- cls,
- filepath: str,
- sheet_name: str
- ):
- """
- Initiate the database from xlsx file. Only read in data without checking
- whether the content already exists.
- :param filepath: receive both relative or absolute path
- :param sheet_name: specific sheet name inside the .xlsx file
- :return: None
- """
- df = pd.read_excel(filepath, sheet_name=sheet_name, na_filter=True)
- df.columns = [col.strip() for col in df.columns]
- df.dropna(how="all", inplace=True)
-
- # create_cnt = 0
- DEF_COUNT = 1
-
- for row in df.itertuples():
- word = row.单词
- cls_word = await WordlistFr.filter(text=word).first()
- if cls_word is None:
- print(f"未找到 word: {word}")
- continue
- pos = getattr(row, f"词性{DEF_COUNT}")
- if pd.isna(pos):
- continue
- meaning = getattr(row, f"中文释义{DEF_COUNT}")
- eng_exp = getattr(row, f"英语释义{DEF_COUNT}")
- await DefinitionFr.create(
- part_of_speech=pos,
- meaning=meaning,
- eng_explanation=eng_exp,
- word=cls_word
- )
-
- # TODO revise the function (check update or create by id)
- @classmethod
- async def update_or_create_meaning(
- cls,
- word_obj,
- target_language_obj,
- part_of_speech: str,
- meaning: str,
- example: str = None,
- eng_explanation: str = None,
- ) -> tuple["DefinitionFr", bool]:
- """
- 查询某个单词是否已有该释义(依据四元组作为唯一标识),存在则更新,不存在则新增。
- 返回:(对象, 是否为新创建)
- """
- query = {
- "word": word_obj,
- "target_language": target_language_obj,
- "part_of_speech": part_of_speech,
- "meaning": meaning
- }
-
- obj = await cls.filter(**query).first()
- created = False
-
- if obj:
- # 可更新其他字段
- obj.example = example
- obj.eng_explanation = eng_explanation
- await obj.save()
- else:
- obj = await cls.create(
- word=word_obj,
- target_language=target_language_obj,
- part_of_speech=part_of_speech,
- meaning=meaning,
- example=example,
- eng_explanation=eng_explanation,
- )
- created = True
-
- return obj, created
diff --git a/app/models/jp.py b/app/models/jp.py
index 671c925..a43b398 100644
--- a/app/models/jp.py
+++ b/app/models/jp.py
@@ -16,8 +16,8 @@ sheet_name_jp = "日汉释义"
class WordlistJp(Model):
id = fields.IntField(pk=True)
text = fields.CharField(max_length=40, description="单词")
- definitions = fields.ReverseRelation("DefinitionJp")
- attachments = fields.ReverseRelation("AttachmentsJp")
+    definitions: fields.ReverseRelation["DefinitionJp"]
+    attachments: fields.ReverseRelation["AttachmentJp"]
class Meta:
table = "wordlist_jp"
diff --git a/app/models/signals.py b/app/models/signals.py
new file mode 100644
index 0000000..75c113c
--- /dev/null
+++ b/app/models/signals.py
@@ -0,0 +1,41 @@
+from tortoise.signals import pre_save
+from tortoise import BaseDBAsyncClient
+from typing import Optional
+
+from app.utils.textnorm import normalize_text
+from app.models.fr import WordlistFr
+
+
+@pre_save(WordlistFr)
+async def wordlist_fr_pre_save(
+ sender: type[WordlistFr],
+ instance: WordlistFr,
+ using_db: BaseDBAsyncClient,
+ update_fields: Optional[list[str]]
+) -> None:
+ """
+ 仅当 text 变更时,同步 search_text。
+ - 新建:总是写入 search_text
+ - 修改:只有当 text 在本次更新范围内,或 text 实际发生变化时才更新
+ - 若调用方用了 update_fields,只包含 text,则自动把 'search_text' 追加进去,确保写回
+ """
+ desired = normalize_text(instance.text or "")
+ # 不变则不写,减少无谓 UPDATE
+ if instance.search_text == desired:
+ return
+
+ # 情况 1:完整更新(没有传 update_fields)
+ if update_fields is None:
+ instance.search_text = desired
+ return # ✅ 会写入
+
+ # 情况 2:部分更新——只有当这次确实更新了 text,才同步 search_text
+ if "text" in update_fields:
+ instance.search_text = desired
+        # NOTE(review): assigning instance._update_fields has no effect — Tortoise's
+        # save() keeps its own reference to the update_fields it was called with.
+        # Mutate the passed-in list in place so this UPDATE actually includes
+        # search_text (a tuple cannot be extended; callers should pass a list).
+        if isinstance(update_fields, list) and "search_text" not in update_fields:
+            update_fields.append("search_text")
+ # 否则(这次没更 text),不动 search_text
diff --git a/app/schemas/admin_schemas.py b/app/schemas/admin_schemas.py
index e8c5dfd..18c911e 100644
--- a/app/schemas/admin_schemas.py
+++ b/app/schemas/admin_schemas.py
@@ -3,10 +3,6 @@ from enum import Enum
from pydantic import BaseModel, validator, field_validator, Field
from typing import Optional, Literal, List
-from tortoise.exceptions import DoesNotExist
-
-from app.models.fr import WordlistFr
-
class PosEnumFr(str, Enum):
# noun
@@ -21,6 +17,10 @@ class PosEnumFr(str, Enum):
v_i = "v.i."
v_pr = "v.pr."
v_t_i = "v.t./v.i."
+ v_t_dir = "v.t.dir."
+ v_t_ind = "v.t.ind."
+ v_t_pr = "v.t.(v.pr.)"
+ v_i_ind = "v.t.ind./v.i."
adj = "adj." # adj
adv = "adv." # adv
@@ -29,6 +29,8 @@ class PosEnumFr(str, Enum):
conj = "conj."
interj = "interj."
chauff = "chauff"
+ art = "art."
+
class PosEnumJp(str, Enum):
@@ -55,18 +57,18 @@ class CreateWord(BaseModel):
@classmethod
@field_validator("eng_explanation")
def validate_eng_explanation(cls, v):
- if cls.language is "jp" and v:
+ if cls.language == "jp" and v:
raise ValueError("Japanese word has no English explanation")
- if cls.language is "fr" and v is None or v == "":
+        if cls.language == "fr" and (v is None or v == ""):
raise ValueError("French word must have English explanation")
return v
@classmethod
@field_validator("pos")
def validate_pos(cls, v):
- if cls.language is "fr" and v not in PosEnumFr:
+ if cls.language == "fr" and v not in PosEnumFr:
raise ValueError("Pos is not a valid type")
- if cls.language is "jp" and v not in PosEnumJp:
+ if cls.language == "jp" and v not in PosEnumJp:
raise ValueError("Pos is not a valid type")
return v
diff --git a/app/utils/textnorm.py b/app/utils/textnorm.py
new file mode 100644
index 0000000..92cbe4e
--- /dev/null
+++ b/app/utils/textnorm.py
@@ -0,0 +1,23 @@
+import re
+import unicodedata
+
+
+def normalize_text(s: str) -> str:
+ """
+ 规范化字符串,用于搜索/存储 search_text
+ - Unicode 标准化
+ - 去除重音符号(é -> e)
+ - 转小写
+ - 去掉前后空格,多空格合并
+ """
+ if not s:
+ return ""
+ # 1. Unicode 标准化(NFKD 拆分)
+ s = unicodedata.normalize("NFKD", s)
+ # 2. 去掉音标/重音符
+ s = "".join(ch for ch in s if not unicodedata.combining(ch))
+ # 3. 转小写
+ s = s.lower()
+ # 4. 去掉首尾空格 & 合并多个空格
+ s = re.sub(r"\s+", " ", s.strip())
+ return s
diff --git a/main.py b/main.py
index 8b6f502..4eccc35 100644
--- a/main.py
+++ b/main.py
@@ -8,6 +8,7 @@ from settings import TORTOISE_ORM
from app.api.users import users_router
from app.api.admin.router import admin_router
from app.core.redis import init_redis_pool
+import app.models.signals
@asynccontextmanager
diff --git a/scripts/DictTable_20250811.xlsx b/scripts/DictTable_20250811.xlsx
new file mode 100644
index 0000000..03db959
Binary files /dev/null and b/scripts/DictTable_20250811.xlsx differ
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/backfill_search_text.py b/scripts/backfill_search_text.py
new file mode 100644
index 0000000..f93ce6e
--- /dev/null
+++ b/scripts/backfill_search_text.py
@@ -0,0 +1,17 @@
+import asyncio
+from tortoise import Tortoise, run_async
+from app.models.fr import WordlistFr
+from app.utils.textnorm import normalize_text
+from settings import TORTOISE_ORM
+
+async def main():
+ await Tortoise.init(config=TORTOISE_ORM)
+ async for w in WordlistFr.all().only("id", "text", "search_text"): # type: WordlistFr
+ want = normalize_text(w.text)
+ if w.search_text != want:
+ w.search_text = want
+ await w.save(update_fields=["search_text"])
+ await Tortoise.close_connections()
+
+if __name__ == "__main__":
+ run_async(main())
\ No newline at end of file
diff --git a/scripts/update_fr.py b/scripts/update_fr.py
new file mode 100644
index 0000000..9389c37
--- /dev/null
+++ b/scripts/update_fr.py
@@ -0,0 +1,103 @@
+import asyncio
+from pathlib import Path
+
+import pandas as pd
+from tortoise import Tortoise
+from tortoise.exceptions import MultipleObjectsReturned
+
+from app.models.fr import DefinitionFr, WordlistFr
+from settings import TORTOISE_ORM
+import app.models.signals
+
+xlsx_name = "DictTable_20250811.xlsx"
+xlsx_path = Path(__file__).resolve().parent / xlsx_name  # anchor to script dir, not CWD
+
+
+def pos_process(pos: str) -> str:
+ pos = pos.replace(" ", "")
+ pos = pos.replace(",", "")
+ if not pos.endswith(".") and not pos.endswith(")") and pos != "chauff":
+ pos = pos + "."
+ return pos
+
+
+async def import_wordlist_fr(path: Path = xlsx_path, sheet_name: str = "法英中释义"):
+ df = pd.read_excel(path, sheet_name=sheet_name)
+ df.columns = [col.strip() for col in df.columns]
+
+ for row in df.itertuples():
+        if pd.isna(row.单词):  # must test NaN before str(); str(NaN) == "nan" is truthy
+            continue
+        word = str(row.单词).strip()
+
+ word_obj, created = await WordlistFr.get_or_create(text=word, defaults={"freq": 0})
+ if created:
+ print(f"✅ 新增词条: {word}")
+ else:
+ print(f"⚠️ 已存在: {word},跳过")
+
+
+async def import_def_fr(
+ path: Path = xlsx_path,
+ sheet_name: str = "法英中释义"
+):
+ df = pd.read_excel(path, sheet_name=sheet_name)
+ df.columns = [col.strip() for col in df.columns]
+
+ for row in df.itertuples():
+ word = row.单词
+ if pd.isna(word):
+ continue
+
+ word = str(word).strip()
+
+ # 查找 WordlistFr 实例(注意异常处理)
+ try:
+ cls_word = await WordlistFr.get(text=word)
+ except MultipleObjectsReturned:
+ ids = await WordlistFr.filter(text=word).values_list("id", flat=True)
+ print(f"❗ 重复单词 {word},id为: {' '.join(str(i) for i in ids)}")
+ continue
+ except Exception as e:
+ print(f"❌ 查找单词 {word} 出错: {e}")
+ continue
+
+ # 字段处理
+ example = None if pd.isna(row.法语例句1) else str(row.法语例句1).strip()
+ pos = None if pd.isna(row.词性1) else pos_process(str(row.词性1).strip())
+ eng_exp = None if pd.isna(row.英语释义1) else str(row.英语释义1).strip()
+        chi_exp = str(row.中文释义1).strip()  # access by name like the other columns; row[0] is the index
+
+ # 去重:同一个词条不能有重复释义(同 pos + meaning)
+ exists = await DefinitionFr.filter(
+ word=cls_word,
+ pos=pos,
+ meaning=chi_exp
+ ).exists()
+ if exists:
+ print(f"⚠️ 已存在释义,跳过:{word} - {pos} - {chi_exp[:10]}...")
+ continue
+
+ # 创建定义
+ try:
+ await DefinitionFr.create(
+ word=cls_word,
+ pos=pos,
+ eng_explanation=eng_exp,
+ meaning=chi_exp,
+ example=example,
+ )
+ print(f"✅ 导入释义:{word} - {pos}")
+ except Exception as e:
+ print(f"❌ 插入释义失败:{word} - {pos},错误: {e}")
+
+
+async def main():
+ await Tortoise.init(config=TORTOISE_ORM)
+ await DefinitionFr.all().delete()
+    await import_def_fr()  # await import_wordlist_fr()
+    await Tortoise.close_connections()
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/settings.py b/settings.py
index 2981ee6..999028d 100644
--- a/settings.py
+++ b/settings.py
@@ -2,21 +2,8 @@ from pydantic.v1 import BaseSettings
TORTOISE_ORM = {
'connections': {
- 'default': {
- # 'engine': 'tortoise.backends.asyncpg', PostgreSQL
- 'engine': 'tortoise.backends.mysql', # MySQL or Mariadb
- 'credentials': {
- 'host': '127.0.0.1',
- 'port': '3306',
- 'user': 'root',
- 'password': 'enterprise',
- 'database': 'dict',
- 'minsize': 1,
- 'maxsize': 5,
- 'charset': 'utf8mb4',
- "echo": True
- }
- },
+ "default": "mysql://local_admin:enterprise@127.0.0.1:3306/dict",
+ "production": "mysql://local_admin:enterprise@127.0.0.1:3306/prod_db",
},
'apps': {
'models': {
@@ -34,8 +21,10 @@ TORTOISE_ORM = {
'timezone': 'Asia/Shanghai'
}
+
class Settings(BaseSettings):
USE_OAUTH = False
SECRET_KEY = "asdasdasd-odjfnsodfnosidnfdf-0oq2j01j0jf0i1ej0fij10fd"
+
settings = Settings()