settings.py:
-更新用户名(由于数据库连接更新) -更新了多数据库连接(预留prod_db备用) backfill_search_text.py: -统一回填search_text脚本 signals.py: -后续加入内容时自动处理text为search_text ./scripts: -数据库导入脚本
This commit is contained in:
parent
264315ae9d
commit
fde510803e
|
|
@ -4,7 +4,7 @@
|
|||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.12 (dict_server)" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.12 (dict_server) (2)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
|
|
@ -3,5 +3,5 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.12 (dict_server)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (dict_server)" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (dict_server) (2)" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
|
|
@ -6,7 +6,7 @@ import redis.asyncio as redis
|
|||
|
||||
from app.models.base import ReservedWords, User, Language
|
||||
from app.utils.security import verify_password, hash_password, validate_password, validate_username, get_current_user
|
||||
from settings import SECRET_KEY
|
||||
from settings import settings
|
||||
from app.core.redis import get_redis
|
||||
|
||||
from app.schemas.user_schemas import UserIn, UserOut, UpdateUserRequest, UserLoginRequest
|
||||
|
|
@ -67,7 +67,7 @@ async def user_login(user_in: UserLoginRequest):
|
|||
"is_admin": user.is_admin,
|
||||
}
|
||||
|
||||
token = jwt.encode(payload, SECRET_KEY, algorithm="HS256")
|
||||
token = jwt.encode(payload, settings.SECRET_KEY, algorithm="HS256")
|
||||
|
||||
return {
|
||||
"access_token": token,
|
||||
|
|
|
|||
116
app/models/fr.py
116
app/models/fr.py
|
|
@ -12,42 +12,17 @@ sheet_name_fr = "法英中释义"
|
|||
|
||||
class WordlistFr(Model):
|
||||
id = fields.IntField(pk=True)
|
||||
language = fields.CharField(max_length=20, description="单词语种")
|
||||
text = fields.CharField(max_length=40, unique=True, description="单词")
|
||||
definitions = fields.ReverseRelation("DefinitionFr")
|
||||
attachments = fields.ReverseRelation("AttachmentsFr")
|
||||
definitions: fields.ReverseRelation["DefinitionFr"]
|
||||
attachments: fields.ReverseRelation["AttachmentFr"]
|
||||
freq = fields.IntField() # 词频排序用
|
||||
search_text = fields.CharField(max_length=255, index=True) # 检索字段
|
||||
|
||||
# attachment = fields.ForeignKeyField("models.Attachment", related_name="wordlists", on_delete=fields.CASCADE)
|
||||
# source = fields.CharField(max_length=20, description="<UNK>", null=True)
|
||||
class Meta:
|
||||
table = "wordlist_fr"
|
||||
|
||||
T = TypeVar("T", bound=Model)
|
||||
|
||||
@classmethod
async def update_or_create(cls: Type[T], **kwargs) -> Tuple[T, bool]:
    """Update the row matching the first keyword argument, or create one.

    The first keyword argument (in call order) is the lookup condition;
    all remaining keyword arguments are the values to write.

    :param kwargs: field values; at least one is required
    :return: ``(instance, created)`` where ``created`` is True when a new
        row was inserted
    :raises ValueError: if no keyword arguments are given
    """
    print("传入参数为:", kwargs)
    if not kwargs:
        raise ValueError("必须提供至少一个字段作为参数")

    # The first field passed in kwargs acts as the lookup key.
    first_key = next(iter(kwargs))
    word = await cls.filter(**{first_key: kwargs[first_key]}).first()

    if word:
        for key, value in kwargs.items():
            if key != first_key:
                setattr(word, key, value)
        await word.save()
        return word, False

    # Keep the created instance: the previous version dropped it and
    # returned (None, True) to the caller.
    word = await cls.create(**kwargs)
    return word, True
|
||||
|
||||
|
||||
class AttachmentFr(Model):
|
||||
id = fields.IntField(pk=True)
|
||||
|
|
@ -63,91 +38,10 @@ class AttachmentFr(Model):
|
|||
class DefinitionFr(Model):
|
||||
id = fields.IntField(pk=True)
|
||||
word = fields.ForeignKeyField("models.WordlistFr", related_name="definitions", on_delete=fields.CASCADE)
|
||||
pos = fields.CharEnumField(PosEnumFr, max_length=30) # ✅ 把词性放在释义层面
|
||||
pos = fields.CharEnumField(PosEnumFr, max_length=30, null=True) # ✅ 把词性放在释义层面
|
||||
meaning = fields.TextField(description="单词释义") # 如:“学习”
|
||||
example = fields.TextField(null=True, description="单词例句")
|
||||
eng_explanation = fields.TextField(null=True, description="English explanation")
|
||||
|
||||
class Meta:
|
||||
table = "definitions_fr"
|
||||
|
||||
@classmethod
async def init_from_xlsx(
    cls,
    filepath: str,
    sheet_name: str
) -> None:
    """
    Initiate the database from an xlsx file. Rows are inserted without
    checking whether the same definition already exists.

    Only the first definition column group (suffix ``1``) is read. Rows
    whose word is not present in ``WordlistFr`` or whose part of speech is
    empty are skipped.

    :param filepath: relative or absolute path of the .xlsx file
    :param sheet_name: specific sheet name inside the .xlsx file
    :return: None
    """
    df = pd.read_excel(filepath, sheet_name=sheet_name, na_filter=True)
    df.columns = [col.strip() for col in df.columns]
    df.dropna(how="all", inplace=True)

    DEF_COUNT = 1  # suffix of the column group to import

    for row in df.itertuples():
        word = row.单词
        cls_word = await WordlistFr.filter(text=word).first()
        if cls_word is None:
            print(f"未找到 word: {word}")
            continue

        pos = getattr(row, f"词性{DEF_COUNT}")
        if pd.isna(pos):
            continue

        meaning = getattr(row, f"中文释义{DEF_COUNT}")
        eng_exp = getattr(row, f"英语释义{DEF_COUNT}")
        # An empty cell arrives as NaN; store NULL rather than a float.
        if pd.isna(eng_exp):
            eng_exp = None

        await DefinitionFr.create(
            # The model field is named `pos`; `part_of_speech` is not a
            # field of DefinitionFr, so the value never reached the row.
            pos=pos,
            meaning=meaning,
            eng_explanation=eng_exp,
            word=cls_word,
        )
|
||||
|
||||
# TODO revise the function (check update or create by id)
|
||||
@classmethod
async def update_or_create_meaning(
    cls,
    word_obj,
    target_language_obj,
    part_of_speech: str,
    meaning: str,
    example: str = None,
    eng_explanation: str = None,
) -> tuple["DefinitionFr", bool]:
    """
    Update the definition identified by (word, pos, meaning), or create it.

    :param word_obj: the WordlistFr row the definition belongs to
    :param target_language_obj: kept for call compatibility only; the
        model declares no target-language field, so it is not used
        (NOTE(review): confirm no caller relies on it)
    :param part_of_speech: value stored in the model's ``pos`` field
    :param meaning: definition text
    :param example: optional example sentence
    :param eng_explanation: optional English explanation
    :return: ``(object, created)`` where ``created`` is True for a new row
    """
    # The model's fields are word / pos / meaning / example /
    # eng_explanation. Filtering on `part_of_speech` or `target_language`
    # refers to fields that do not exist on DefinitionFr.
    identity = {
        "word": word_obj,
        "pos": part_of_speech,
        "meaning": meaning,
    }

    obj = await cls.filter(**identity).first()
    if obj:
        # Only the non-identifying fields can change on an existing row.
        obj.example = example
        obj.eng_explanation = eng_explanation
        await obj.save()
        return obj, False

    obj = await cls.create(
        example=example,
        eng_explanation=eng_explanation,
        **identity,
    )
    return obj, True
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ sheet_name_jp = "日汉释义"
|
|||
class WordlistJp(Model):
|
||||
id = fields.IntField(pk=True)
|
||||
text = fields.CharField(max_length=40, description="单词")
|
||||
definitions = fields.ReverseRelation("DefinitionJp")
|
||||
attachments = fields.ReverseRelation("AttachmentsJp")
|
||||
definitions : fields.ReverseRelation["DefinitionJp"]
|
||||
attachments : fields.ReverseRelation["AttachmentJp"]
|
||||
|
||||
class Meta:
|
||||
table = "wordlist_jp"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
from tortoise.signals import pre_save
|
||||
from tortoise import BaseDBAsyncClient
|
||||
from typing import Optional
|
||||
|
||||
from app.utils.textnorm import normalize_text
|
||||
from app.models.fr import WordlistFr
|
||||
|
||||
|
||||
@pre_save(WordlistFr)
async def wordlist_fr_pre_save(
    sender: type[WordlistFr],
    instance: WordlistFr,
    using_db: BaseDBAsyncClient,
    update_fields: Optional[list[str]]
) -> None:
    """
    Keep ``search_text`` in sync with ``text`` before a WordlistFr save.

    - Full save (``update_fields`` is None): ``search_text`` is always set
      to the normalized ``text``.
    - Partial save: ``search_text`` is refreshed only when ``text`` is one
      of the fields being written, and ``"search_text"`` is appended to
      ``update_fields`` so the UPDATE statement includes it.
    - Nothing is touched when ``search_text`` already has the wanted value.

    :param sender: the model class emitting the signal
    :param instance: the row about to be saved
    :param using_db: database client used for the save (unused here)
    :param update_fields: fields named by the caller, or None
    """
    desired = normalize_text(instance.text or "")
    # Already up to date: avoid a pointless column write.
    if instance.search_text == desired:
        return

    # Case 1: full save - every field is written, so just set the value.
    if update_fields is None:
        instance.search_text = desired
        return

    # Case 2: partial save that does not write `text` - leave it alone.
    if "text" not in update_fields:
        return

    instance.search_text = desired
    if "search_text" in update_fields:
        return

    # The ORM is expected to pass this same object on to the UPDATE, so it
    # must be extended in place (NOTE(review): verify against the installed
    # Tortoise version). Assigning a new list to an instance attribute, as
    # before, is not read by the ORM and left search_text unsaved.
    if isinstance(update_fields, list):
        update_fields.append("search_text")
    # An immutable sequence (e.g. a tuple) cannot be extended here; the
    # caller then has to list "search_text" explicitly.
|
||||
|
|
@ -3,10 +3,6 @@ from enum import Enum
|
|||
from pydantic import BaseModel, validator, field_validator, Field
|
||||
from typing import Optional, Literal, List
|
||||
|
||||
from tortoise.exceptions import DoesNotExist
|
||||
|
||||
from app.models.fr import WordlistFr
|
||||
|
||||
|
||||
class PosEnumFr(str, Enum):
|
||||
# noun
|
||||
|
|
@ -21,6 +17,10 @@ class PosEnumFr(str, Enum):
|
|||
v_i = "v.i."
|
||||
v_pr = "v.pr."
|
||||
v_t_i = "v.t./v.i."
|
||||
v_t_dir = "v.t.dir."
|
||||
v_t_ind = "v.t.ind."
|
||||
v_t_pr = "v.t.(v.pr.)"
|
||||
v_i_ind = "v.t.ind./v.i."
|
||||
|
||||
adj = "adj." # adj
|
||||
adv = "adv." # adv
|
||||
|
|
@ -29,6 +29,8 @@ class PosEnumFr(str, Enum):
|
|||
conj = "conj."
|
||||
interj = "interj."
|
||||
chauff = "chauff"
|
||||
art = "art."
|
||||
|
||||
|
||||
|
||||
class PosEnumJp(str, Enum):
|
||||
|
|
@ -55,18 +57,18 @@ class CreateWord(BaseModel):
|
|||
@field_validator("eng_explanation")
@classmethod
def validate_eng_explanation(cls, v, info):
    """Check the English explanation against the word's language.

    Japanese words must not carry an English explanation; French words
    must have a non-empty one.

    ``@field_validator`` has to be the outermost decorator, otherwise
    pydantic does not register the validator. The language is read from
    the already-validated field data, not from the class.
    NOTE(review): assumes ``language`` is declared before this field.

    :param v: the submitted English explanation
    :param info: pydantic validation info carrying earlier field values
    :return: the value unchanged when valid
    :raises ValueError: when the value does not fit the language
    """
    language = info.data.get("language")
    if language == "jp" and v:
        raise ValueError("Japanese word has no English explanation")
    # `not v` covers both None and "". The previous
    # `... == "fr" and v is None or v == ""` rejected an empty string for
    # every language because `and` binds tighter than `or`.
    if language == "fr" and not v:
        raise ValueError("French word must have English explanation")
    return v
|
||||
|
||||
@field_validator("pos")
@classmethod
def validate_pos(cls, v, info):
    """Check that the part of speech belongs to the word's language.

    ``@field_validator`` has to be the outermost decorator, otherwise
    pydantic does not register the validator. The language is read from
    the already-validated field data, not from the class.
    NOTE(review): assumes ``language`` is declared before this field.

    :param v: the submitted part of speech
    :param info: pydantic validation info carrying earlier field values
    :return: the value unchanged when valid
    :raises ValueError: when the value is not in the language's enum
    """
    language = info.data.get("language")
    if language == "fr" and v not in PosEnumFr:
        raise ValueError("Pos is not a valid type")
    if language == "jp" and v not in PosEnumJp:
        raise ValueError("Pos is not a valid type")
    return v
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,23 @@
|
|||
import re
|
||||
import unicodedata
|
||||
|
||||
|
||||
# Ligatures have no Unicode decomposition, so NFKD leaves them intact;
# fold them by hand so that "cœur" and "coeur" normalize to the same key.
_LIGATURE_FOLD = str.maketrans({"œ": "oe", "æ": "ae"})


def normalize_text(s: str) -> str:
    """
    Normalize a string for searching / for storage in ``search_text``.

    Steps: Unicode NFKD decomposition, removal of combining marks
    (é -> e), lower-casing, folding of the ligatures œ/æ to oe/ae,
    trimming, and collapsing of whitespace runs to a single space.

    Rows stored before ligature folding was added need their
    ``search_text`` recomputed to match.

    :param s: raw text; a falsy value (None or "") yields ""
    :return: the normalized search key
    """
    if not s:
        return ""
    # 1. Decompose accented characters into base letter + combining mark.
    s = unicodedata.normalize("NFKD", s)
    # 2. Drop the combining marks (accents, diaeresis, cedilla, ...).
    s = "".join(ch for ch in s if not unicodedata.combining(ch))
    # 3. Lower-case first so that Œ/Æ are folded as well.
    s = s.lower().translate(_LIGATURE_FOLD)
    # 4. Trim and collapse internal whitespace.
    return re.sub(r"\s+", " ", s.strip())
|
||||
1
main.py
1
main.py
|
|
@ -8,6 +8,7 @@ from settings import TORTOISE_ORM
|
|||
from app.api.users import users_router
|
||||
from app.api.admin.router import admin_router
|
||||
from app.core.redis import init_redis_pool
|
||||
import app.models.signals
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -0,0 +1,17 @@
|
|||
import asyncio
|
||||
from tortoise import Tortoise, run_async
|
||||
from app.models.fr import WordlistFr
|
||||
from app.utils.textnorm import normalize_text
|
||||
from settings import TORTOISE_ORM
|
||||
|
||||
async def main():
    """Recompute ``search_text`` for every WordlistFr row that is stale.

    Only rows whose stored ``search_text`` differs from the normalized
    ``text`` are written, and only that one column is updated.
    """
    await Tortoise.init(config=TORTOISE_ORM)
    try:
        async for w in WordlistFr.all().only("id", "text", "search_text"):  # type: WordlistFr
            want = normalize_text(w.text)
            if w.search_text != want:
                w.search_text = want
                await w.save(update_fields=["search_text"])
    finally:
        # Release the connections even when a row fails to save.
        await Tortoise.close_connections()
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_async(main())
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
from tortoise import Tortoise
|
||||
from tortoise.exceptions import MultipleObjectsReturned
|
||||
|
||||
from app.models.fr import DefinitionFr, WordlistFr
|
||||
from settings import TORTOISE_ORM
|
||||
import app.models.signals
|
||||
|
||||
xlsx_name = "./DictTable_20250811.xlsx"
|
||||
xlsx_path = Path(xlsx_name)
|
||||
|
||||
|
||||
def pos_process(pos: str) -> str:
    """Clean up a part-of-speech label read from the spreadsheet.

    Spaces and commas are removed, and a trailing "." is appended unless
    the label already ends in "." or ")" or is the special value "chauff".
    A label that is empty after cleaning is returned as "" instead of ".".

    :param pos: raw part-of-speech text
    :return: the cleaned label
    """
    pos = pos.replace(" ", "").replace(",", "")
    if not pos:
        # Nothing left to label; do not invent a bare ".".
        return pos
    if pos != "chauff" and not pos.endswith((".", ")")):
        pos += "."
    return pos
|
||||
|
||||
|
||||
async def import_wordlist_fr(path: Path = xlsx_path, sheet_name: str = "法英中释义"):
    """Import the word column of the spreadsheet into WordlistFr.

    Each non-empty word is fetched or created with ``freq`` defaulting
    to 0; one line is printed per word saying whether it was new.

    :param path: path of the .xlsx file
    :param sheet_name: sheet to read
    """
    df = pd.read_excel(path, sheet_name=sheet_name)
    df.columns = [col.strip() for col in df.columns]

    for row in df.itertuples():
        raw = row.单词
        # Test for NaN before str(): str(nan) is the text "nan", which is
        # never NaN and used to be imported as a word. Blank rows are
        # skipped, as in import_def_fr.
        if pd.isna(raw):
            continue
        word = str(raw).strip()
        if not word:
            continue

        word_obj, created = await WordlistFr.get_or_create(text=word, defaults={"freq": 0})
        if created:
            print(f"✅ 新增词条: {word}")
        else:
            print(f"⚠️ 已存在: {word},跳过")
|
||||
|
||||
|
||||
async def import_def_fr(
    path: Path = xlsx_path,
    sheet_name: str = "法英中释义"
):
    """Import the first definition of every word into DefinitionFr.

    For each row the matching WordlistFr entry is looked up by text. Rows
    are skipped (with a printed message) when the word is missing or
    duplicated, when the meaning cell is empty, or when a definition with
    the same word, pos and meaning already exists.

    :param path: path of the .xlsx file
    :param sheet_name: sheet to read
    """
    df = pd.read_excel(path, sheet_name=sheet_name)
    df.columns = [col.strip() for col in df.columns]

    for row in df.itertuples():
        word = row.单词
        if pd.isna(word):
            continue

        word = str(word).strip()

        # Look up the WordlistFr instance; any lookup failure skips the row.
        try:
            cls_word = await WordlistFr.get(text=word)
        except MultipleObjectsReturned:
            ids = await WordlistFr.filter(text=word).values_list("id", flat=True)
            print(f"❗ 重复单词 {word},id为: {' '.join(str(i) for i in ids)}")
            continue
        except Exception as e:
            print(f"❌ 查找单词 {word} 出错: {e}")
            continue

        # Field clean-up: empty cells become None.
        example = None if pd.isna(row.法语例句1) else str(row.法语例句1).strip()
        pos = None if pd.isna(row.词性1) else pos_process(str(row.词性1).strip())
        eng_exp = None if pd.isna(row.英语释义1) else str(row.英语释义1).strip()

        # row[0] is the index, so row[2] is the second spreadsheet column.
        # NOTE(review): presumably the Chinese meaning - confirm the layout.
        raw_meaning = row[2]
        if pd.isna(raw_meaning):
            # str(nan) would otherwise be stored as the meaning "nan".
            print(f"⚠️ 释义为空,跳过:{word}")
            continue
        chi_exp = str(raw_meaning).strip()

        # De-duplicate: one word must not repeat the same pos + meaning.
        exists = await DefinitionFr.filter(
            word=cls_word,
            pos=pos,
            meaning=chi_exp
        ).exists()
        if exists:
            print(f"⚠️ 已存在释义,跳过:{word} - {pos} - {chi_exp[:10]}...")
            continue

        # Create the definition; a failure is reported and the import goes on.
        try:
            await DefinitionFr.create(
                word=cls_word,
                pos=pos,
                eng_explanation=eng_exp,
                meaning=chi_exp,
                example=example,
            )
            print(f"✅ 导入释义:{word} - {pos}")
        except Exception as e:
            print(f"❌ 插入释义失败:{word} - {pos},错误: {e}")
|
||||
|
||||
|
||||
async def main():
    """Rebuild the French definitions table from the spreadsheet.

    WARNING: every existing DefinitionFr row is deleted before the import.
    """
    await Tortoise.init(config=TORTOISE_ORM)
    try:
        await DefinitionFr.all().delete()
        await import_def_fr()
        # await import_wordlist_fr()
    finally:
        # Close the connections so the script exits cleanly even on error.
        await Tortoise.close_connections()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
19
settings.py
19
settings.py
|
|
@ -2,21 +2,8 @@ from pydantic.v1 import BaseSettings
|
|||
|
||||
TORTOISE_ORM = {
|
||||
'connections': {
|
||||
'default': {
|
||||
# 'engine': 'tortoise.backends.asyncpg', PostgreSQL
|
||||
'engine': 'tortoise.backends.mysql', # MySQL or Mariadb
|
||||
'credentials': {
|
||||
'host': '127.0.0.1',
|
||||
'port': '3306',
|
||||
'user': 'root',
|
||||
'password': 'enterprise',
|
||||
'database': 'dict',
|
||||
'minsize': 1,
|
||||
'maxsize': 5,
|
||||
'charset': 'utf8mb4',
|
||||
"echo": True
|
||||
}
|
||||
},
|
||||
"default": "mysql://local_admin:enterprise@127.0.0.1:3306/dict",
|
||||
"production": "mysql://local_admin:enterprise@127.0.0.1:3306/prod_db",
|
||||
},
|
||||
'apps': {
|
||||
'models': {
|
||||
|
|
@ -34,8 +21,10 @@ TORTOISE_ORM = {
|
|||
'timezone': 'Asia/Shanghai'
|
||||
}
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    # Application settings object; a single instance is created below as
    # `settings`.
    # NOTE(review): BaseSettings presumably lets environment variables of
    # the same name override these defaults - confirm for the deployed
    # pydantic version.

    # Whether OAuth login is enabled.
    USE_OAUTH: bool = False
    # Key passed to jwt.encode for signing tokens.
    # NOTE(review): this default is committed to the repository; production
    # should supply its own value rather than rely on it.
    SECRET_KEY: str = "asdasdasd-odjfnsodfnosidnfdf-0oq2j01j0jf0i1ej0fij10fd"
|
||||
|
||||
|
||||
settings = Settings()
|
||||
|
|
|
|||
Loading…
Reference in New Issue