From ff9b8d0f4ed3fe0d57fead85affb56cb2ac0b774 Mon Sep 17 00:00:00 2001 From: Miyamizu-MitsuhaSang <2510681107@qq.com> Date: Sat, 1 Nov 2025 20:11:32 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0AI=E5=86=99=E4=BD=9C=E6=8C=87?= =?UTF-8?q?=E5=AF=BC=E6=A8=A1=E5=9D=97=20=E6=96=B0=E5=A2=9E=E5=8F=91?= =?UTF-8?q?=E9=9F=B3=E6=B5=8B=E8=AF=95=E6=A8=A1=E5=9D=97=20=E7=BF=BB?= =?UTF-8?q?=E8=AF=91=E6=A8=A1=E5=9D=97=E4=BD=BF=E7=94=A8=E5=AE=9E=E6=97=B6?= =?UTF-8?q?=E5=88=B7=E6=96=B0=E7=AD=96=E7=95=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 529 ++++++++++++++++-- app/api/ai_assist/routes.py | 4 +- app/api/article_director/__init__.py | 0 app/api/article_director/article_schemas.py | 10 + app/api/article_director/routes.py | 103 ++++ app/api/article_director/service.py | 64 +++ app/api/pronounciation_test/routes.py | 309 +++++++++- app/api/pronounciation_test/service.py | 345 ++++++++++++ app/api/search_dict/__init__.py | 0 app/api/{search.py => search_dict/routes.py} | 38 +- .../search_dict}/search_schemas.py | 12 +- app/api/search_dict/service.py | 208 +++++++ app/api/translator.py | 9 +- app/article_teacher.py | 26 - app/core/email_utils.py | 1 - app/models/__init__.py | 4 +- app/models/base.py | 19 + app/models/fr.py | 19 + app/models/jp.py | 17 +- app/utils/audio_init.py | 13 + app/utils/autocomplete.py | 2 +- main.py | 9 +- pyproject.toml | 4 + requirements.txt | 5 + scripts/fr/__init__.py | 0 scripts/fr/import_proverb.py | 41 ++ scripts/update_fr.py | 1 - settings.py | 25 +- 28 files changed, 1705 insertions(+), 112 deletions(-) create mode 100644 app/api/article_director/__init__.py create mode 100644 app/api/article_director/article_schemas.py create mode 100644 app/api/article_director/routes.py create mode 100644 app/api/article_director/service.py create mode 100644 app/api/pronounciation_test/service.py create mode 100644 app/api/search_dict/__init__.py rename app/api/{search.py => 
search_dict/routes.py} (76%) rename app/{schemas => api/search_dict}/search_schemas.py (73%) create mode 100644 app/api/search_dict/service.py delete mode 100644 app/article_teacher.py create mode 100644 app/utils/audio_init.py create mode 100644 scripts/fr/__init__.py create mode 100644 scripts/fr/import_proverb.py diff --git a/README.md b/README.md index 94b92c3..31440d7 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ Authorization: Bearer ```json { - "user_email" : "string" + "email" : "string" } ``` @@ -221,53 +221,106 @@ Authorization: Bearer - `200`: 密码重置成功 - `400`: 密码不合法或令牌无效 +#### 1.8 手机找回密码(已废弃) + +> **说明**: 该接口仍在服务端保留,但已不再推荐使用,后续版本可能会移除。 + +- **接口**: `POST /users/auth/forget-password/phone` +- **描述**: 通过手机号码请求验证码以找回密码 +- **请求体**: + +```json +{ + "phone_number": "string" +} +``` + +- **响应**: + +```json +{ + "message": "验证码已发送" +} +``` + +- **状态码**: + - `200`: 发送成功 + - `404`: 手机号未注册 + +#### 1.9 手机验证码验证(已废弃) + +> **说明**: 该接口与 1.8 配合使用,已不再推荐使用。 + +- **接口**: `POST /users/auth/varify_code` +- **描述**: 校验短信验证码是否有效 +- **请求体**: + +```json +{ + "phone": "string", + "code": "string" +} +``` + +- **响应**: + +```json +{ + "message": "验证成功,可以重置密码" +} +``` + +- **状态码**: + - `200`: 验证成功 + - `400`: 验证码错误或已过期 + --- -### 2. 词典搜索模块 +### 2. 
词典搜索模块 (`Dictionary Search API`) -#### 2.1 词典搜索 +#### 2.1 单词精确搜索 -- **接口**: `POST /search` -- **描述**: 根据关键词搜索词典内容 +- **接口**: `POST /search/word` +- **描述**: 根据语言精确查询词条,自动累计词频并返回按词性分组的释义。 - **需要认证**: 是 - **请求体**: ```json { - "query": "string", - "language": "fr" | "jp", - "sort": "relevance" | "date", - "order": "asc" | "des" + "query": "bonjour", + "language": "fr", + "sort": "relevance", + "order": "des" } ``` -- **法语响应示例**: +- **法语响应示例** (`language = fr`): ```json { - "query": "string", - "pos": ["n.m.", "v.t."], + "query": "bonjour", + "pos": ["n.m."], "contents": [ { "pos": "n.m.", - "chi_exp": "中文解释", - "eng_explanation": "English explanation", - "example": "例句" + "chi_exp": "问候语;用于见面时打招呼", + "eng_explanation": "greeting; hello", + "example": "Bonjour, comment ça va ?" } ] } ``` -- **日语响应示例**: +- **日语响应示例** (`language = jp`): ```json { - "query": "string", - "pos": ["名词", "动词"], + "query": "日本語", + "pos": ["名词"], "contents": [ { - "chi_exp": "中文解释", - "example": "例句" + "chi_exp": "日语;日本的语言", + "example": "日本語を勉強しています。" } ] } @@ -278,17 +331,77 @@ Authorization: Bearer - `404`: 未找到词条 - `401`: 未授权 -#### 2.2 搜索建议 +#### 2.2 法语谚语详情 -- **接口**: `POST /search/list` -- **描述**: 获取搜索自动完成建议 +- **接口**: `POST /search/proverb` +- **描述**: 通过谚语ID获取法语谚语原文与中文解释。 +- **需要认证**: 是 +- **查询参数**: + - `proverb_id`: 谚语ID (integer) +- **响应**: + +```json +{ + "proverb_text": "Petit à petit, l'oiseau fait son nid.", + "chi_exp": "循序渐进才能取得成功。" +} +``` + +- **状态码**: + - `200`: 查询成功 + - `404`: 谚语不存在 + +#### 2.3 单词联想建议 + +- **接口**: `POST /search/word/list` +- **描述**: 根据用户输入返回单词联想列表,含前缀匹配与包含匹配。 - **需要认证**: 是 - **请求体**: ```json { - "query": "string", - "language": "fr" | "jp" + "query": "bon", + "language": "fr", + "sort": "relevance", + "order": "des" +} +``` + +- **响应示例**: + +```json +{ + "list": ["bonjour", "bonsoir", "bonheur"] +} +``` + +> **说明**: `language = "jp"` 时返回形如 `[["愛", "あい"], ["愛する", "あいする"]]` 的二维数组,第二列为假名读音。 + +#### 2.4 谚语联想建议 + +- **接口**: `POST /search/proverb/list` +- **描述**: 
按输入内容(自动识别法语或中文)返回谚语候选列表。 +- **需要认证**: 是 +- **请求体**: + +```json +{ + "query": "慢", + "language": "fr" +} +``` + +- **响应示例**: + +```json +{ + "list": [ + { + "id": 12, + "proverb": "Rien ne sert de courir, il faut partir à point.", + "chi_exp": "做事要循序渐进,贵在及时出发。" + } + ] } ``` @@ -358,7 +471,12 @@ Authorization: Bearer ```json { - "pong": true + "pong": true, + "redis": { + "host": "127.0.0.1", + "port": 6379, + "db": 0 + } } ``` @@ -508,9 +626,291 @@ Authorization: Bearer --- -### 6. 数据模型 +### 6. 用户反馈模块 (`/improvements`) -#### 6.1 法语词性枚举 +#### 6.1 提交用户反馈 + +- **接口**: `POST /improvements` +- **描述**: 登录用户提交产品改进或问题反馈,系统会向预设邮箱发送通知。 +- **需要认证**: 是 +- **请求体**: + +```json +{ + "report_part": "string", + "text": "string" +} +``` + +- **字段说明**: + - `report_part`: 反馈类别,可选值 `ui_design`, `dict_fr`, `dict_jp`, `user`, `translate`, `writting`, `ai_assist`, `pronounce`(`comment_api_test` 仅用于内部测试) + - `text`: 反馈正文,不能为空 + +- **响应**: + +```json +{ + "massages": "feedback succeed" +} +``` + +- **状态码**: + - `200`: 提交成功 + - `422`: 字段校验失败(不合法的类别或空文本) + +--- + +### 7. 词条评论模块 (`/comment/word`) + +#### 7.1 新增词条评论 + +- **接口**: `POST /comment/word/{lang}` +- **描述**: 为指定语言的词条添加用户评论 +- **需要认证**: 是 +- **路径参数**: + - `lang`: `fr` 或 `jp` +- **请求体**: + +```json +{ + "comment_word": "string", + "comment_content": "string" +} +``` + +- **响应**: 创建成功时返回 `200`,响应体为空。 +- **状态码**: + - `200`: 创建成功 + - `422`: 字段校验失败 + +--- + +### 8. 
作文指导模块 (`/article-director`) + +#### 8.1 作文批改会话 + +- **接口**: `POST /article-director/article` +- **描述**: 将学生作文(文本形式)提交给 EduChat 模型获取结构化点评,会话上下文保存在 Redis 中。 +- **需要认证**: 是 +- **查询参数**: + - `lang`: 作文语种,默认 `fr-FR`,可选值 `fr-FR`(法语)、`ja-JP`(日语)、`en-US`(英文) +- **请求体**: + +```json +{ + "title_content": "我的作文全文......", + "article_type": "议论文" +} +``` + +- **响应**: + +```json +{ + "reply": "整体点评内容……", + "tokens": 1834, + "conversation_length": 3 +} +``` + +- **状态码**: + - `200`: 批改成功 + - `401`: 未授权 + +> **提示**: 每次调用批改/追问接口之后,前端应根据需要调用重置接口清空 Redis 中的上下文。 + +#### 8.2 作文追问 + +- **接口**: `POST /article-director/question` +- **描述**: 在现有作文会话上追加提问,获取针对性回复。 +- **需要认证**: 是 +- **请求体**: + +```json +{ + "query": "请给出第三段的改写示例" +} +``` + +- **响应**: + +```json +{ + "reply": "改写建议……", + "tokens": 924, + "conversation_length": 5 +} +``` + +- **状态码**: + - `200`: 追问成功 + - `401`: 未授权 + +#### 8.3 重置作文会话 + +- **接口**: `POST /article-director/reset` +- **描述**: 清除当前用户在 Redis 中的作文指导上下文,确保下一次批改从头开始。 +- **需要认证**: 是 +- **响应**: + +```json +{ + "message": "已重置用户 的作文对话记录" +} +``` + +--- + +### 9. 
发音测评模块 (`/test/pron`) + +#### 9.1 开始/恢复测评 + +- **接口**: `GET /test/pron/start` +- **描述**: 为当前用户新建或恢复发音测评会话,默认随机抽取20句目标语言的测评文本。 +- **需要认证**: 是 +- **查询参数**: + - `count`: 抽题数量 (integer,默认 `20`) + - `lang`: 语种代码,支持 `fr-FR`(法语)、`ja-JP`(日语),默认 `fr-FR` +- **响应**: + +```json +{ + "ok": true, + "resumed": false, + "message": "New fr-FR test started", + "session": { + "lang": "fr-FR", + "current_index": 0, + "sentence_ids": [12, 45, 87], + "total": 3 + } +} +``` + +- **状态码**: + - `200`: 成功创建或恢复会话 + - `400`: 不支持的语言参数 + - `404`: 题库为空 + +#### 9.2 提交语音测评 + +- **接口**: `POST /test/pron/sentence_test` +- **描述**: 上传 `.wav` 录音进行发音测评,服务端自动转换格式并调用 Azure Speech 评分。 +- **需要认证**: 是 +- **请求类型**: `multipart/form-data` +- **表单字段**: + - `record`: 上传的音频文件(仅支持 `.wav`) + - `lang`: 语种代码,默认 `fr-FR` +- **响应示例**: + +```json +{ + "ok": true, + "data": { + "ok": true, + "recognized_text": "Bonjour tout le monde", + "overall_score": 84.5, + "accuracy": 82.0, + "fluency": 86.0, + "completeness": 85.0, + "progress": "3/10" + } +} +``` + +- **状态码**: + - `200`: 评分成功(若全部句子完成,会自动结束会话) + - `400`: 会话不存在或音频转换失败 + - `404`: 对应题目不存在 + - `415`: 音频格式不符合要求 + +#### 9.3 查询当前题目 + +- **接口**: `GET /test/pron/current_sentence` +- **描述**: 返回当前需要朗读的句子。 +- **需要认证**: 是 +- **响应**: + +```json +{ + "ok": true, + "index": 2, + "current_sentence": "Bonjour tout le monde" +} +``` + +- **状态码**: + - `200`: 查询成功 + - `404`: 会话不存在 + +#### 9.4 查看本次题目列表 + +- **接口**: `POST /test/pron/testlist` +- **描述**: 返回本次测评抽取的所有句子列表及其 ID。 +- **需要认证**: 是 +- **响应示例**: + +```json +[ + {"id": 12, "text": "Bonjour tout le monde"}, + {"id": 45, "text": "Je m'appelle Léa"} +] +``` + +- **状态码**: + - `200`: 查询成功 + - `404`: 会话不存在 + +#### 9.5 结束测评 + +- **接口**: `POST /test/pron/finish` +- **描述**: 结束当前测评会话,并返回成绩。若测评未完成,需要携带 `confirm=true` 强制结束。 +- **需要认证**: 是 +- **请求体**: `application/x-www-form-urlencoded` + - `confirm`: boolean,默认 `false` +- **响应示例(强制结束)**: + +```json +{ + "ok": true, + "forced_end": true, + "message": "⚠️ Test forcefully ended. 
3/10 sentences completed.", + "data": { + "ok": true, + "average_score": 82.3, + "records": [ + { + "sentence_id": 12, + "overall_score": 84.5 + } + ] + } +} +``` + +- **状态码**: + - `200`: 成功结束会话 + - `404`: 会话或结果不存在 + +#### 9.6 清除测评会话 + +- **接口**: `POST /test/pron/clear_session` +- **描述**: 主动清除 Redis 中的测评会话(用户放弃测评时使用)。 +- **需要认证**: 是 +- **响应**: + +```json +{ + "ok": true, + "message": "Session cleared" +} +``` + +--- + +### 10. 数据模型 + +#### 10.1 法语词性枚举 ```text n. - 名词 @@ -528,7 +928,7 @@ interj. - 感叹词 art. - 冠词 ``` -#### 6.2 日语词性枚举 +#### 10.2 日语词性枚举 ```text 名词, 形容词, 形容动词, 连用, 一段动词, 五段动词, @@ -538,7 +938,7 @@ art. - 冠词 --- -### 7. 错误码说明 +### 11. 错误码说明 | 状态码 | 说明 | |--------|------| @@ -554,13 +954,13 @@ art. - 冠词 --- -### 8. AI助手模块 (`/ai_assist`) +### 12. AI助手模块 (`/ai_assist`) -#### 8.1 词语智能问答 +#### 12.1 词语智能问答 -- **接口**: `POST /ai_assist/exp` -- **描述**: 针对指定词语,向AI助手提问相关问题,获取简洁自然的答案,适合初学者。 -- **需要认证**: 是 +- **接口**: `POST /ai_assist/word/exp` +- **描述**: 针对指定词语向AI助手提问,服务端会基于Redis保存的上下文历史给出简洁、贴合学习者的回答。 +- **需要认证**: 是(`Bearer` Token) - **请求体**: ```json @@ -570,6 +970,10 @@ art. - 冠词 } ``` +- **限制**: + - 普通用户调用次数超过100次时会返回 `400 本月API使用量已超` + - 每个 `word` 独立维护聊天上下文,历史保存于Redis + - **响应**: ```json @@ -584,15 +988,24 @@ art. - 冠词 - **状态码**: - `200`: 问答成功 - `400`: 本月API使用量已超 + - `401`: 未授权 - `500`: AI调用失败 -#### 8.2 清除词语聊天记录 +#### 12.2 通用AI对话(预留) + +- **接口**: `POST /ai_assist/univer` +- **描述**: 预留的通用AI对话接口,当前版本尚未实现业务逻辑,调用将返回空响应。 +- **需要认证**: 是 +- **状态码**: + - `200`: 请求成功(响应体为空) + +#### 12.3 清除词语聊天记录 - **接口**: `POST /ai_assist/clear` - **描述**: 清除指定词语的AI助手聊天记录 - **需要认证**: 是 - **请求参数**: - - `word`: 词语 (string) + - `word`: 词语 (query 参数,string) - **响应**: @@ -607,7 +1020,7 @@ art. - 冠词 --- -### 9. 使用示例 +### 13. 使用示例 #### 完整的API调用流程示例 @@ -630,15 +1043,26 @@ curl -X POST "http://127.0.0.1:8000/users/login" \ }' # 3. 
使用返回的token进行词典搜索 -curl -X POST "http://127.0.0.1:8000/search" \ +curl -X POST "http://127.0.0.1:8000/search/word" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer " \ -d '{ "query": "bonjour", + "language": "fr", + "sort": "relevance", + "order": "des" + }' + +# 4. 获取单词联想列表 +curl -X POST "http://127.0.0.1:8000/search/word/list" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer " \ + -d '{ + "query": "bon", "language": "fr" }' -# 4. 使用翻译API +# 5. 使用翻译API curl -X POST "http://127.0.0.1:8000/translate" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer " \ @@ -648,11 +1072,11 @@ curl -X POST "http://127.0.0.1:8000/translate" \ "to_lang": "zh" }' -# 5. 测试Redis连接 +# 6. 测试Redis连接 curl -X GET "http://127.0.0.1:8000/ping-redis" -# 6. 词语智能问答 -curl -X POST "http://127.0.0.1:8000/ai_assist/exp" \ +# 7. 词语智能问答 +curl -X POST "http://127.0.0.1:8000/ai_assist/word/exp" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer " \ -d '{ @@ -660,18 +1084,18 @@ curl -X POST "http://127.0.0.1:8000/ai_assist/exp" \ "question": "什么是法语?" }' -# 7. 清除词语聊天记录 -curl -X POST "http://127.0.0.1:8000/ai_assist/clear" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer " \ - -d '{ - "word": "法语" - }' +# 8. 清除词语聊天记录 +curl -X POST "http://127.0.0.1:8000/ai_assist/clear?word=法语" \ + -H "Authorization: Bearer " + +# 9. 开启发音测评 +curl -X GET "http://127.0.0.1:8000/test/pron/start?count=5&lang=fr-FR" \ + -H "Authorization: Bearer " ``` --- -### 9. 开发者说明 +### 14. 开发者说明 - **数据库**: 使用MySQL存储词典数据和用户信息 - **缓存**: 使用Redis进行token黑名单管理和API限流 @@ -681,10 +1105,11 @@ curl -X POST "http://127.0.0.1:8000/ai_assist/clear" \ - **文件上传**: 支持Excel格式的批量词典导入 - **CORS**: 支持本地开发环境跨域访问 - **API文档**: 启动服务后访问 `http://127.0.0.1:8000/docs` 查看Swagger文档 +- **发音评测**: `/test/pron` 路由提供发音测评接口(基于 Azure Speech 评分),详见第 9 节「发音测评模块」 --- -### 10. 部署说明 +### 15. 部署说明 1. 安装依赖: `pip install -r requirements.txt` 2. 
配置数据库连接 (settings.py) diff --git a/app/api/ai_assist/routes.py b/app/api/ai_assist/routes.py index 80134f2..2a79715 100644 --- a/app/api/ai_assist/routes.py +++ b/app/api/ai_assist/routes.py @@ -33,7 +33,7 @@ async def dict_exp( :param user: :return: """ - if user[0].token_usage > CHAT_TTL and not user[0].is_admin: + if user[0].token_usage > MAX_USAGE_PER and not user[0].is_admin: raise HTTPException(status_code=400, detail="本月API使用量已超") redis = request.app.state.redis @@ -104,6 +104,6 @@ async def universal_main(): @ai_router.post("/clear") async def clear_history(word: str, request: Request, user: Tuple[User, Dict] = Depends(get_current_user)): redis = request.app.state.redis - user_id = user[0].id + user_id = str(user[0].id) await clear_chat_history(redis, user_id, word) return {"msg": f"已清除 {word} 的聊天记录"} diff --git a/app/api/article_director/__init__.py b/app/api/article_director/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/api/article_director/article_schemas.py b/app/api/article_director/article_schemas.py new file mode 100644 index 0000000..f7eb70b --- /dev/null +++ b/app/api/article_director/article_schemas.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel + + +class UserArticleRequest(BaseModel): + # theme: Optional[str] + title_content: str + article_type: str + +class UserQuery(BaseModel): + query: str \ No newline at end of file diff --git a/app/api/article_director/routes.py b/app/api/article_director/routes.py new file mode 100644 index 0000000..1a40645 --- /dev/null +++ b/app/api/article_director/routes.py @@ -0,0 +1,103 @@ +""" +每次调用 article-director/article 接口时都要同时调用reset以清空 redis 中的上下文 +""" +from typing import Literal, Dict, Tuple + +from fastapi import APIRouter, Depends +from starlette.requests import Request + +from app.api.article_director import service +from app.api.article_director.article_schemas import UserArticleRequest, UserQuery +from app.models import User +from app.utils.security import get_current_user 
+ +article_router = APIRouter() + + +@article_router.post("/article-director/article") +async def article_director( + request: Request, + upload_article: UserArticleRequest, + lang: Literal["en-US", "fr-FR", "ja-JP"] = "fr-FR", + user: Tuple[User, Dict] = Depends(get_current_user) +): + """ + 文本形式接口,即直接从文本框中获取 + 每次调用本接口的同时都要同时调用reset接口 + :param upload_article: + :param lang: + :return: + """ + redis = request.app.state.redis + + article_lang = "法语" if lang == "fr-FR" else "日语" + + user_id = user[0].id + article = upload_article.title_content + + # 读取历史对话 + session = await service.get_session(redis_client=redis, user_id=user_id) + + # 追加用户输入 + user_prompt = service.set_user_prompt(upload_article, article_lang=article_lang) + session.append({"role": "user", "content": user_prompt}) + + # 调用 EduChat 模型 + completion = service.chat_ecnu_request(session) + + # 取出回答内容 + assistant_reply = completion.choices[0].message.content + + # 保存模型回复 + session.append({"role": "assistant", "content": assistant_reply}) + + # 存入 Redis + await service.save_session(redis, user_id, session) + + return { + "reply": assistant_reply, + "tokens": completion.usage.total_tokens, + "conversation_length": len(session), + } + + +@article_router.post("/article-director/question", description="用户进一步询问") +async def further_question( + request: Request, + user_prompt: UserQuery, + user: Tuple[User, Dict] = Depends(get_current_user) +): + redis = request.app.state.redis + + user_id = user[0].id + + # 读取历史对话 + session = await service.get_session(redis_client=redis, user_id=user_id) + + # 追加用户输入 + session.append({"role": "user", "content": user_prompt.query}) + + # 调用 EduChat 模型 + completion = service.chat_ecnu_request(session) + + # 取出回答内容 + assistant_reply = completion.choices[0].message.content + + # 保存模型回复 + session.append({"role": "assistant", "content": assistant_reply}) + + # 存入 Redis + await service.save_session(redis, user_id, session) + + return { + "reply": assistant_reply, + "tokens": 
completion.usage.total_tokens, + "conversation_length": len(session), + } + +@article_router.post("/article-director/reset", description="重置上下文") +async def reset_conversation(request: Request, user: Tuple[User, Dict] = Depends(get_current_user)): + user_id = user[0].id + redis = request.app.state.redis + await service.reset_session(redis, user_id) + return {"message": f"已重置用户 {user_id} 的作文对话记录"} diff --git a/app/api/article_director/service.py b/app/api/article_director/service.py new file mode 100644 index 0000000..e74951c --- /dev/null +++ b/app/api/article_director/service.py @@ -0,0 +1,64 @@ +import json +from typing import List, Dict + +from openai import OpenAI +from redis import Redis + +from app.api.article_director.article_schemas import UserArticleRequest +from settings import settings + +SYSTEM_PROMPT = """ +# 背景 +你是一个人工智能助手,名字叫EduChat,是一个由华东师范大学开发的教育领域大语言模型。 +# 对话主题:作文指导 +## 作文指导主题的要求: +EduChat你需要扮演一位经验丰富的语文老师,现在需要帮助一位学生审阅作文并给出修改建议。请按照以下步骤进行: +整体评价:先对作文的整体质量进行简要评价,指出主要优点和需要改进的方向。 +亮点分析:具体指出作文中的亮点(如结构、描写、情感表达等方面的优点)。 +具体修改建议:针对作文中的不足,从以下几个方面提出具体修改建议,并给出修改后的示例: +语言表达:是否生动、准确?有无冗余或重复?可以如何优化? +细节描写:是否足够具体?能否加入更多感官描写(视觉、听觉、嗅觉、触觉等)使画面更立体? +情感表达:情感是否自然?能否更深入或升华? +结构布局:段落衔接是否自然?开头结尾是否呼应? 
(注意:每个建议点都要结合原文具体句子进行分析,并给出修改后的句子或段落作为示例) +写作技巧提示:提供2-3条实用的写作技巧(如动态描写公式、感官交织法等),帮助学生举一反三。 +修改效果总结:简要说明按照建议修改后,作文会有哪些方面的提升(如文学性、情感层次、场景沉浸感等)。 +请用亲切、鼓励的语气进行点评,保持专业性同时让学生易于接受。 +""" + + +def chat_ecnu_request( + session: List[Dict[str, str]], +): + client = OpenAI( + api_key=settings.ECNU_TEACH_AI_KEY, + base_url="https://chat.ecnu.edu.cn/open/api/v1" + ) + completion = client.chat.completions.create( + model="educhat-r1", + messages=session, + temperature=0.8, # 保持创造性 + top_p=0.9, # 保持多样性 + ) + + return completion + +def set_user_prompt(user_article: UserArticleRequest, article_lang: str): + user_prompt = f"以下是我的{article_lang}作文,作文体裁为{user_article.article_type},请帮我修改:{user_article.title_content}" + return user_prompt + +async def get_session(redis_client: Redis, user_id: str) -> List[Dict[str, str]]: + """从 Redis 读取对话上下文""" + data = await redis_client.get(f"session:{user_id}") + if data: + return json.loads(data) + else: + # 如果没有记录,创建带 system prompt 的初始会话 + return [{"role": "system", "content": SYSTEM_PROMPT}] + +async def save_session(redis_client: Redis, user_id: str, session: List[Dict[str, str]]): + """保存对话上下文到 Redis""" + await redis_client.setex(f"session:{user_id}", 86400, json.dumps(session)) + +async def reset_session(redis_client: Redis, user_id: str): + """清空用户上下文""" + await redis_client.delete(f"session:{user_id}") \ No newline at end of file diff --git a/app/api/pronounciation_test/routes.py b/app/api/pronounciation_test/routes.py index addc3be..4c78cf2 100644 --- a/app/api/pronounciation_test/routes.py +++ b/app/api/pronounciation_test/routes.py @@ -1,3 +1,308 @@ -from fastapi import APIRouter +import json +import os +import random +import tempfile +from typing import Literal, Tuple, Dict -pron_test_router = APIRouter() \ No newline at end of file +import azure.cognitiveservices.speech as speechsdk +from fastapi import APIRouter, Depends, UploadFile, File, HTTPException, Form +from starlette.requests import Request + +from app.api.pronounciation_test 
import service +from app.models import PronunciationTestFr, User, PronunciationTestJp +from app.utils.security import get_current_user +from settings import settings + +pron_test_router = APIRouter() + +AZURE_KEY = settings.AZURE_SUBSCRIPTION_KEY +SERVICE_REGION = "eastasia" + +speech_config = speechsdk.SpeechConfig(subscription=AZURE_KEY, region=SERVICE_REGION) +audio_config = speechsdk.audio.AudioConfig(filename="test.wav") + + +@pron_test_router.get("/start") +async def start_test( + request: Request, + count: int = 20, + lang: Literal["fr-FR", "ja-JP"] = Form("fr-FR"), + user: Tuple[User, Dict] = Depends(get_current_user) +): + """ + 开始新的发音测评会话: + - 若存在未完成测试,则自动恢复; + - 若无会话,则随机选取句子并创建新的 session; + - 支持多语言(法语/日语)。 + """ + redis = request.app.state.redis + user_id = user[0].id + + key = f"test_session:{user_id}" + data = await redis.get(key) + + # === 若存在未完成的测试会话 === + if data: + session = json.loads(data) + return { + "ok": True, + "resumed": True, + "message": "Resumed existing test", + "session": session + } + + # === 根据语言选择对应题库 === + if lang == "fr-FR": + total_count = await PronunciationTestFr.all().count() + table = PronunciationTestFr + elif lang == "ja-JP": + total_count = await PronunciationTestJp.all().count() + table = PronunciationTestJp + else: + raise HTTPException(status_code=400, detail="Unsupported language code") + + # === 随机抽取句子 ID === + if total_count == 0: + raise HTTPException(status_code=404, detail=f"No test sentences found for {lang}") + + selected = random.sample(range(1, total_count + 1), k=min(count, total_count)) + + # === 构建并保存会话 === + session = { + "lang": lang, # ← 新增语言字段 + "current_index": 0, + "sentence_ids": selected, + "total": len(selected), + } + + await redis.set(key, json.dumps(session), ex=3600) + + return { + "ok": True, + "resumed": False, + "message": f"New {lang} test started", + "session": session + } + + +@pron_test_router.post("/sentence_test") +async def pron_sentence_test( + request: Request, + record: UploadFile 
= File(...), + lang: Literal["fr-FR", "ja-JP"] = Form("fr-FR"), + user: Tuple[User, Dict] = Depends(get_current_user) +): + """ + 目前暂时只提供打分服务,不支持回听录音 + :param request: + :param record: + :param lang: + :param user: + :return: + """ + redis = request.app.state.redis + user_id = user[0].id + + key = f"test_session:{user_id}" + data = await redis.get(key) + if not data: + return {"ok": False, "error": "No active test session"} + + session = json.loads(data) + sentence_ids = session["sentence_ids"] + index = session["current_index"] + + if index >= len(sentence_ids): + await redis.delete(key) + return {"ok": True, "finished": True, "message": "All sentences tested"} + + sentence_id = sentence_ids[index] + sentence = await PronunciationTestFr.get(id=sentence_id) + if not sentence: + raise HTTPException(status_code=404, detail=f"Sentence {sentence_id} not found") + text = sentence.text + + if not record.filename.endswith(".wav"): + raise HTTPException(status_code=415, detail="Invalid file suffix, only '.wav' supported") + + with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(record.filename)[1]) as tmp: + tmp.write(await record.read()) + tmp.flush() + src_path = tmp.name + + # 调用转换函数 + norm_path = src_path + "_norm.wav" + result = service.convert_to_pcm16_mono_wav(src_path, norm_path) + if not result["ok"]: + raise HTTPException(status_code=400, detail=result["message"]) + + # 再验证格式 + if not service.verify_audio_format(norm_path): + raise HTTPException(status_code=415, detail="Invalid audio format") + + try: + result = service.assess_pronunciation(norm_path, text, lang) + if not result["ok"]: + raise HTTPException(status_code=400, detail=result) + except HTTPException as e: + return result + except Exception as e: + return {"ok": False, "error": str(e)} + finally: + os.remove(norm_path) + + await service.save_pron_result( + redis=redis, + user_id=user[0].id, + sentence_id=sentence_id, + text=text, + scores=result, + expire=3600 + ) + + 
session["current_index"] += 1 + await redis.set(key, json.dumps(session), ex=3600) + + result["progress"] = f"{session['current_index']}/{len(sentence_ids)}" + + return {"ok": True, "data": result} + + +@pron_test_router.get("/current_sentence") +async def get_current_sentence( + request: Request, + user: Tuple[User, Dict] = Depends(get_current_user), +): + redis = request.app.state.redis + user_id = user[0].id + + key = f"test_session:{user_id}" + data = await redis.get(key) + if not data: + return {"ok": False, "error": "No active test session"} + + session = json.loads(data) + sentence_ids = session["sentence_ids"] + index = session["current_index"] + if index >= len(sentence_ids): + return {"ok": True, "finished": True, "message": "All sentences tested"} + sentence_id = sentence_ids[index] + sentence = await PronunciationTestFr.get(id=sentence_id) + if not sentence: + return {"ok": False, "error": "Sentence not found"} + text = sentence.text + + return { + "ok": True, + "index": index, + "current_sentence": text, + } + + +@pron_test_router.post("/testlist") +async def get_testlist( + request: Request, + user: Tuple[User, Dict] = Depends(get_current_user), +): + redis = request.app.state.redis + user_id = user[0].id + + key = f"test_session:{user_id}" + data = await redis.get(key) + if not data: + return {"ok": False, "error": "No active test session"} + + session = json.loads(data) + sentence_ids = session["sentence_ids"] + sentences = [] + + for sentence_id in sentence_ids: + sentence = await PronunciationTestFr.get(id=sentence_id) + if not sentence: + raise HTTPException(status_code=404, detail=f"Sentence {sentence_id} not found") + text = sentence.text + sentences.append({"id": sentence_id, "text": text}) + + return sentences + + +@pron_test_router.post("/finish") +async def finish_test( + request: Request, + confirm: bool = Form(False), + user: Tuple[User, Dict] = Depends(get_current_user), +): + """ + 结束测试: + - 若用户未开始测试 → 返回提示; + - 若测试未完成且 confirm=False → 
返回提示; + - 若测试未完成但 confirm=True → 强制结束,返回已完成部分结果; + - 若测试已完成 → 返回完整成绩并清除缓存。 + """ + redis = request.app.state.redis + user_id = user[0].id + session_key = f"test_session:{user_id}" + + session_data = await redis.get(session_key) + if not session_data: + return {"ok": False, "message": "No active test session to finish"} + + session = json.loads(session_data) + current_index = session.get("current_index", 0) + sentence_ids = session.get("sentence_ids", []) + total = len(sentence_ids) + lang = session["lang"] + + if current_index < len(sentence_ids): + remaining = total - current_index + # 如果没有确认,则提醒用户 + if not confirm: + return { + "ok": False, + "unfinished": True, + "message": f"Test not finished. {remaining} sentence(s) remaining. " + "Resend with confirm=true to force end and view partial results." + } + + # 如果用户确认强制结束,则读取已完成部分成绩 + result = await service.get_pron_result(redis, user_id, delete_after=True) + await redis.delete(session_key) + + return { + "ok": True, + "forced_end": True, + "message": f"⚠️ Test forcefully ended. 
{current_index}/{total} sentences completed.", + "data": result + } + + # === 已完成测试 === + result = await service.get_pron_result(redis, user_id, delete_after=True) + if not result["ok"]: + raise HTTPException(status_code=404, detail=result.get("error", "Unknown error")) + # 删除 Redis session + await redis.delete(session_key) + + # 存入数据库 + record = await service.record_test_result(user=user[0], result=result, lang=lang) + + return { + "ok": True, + "message": "Test session cleared", + "data": result + } + + +@pron_test_router.post("/clear_session") +async def clear_session(request: Request, user: Tuple[User, Dict] = Depends(get_current_user)): + """ + 用户在未完成测试的情况下选择退出,询问是否保存进度,如果不保存则调用本接口清除 Redis + """ + redis = request.app.state.redis + user_id = user[0].id + + key = f"test_session:{user_id}" + await redis.delete(key) + return { + "ok": True, + "message": "Session cleared", + } diff --git a/app/api/pronounciation_test/service.py b/app/api/pronounciation_test/service.py new file mode 100644 index 0000000..b1d6491 --- /dev/null +++ b/app/api/pronounciation_test/service.py @@ -0,0 +1,345 @@ +import contextlib +import json +import os +import wave +from io import BytesIO +from typing import Literal, Dict, Any, List + +import azure.cognitiveservices.speech as speechsdk +from fastapi import HTTPException +from pydub import AudioSegment +from redis.asyncio import Redis + +from app.models import User +from app.models.base import UserTestRecord +from settings import settings + + +# from imageio_ffmpeg import get_ffmpeg_exe +# AudioSegment.converter = get_ffmpeg_exe() + + +def verify_audio_format(path: str) -> bool: + """ + 检测音频文件是否符合 Azure Speech 要求: + 采样率 16000Hz, 16-bit, 单声道 (PCM). 
+ 返回字典包含格式信息和布尔结果。 + """ + if not os.path.exists(path): + raise FileNotFoundError(f"Audio file not found: {path}") + + try: + with contextlib.closing(wave.open(path, 'rb')) as wf: + rate = wf.getframerate() + channels = wf.getnchannels() + width = wf.getsampwidth() + + ok = (rate == 16000 and channels == 1 and width == 2) + if not ok: + raise HTTPException( + status_code=400, + detail={ + "ok": False, + "rate": rate, + "channels": channels, + "width": width, + "message": ( + f"⚠️ Invalid format (rate={rate}, channels={channels}, width={width}). " + "Expected: 16000Hz, mono, 16-bit PCM." + ) + } + ) + except wave.Error as e: + raise HTTPException(status_code=401, detail=f"Invalid WAV file: {e}") + return True + +def assess_pronunciation( + audio_path: str, + reference_text: str, + lang: Literal["fr-FR", "ja-JP"] = "fr-FR", + grading_system: Literal["HundredMark", "FivePoint"] = "FivePoint", + granularity: Literal["Phoneme", "Word", "FullText"] = "Phoneme", + enable_miscue: bool = True, +) -> Dict[str, Any]: + """ + 使用 Azure Speech SDK 对音频文件进行发音测评。(增强错误输出版) + :param audio_path: 音频文件路径(必须是 PCM16/Mono/WAV) + :param reference_text: 期望朗读的文本 + :param lang: 语种代码,例如 'fr-FR'(法语)、'ja-JP'(日语)、'en-US'(英语) + :param grading_system: 评分体系 ('HundredMark' / 'FivePoint') + :param granularity: 评分粒度 ('Phoneme' / 'Word' / 'FullText') + :param enable_miscue: 是否检测漏读/多读(True 推荐) + :return: 包含整体分、准确度、流畅度、完整度及识别文本的字典 + """ + # === 1. 加载 Azure Speech 配置 === + subsciption_key = settings.AZURE_SUBSCRIPTION_KEY + region = "eastasia" + print(">>> Azure Key Loaded:", settings.AZURE_SUBSCRIPTION_KEY[:8], "...") + print(">>> Azure Region:", "eastasia") + + if not subsciption_key or not region: + raise RuntimeError("缺少 Azure Speech 环境变量 AZURE_SPEECH_KEY / AZURE_SPEECH_REGION") + + speech_config = speechsdk.SpeechConfig(subscription=subsciption_key, region=region) + speech_config.speech_recognition_language = lang + + # === 2. 
加载音频文件 === + audio_config = speechsdk.audio.AudioConfig(filename=audio_path) + recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config) + + print(reference_text) + + # === 3. 构建发音测评配置 === + pron_assestment = speechsdk.PronunciationAssessmentConfig( + reference_text=reference_text, + grading_system=getattr(speechsdk.PronunciationAssessmentGradingSystem, grading_system), + granularity=getattr(speechsdk.PronunciationAssessmentGranularity, granularity), + enable_miscue=enable_miscue + ) + pron_assestment.apply_to(recognizer) + + # === 4. 执行识别与打分 === + result = recognizer.recognize_once() + + if result.reason != speechsdk.ResultReason.RecognizedSpeech: + return __parse_azure_error(result) + + pa_result = result.properties.get(speechsdk.PropertyId.SpeechServiceResponse_JsonResult) + data = json.loads(pa_result) + pa_data = data["NBest"][0]["PronunciationAssessment"] + + return { + "ok": True, + "recognized_text": data.get("DisplayText"), + "overall_score": pa_data.get("PronScore"), + "accuracy": pa_data.get("AccuracyScore"), + "fluency": pa_data.get("FluencyScore"), + "completeness": pa_data.get("CompletenessScore") + } + +def __parse_azure_error(result: Any) -> Dict[str, Any]: + """ + 从 Azure Speech 识别结果中提取详细错误信息。 + 用于处理 ResultReason != RecognizedSpeech 的情况。 + :param result: SpeechRecognizer 的识别结果对象 + :return: 包含 ok=False 与详细错误字段的 dict + """ + err_data = { + "ok": False, + "error": str(result.reason), + "details": getattr(result, "error_details", None) + } + + # ① 无法识别语音(NoMatch) + if result.reason == speechsdk.ResultReason.NoMatch: + err_data["no_match_details"] = str(getattr(result, "no_match_details", None)) + print("[Azure] ⚠️ NoMatch: Speech could not be recognized.") + print(f"[Azure] Details: {err_data['no_match_details']}") + + # ② 请求被取消(Canceled) + elif result.reason == speechsdk.ResultReason.Canceled: + cancellation_details = getattr(result, "cancellation_details", None) + if cancellation_details: + 
def convert_to_pcm16_mono_wav(input_path: str, output_path: str):
    """
    Re-encode an audio file as 16 kHz, mono, 16-bit PCM WAV.

    Conversion problems are not raised; they are reported through the returned
    status dict (ok / path / message).
    """
    from pydub import AudioSegment

    try:
        source = AudioSegment.from_file(input_path)
        seconds = len(source) / 1000

        # Resample, down-mix and set the sample width one step at a time.
        resampled = source.set_frame_rate(16000)
        mono = resampled.set_channels(1)
        pcm16 = mono.set_sample_width(2)
        pcm16.export(output_path, format="wav")
    except Exception as e:
        return {
            "ok": False,
            "path": None,
            "message": f"Audio conversion failed: {str(e)}"
        }

    return {
        "ok": True,
        "path": output_path,
        "message": f"Converted successfully ({seconds:.2f}s)"
    }
async def get_pron_result(
        redis: Redis,
        user_id: int,
        delete_after: bool = False
) -> Dict[str, Any]:
    """
    Read every stored sentence score of a user and summarise them.

    The payload is read from the key ``test_result:{user_id}``. For each metric
    (overall, accuracy, fluency, completeness) the sum, the average over the
    sentences that carry a value, and a grade label are computed.

    :param redis: client exposing awaitable ``get`` and ``delete``
    :param user_id: id used to build the storage key
    :param delete_after: when true, the key is deleted after the summary is built
    :return: ``{"ok": False, "error": ...}`` when nothing usable is stored,
             otherwise a dict with count, totals, average, grades and sentences
    """
    redis_key = f"test_result:{user_id}"
    raw = await redis.get(redis_key)
    if not raw:
        return {"ok": False, "error": "No result found"}

    sentences: List[Dict[str, Any]] = json.loads(raw).get("sentences", [])
    if not sentences:
        return {"ok": False, "error": "Empty result list"}

    metrics = ("overall", "accuracy", "fluency", "completeness")

    # Accumulate per metric, skipping sentences where the metric is missing.
    totals: Dict[str, float] = {}
    averages: Dict[str, float] = {}
    for metric in metrics:
        running = 0.0
        seen = 0
        for sentence in sentences:
            if sentence.get(metric) is None:
                continue
            running += sentence[metric]
            seen += 1
        totals[metric] = running
        averages[metric] = round(running / seen, 2) if seen else 0.0

    # Lower bounds of the labelled bands, highest first.
    bands = ((4.5, "优秀 🏆"), (3.5, "良好 👍"), (2.5, "一般 🙂"))

    def grade(score: float) -> str:
        for floor, label in bands:
            if score >= floor:
                return label
        return "需改进 ⚠️" if score > 0 else "无数据"

    grade_map = {metric: grade(averages[metric]) for metric in metrics}
    grade_map["overall_level"] = grade(averages["overall"])

    if delete_after:
        await redis.delete(redis_key)

    return {
        "ok": True,
        "count": len(sentences),
        "totals": {metric: round(totals[metric], 2) for metric in metrics},
        "average": averages,
        "grades": grade_map,
        "sentences": sentences
    }
"jp"] +) -> Dict[str, Any]: + """ + 将一次完整测评结果写入数据库。 + + :param user: 当前用户对象 + :param result: 从 get_pron_result() 返回的结果字典 + :param lang: 测试语种 ('fr' 或 'jp') + :return: 数据库存储结果摘要 + """ + if not result.get("ok"): + return {"ok": False, "error": "Invalid test result"} + + avg = result.get("average", {}) + grades = result.get("grades", {}) + count = result.get("count", 0) + sentences = result.get("sentences", []) + + # 构建可存储的数据 + record = await UserTestRecord.create( + user=user, # 外键绑定用户对象 + username=user.name, + language=lang, + total_sentences=count, + average_score=avg.get("overall", 0.0), + accuracy_score=avg.get("accuracy", 0.0), + fluency_score=avg.get("fluency", 0.0), + completeness_score=avg.get("completeness", 0.0), + level=grades.get("overall_level", "无"), + raw_result=json.dumps(result, ensure_ascii=False), + ) + + return { + "ok": True, + "id": record.id, + "user": user.name, + "language": lang, + "average_score": avg.get("overall"), + "level": grades.get("overall_level"), + "count": count, + "timestamp": record.created_at.isoformat() + } \ No newline at end of file diff --git a/app/api/search_dict/__init__.py b/app/api/search_dict/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/api/search.py b/app/api/search_dict/routes.py similarity index 76% rename from app/api/search.py rename to app/api/search_dict/routes.py index e0b4a8a..a6ea8e1 100644 --- a/app/api/search.py +++ b/app/api/search_dict/routes.py @@ -2,12 +2,14 @@ from typing import Literal, List from fastapi import APIRouter, Depends, HTTPException, Request +from app.api.search_dict import service +from app.api.search_dict.search_schemas import SearchRequest, WordSearchResponse, SearchItemFr, SearchItemJp, \ + ProverbSearchRequest +from app.api.search_dict.service import suggest_autocomplete from app.api.word_comment.word_comment_schemas import CommentSet from app.models import DefinitionJp, CommentFr, CommentJp from app.models.fr import DefinitionFr -from 
@dict_search.post("/search/proverb")
async def proverb(request: Request, proverb_id: int, user=Depends(get_current_user)):
    """
    Fetch one French proverb by its id.

    :param request: incoming request object (not read in this handler)
    :param proverb_id: id forwarded to service.accurate_proverb
                       (presumably supplied as a query parameter - confirm against the client)
    :param user: value resolved by the get_current_user dependency
    :return: the value returned by service.accurate_proverb for this id
    """
    content = await service.accurate_proverb(proverb_id=proverb_id)
    return content
@dict_search.post("/search/proverb/list")
async def search_proverb_list(query_word: ProverbSearchRequest, user=Depends(get_current_user)):
    """
    Suggestion endpoint for the proverb search box.

    :param query_word: request body; only its ``query`` text is read here
    :param user: value resolved by the get_current_user dependency
    :return: ``{"list": [...]}`` with the suggestions from service.suggest_proverb
    """
    # The search language is derived from the query text itself: any Chinese
    # character switches to 'zh', otherwise 'fr'. query_word.language is not consulted.
    lang: Literal['fr', 'zh'] = 'zh' if service.contains_chinese(query_word.query) else 'fr'
    suggest_proverbs = await service.suggest_proverb(query=query_word, lang=lang)
    return {"list": suggest_proverbs}
def contains_chinese(text: str) -> bool:
    """Return True when *text* holds at least one character in U+4E00..U+9FFF."""
    cjk_hit = re.search(r'[\u4e00-\u9fff]', text)
    return cjk_hit is not None
async def suggest_autocomplete(query: SearchRequest, limit: int = 10):
    """
    Build the autocomplete list for the word search box.

    Candidates are collected in three tiers - exact match, prefix match, then
    containment match - de-duplicated, cut to ``limit`` entries and finally
    ordered by descending frequency, then shorter text, then text.

    :param query: the user's current input (text and language)
    :param limit: maximum number of suggestions returned
    :return: for 'fr' a list of words; otherwise a list of (text, hiragana) pairs
    """
    if query.language == 'fr':
        query_word = normalize_text(query.query)
        exact = await (
            WordlistFr
            .get_or_none(search_text=query.query)
            .values("text", "freq")
        )
        exact_word = [(exact.get("text"), exact.get("freq"))] if exact else []

        qs_prefix = (
            WordlistFr
            .filter(Q(search_text__startswith=query_word) | Q(text__startswith=query.query))
            .exclude(search_text=query.query)
            .only("text", "freq")
        )
        prefix_objs = await qs_prefix[:limit]
        prefix: List[Tuple[str, int]] = [(o.text, o.freq) for o in prefix_objs]

        # Only fall back to containment matches when prefixes did not fill the list.
        need = max(0, limit - len(prefix))
        contains: List[Tuple[str, int]] = []

        if need > 0:
            qs_contain = (
                WordlistFr
                .filter(Q(search_text__icontains=query_word) | Q(text__icontains=query.query))
                .exclude(Q(search_text__startswith=query_word) | Q(text__startswith=query.query) | Q(text=query.query))
                .only("text", "freq")
            )
            contains_objs = await qs_contain[: need * 2]
            contains = [(o.text, o.freq) for o in contains_objs]

        seen_text, out = set(), []
        for text, freq in exact_word + prefix + contains:
            if text not in seen_text:
                seen_text.add(text)
                out.append((text, freq))
            if len(out) >= limit:
                break
        # Entries are (text, freq), so the frequency sits at index 1; index 2
        # does not exist on these pairs and raised IndexError.
        out.sort(key=lambda w: (-w[1], len(w[0]), w[0]))
        return [text for text, _ in out]

    else:
        query_word = all_in_kana(query.query)
        exact = await (
            WordlistJp
            .get_or_none(
                text=query.query
            )
            .only("text", "hiragana", "freq")
        )
        exact_word = [(exact.text, exact.hiragana, exact.freq)] if exact else []

        qs_prefix = (
            WordlistJp
            .filter(Q(hiragana__startswith=query_word) | Q(text__startswith=query.query))
            .exclude(text=query.query)
            .only("text", "hiragana", "freq")
        )
        prefix_objs = await qs_prefix[:limit]
        prefix: List[Tuple[str, str, int]] = [(o.text, o.hiragana, o.freq) for o in prefix_objs]

        need = max(0, limit - len(prefix))
        contains: List[Tuple[str, str, int]] = []

        if need > 0:
            qs_contain = (
                WordlistJp
                .filter(Q(hiragana__icontains=query_word) | Q(text__icontains=query.query))
                .exclude(Q(hiragana__startswith=query_word) | Q(text__startswith=query.query) | Q(text=query.query))
                .only("text", "hiragana", "freq")
            )
            # Slice before awaiting so the limit is part of the query instead of
            # loading every containment match and trimming it in Python.
            contains_objs = await qs_contain[: need * 2]
            contains = [(o.text, o.hiragana, o.freq) for o in contains_objs]

        seen_text, out = set(), []
        for text, hiragana, freq in exact_word + prefix + contains:
            key = (text, hiragana)
            if key not in seen_text:
                seen_text.add(key)
                out.append((text, hiragana, freq))
            if len(out) >= limit:
                break
        # Entries are (text, hiragana, freq): frequency is index 2 here.
        out.sort(key=lambda w: (-w[2], len(w[0]), w[0]))
        return [(text, hiragana) for text, hiragana, _ in out]
random -import json - +import redis.asyncio as redis_asyncio from fastapi import APIRouter, Depends, HTTPException from app.models import User @@ -115,7 +114,7 @@ async def rate_limiter( raise HTTPException(status_code=429, detail=f"Too many requests") -@translator_router.post('/translate', response_model=TransResponse) +@translator_router.post('/translate', response_model=TransResponse, dependencies=[Depends(rate_limiter)]) async def translate( translate_request: TransRequest, user=Depends(get_current_user) diff --git a/app/article_teacher.py b/app/article_teacher.py deleted file mode 100644 index f5a54a1..0000000 --- a/app/article_teacher.py +++ /dev/null @@ -1,26 +0,0 @@ -import os - -from fastapi import APIRouter - -""" -# 背景 -你是一个人工智能助手,名字叫EduChat,是一个由华东师范大学开发的教育领域大语言模型。 -# 对话主题:作文指导 -## 作文指导主题的要求: -EduChat你需要扮演一位经验丰富的语文老师,现在需要帮助一位学生审阅作文并给出修改建议。请按照以下步骤进行: -整体评价:先对作文的整体质量进行简要评价,指出主要优点和需要改进的方向。 -亮点分析:具体指出作文中的亮点(如结构、描写、情感表达等方面的优点)。 -具体修改建议:针对作文中的不足,从以下几个方面提出具体修改建议,并给出修改后的示例: -语言表达:是否生动、准确?有无冗余或重复?可以如何优化? -细节描写:是否足够具体?能否加入更多感官描写(视觉、听觉、嗅觉、触觉等)使画面更立体? -情感表达:情感是否自然?能否更深入或升华? -结构布局:段落衔接是否自然?开头结尾是否呼应? (注意:每个建议点都要结合原文具体句子进行分析,并给出修改后的句子或段落作为示例) -写作技巧提示:提供2-3条实用的写作技巧(如动态描写公式、感官交织法等),帮助学生举一反三。 -修改效果总结:简要说明按照建议修改后,作文会有哪些方面的提升(如文学性、情感层次、场景沉浸感等)。 -请用亲切、鼓励的语气进行点评,保持专业性同时让学生易于接受。 -""" - -article_router = APIRouter() - -ECNU_API_KEY = os.getenv("ECNU_TEACH_AI_KEY") - diff --git a/app/core/email_utils.py b/app/core/email_utils.py index ad60a04..c402c5c 100644 --- a/app/core/email_utils.py +++ b/app/core/email_utils.py @@ -64,6 +64,5 @@ def main(receiver: str, code: int = 123456): if __name__ == '__main__': xza = "3480039769@qq.com" - bb = "1530799205@qq.com" me = "GodricTan@gmail.com" main(xza, code=123833) diff --git a/app/models/__init__.py b/app/models/__init__.py index e901846..310b788 100644 --- a/app/models/__init__.py +++ b/app/models/__init__.py @@ -1,5 +1,5 @@ from . 
class UserTestRecord(Model):
    """Summary row of one pronunciation test, stored in table ``user_test_record``."""
    id = fields.IntField(pk=True)
    # Owning user; reachable from the user side through ``test_records``.
    user = fields.ForeignKeyField("models.User", related_name="test_records")
    # User name stored on the record itself (presumably a snapshot - confirm).
    username = fields.CharField(max_length=20)

    language = fields.CharField(max_length=10)
    total_sentences = fields.IntField()
    # Per-metric scores of the whole test.
    average_score = fields.FloatField()
    accuracy_score = fields.FloatField()
    fluency_score = fields.FloatField()
    completeness_score = fields.FloatField()
    level = fields.CharField(max_length=20)

    raw_result = fields.JSONField()  # alternatively a TextField holding the JSON string
    created_at = fields.DatetimeField(auto_now_add=True)

    class Meta:
        table = "user_test_record"
class ProverbFr(Model):
    """French proverb / common expression with its Chinese explanation (table ``proverb_fr``)."""
    id = fields.IntField(pk=True)
    proverb = fields.TextField(description="法语谚语及常用表达")
    chi_exp = fields.TextField(description="中文释义")
    # Defaults to 0; the suggestion query in this patch orders by "-freq".
    freq = fields.IntField(default=0)
    created_at = fields.DatetimeField(auto_now_add=True)
    updated_at = fields.DatetimeField(auto_now=True)

    class Meta:
        table = "proverb_fr"
+AudioSegment.ffprobe = ffprobe_path # 👈 指定 ffprobe 路径 + +print(f"[INIT] ffmpeg: {AudioSegment.converter}") +print(f"[INIT] ffprobe: {AudioSegment.ffprobe}") \ No newline at end of file diff --git a/app/utils/autocomplete.py b/app/utils/autocomplete.py index d25e564..d9c8904 100644 --- a/app/utils/autocomplete.py +++ b/app/utils/autocomplete.py @@ -4,8 +4,8 @@ from typing import List, Literal, Tuple from tortoise import Tortoise from tortoise.expressions import Q +from app.api.search_dict.search_schemas import SearchRequest from app.models import WordlistFr, WordlistJp -from app.schemas.search_schemas import SearchRequest from app.utils.all_kana import all_in_kana from app.utils.textnorm import normalize_text from settings import TORTOISE_ORM diff --git a/main.py b/main.py index be2099f..ec10dc8 100644 --- a/main.py +++ b/main.py @@ -5,13 +5,14 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from tortoise.contrib.fastapi import register_tortoise -import app.models.signals +import app.utils.audio_init from app.api.admin.router import admin_router from app.api.ai_assist.routes import ai_router +from app.api.article_director.routes import article_router from app.api.make_comments.routes import comment_router from app.api.pronounciation_test.routes import pron_test_router from app.api.redis_test import redis_test_router -from app.api.search import dict_search +from app.api.search_dict.routes import dict_search from app.api.translator import translator_router from app.api.user.routes import users_router from app.api.word_comment.routes import word_comment_router @@ -62,7 +63,9 @@ app.include_router(comment_router, tags=["Comment API"]) app.include_router(word_comment_router, tags=["Word Comment API"], prefix="/comment/word") -app.include_router(pron_test_router, tags=["Pron Test API"], prefix="/test") +app.include_router(pron_test_router, tags=["Pron Test API"], prefix="/test/pron") + +app.include_router(article_router, tags=["Article 
class FrProverb:
    """Importer that loads one sheet of an Excel workbook into the ProverbFr table."""

    def __init__(self, __xlsx_name, __table_name):
        """
        :param __xlsx_name: path of the workbook to read
        :param __table_name: name of the sheet holding the proverbs
        """
        self.__xlsx_name = __xlsx_name
        self.__table_name = __table_name

    async def get_proverb(self) -> None:
        """
        Read the sheet and insert every (proverb, Chinese explanation) pair.

        Rows already present are reported and left untouched; rows with a blank
        proverb or explanation cell are reported and skipped.
        """
        df = pd.read_excel(Path(self.__xlsx_name), sheet_name=self.__table_name)
        df.columns = [col.strip() for col in df.columns]

        for row in df.itertuples():
            # Blank cells arrive as NaN; str(NaN) would otherwise be stored as
            # the literal text "nan".
            if pd.isna(row.法语谚语常用表达) or pd.isna(row.中文释义):
                print(f"第{row.Index}行缺少谚语或释义,已跳过")
                continue

            proverb = str(row.法语谚语常用表达).strip()
            chi_exp = str(row.中文释义).strip()

            _, created = await ProverbFr.get_or_create(proverb=proverb, chi_exp=chi_exp)
            if not created:
                # itertuples() exposes the row label as ``Index``; lowercase
                # ``index`` is the tuple method, not the row position.
                print(f"{proverb} 已存在!位于第{row.Index}行")

    async def build_connection(self):
        """Placeholder; performs no work."""
        pass