You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
84 lines
3.2 KiB
84 lines
3.2 KiB
import json
|
|
import pymysql
|
|
import os
|
|
|
|
# Database connection settings, passed as keyword arguments to
# pymysql.connect().
#
# Every value can be overridden through an environment variable so the script
# can be pointed at another server without editing the source; the fallbacks
# are the values that used to be hard-coded here.
#
# SECURITY: the fallback credentials (root user and its password) are stored
# in plain text in this file. Prefer supplying AI_QC_DB_USER and
# AI_QC_DB_PASSWORD through the environment, and rotate the committed password.
DB_CONFIG = {
    "host": os.environ.get("AI_QC_DB_HOST", "39.108.252.248"),
    # pymysql expects an integer port, environment values are strings.
    "port": int(os.environ.get("AI_QC_DB_PORT", "3306")),
    "user": os.environ.get("AI_QC_DB_USER", "root"),
    "password": os.environ.get("AI_QC_DB_PASSWORD", "Zp.123456"),
    "database": os.environ.get("AI_QC_DB_NAME", "yd_gzlps_test"),
    "charset": "utf8mb4",
}

# Directory that holds the vocabulary JSON files; overridable via
# AI_QC_VOCAB_DIR for machines where the project lives elsewhere.
VOCAB_DIR = os.environ.get("AI_QC_VOCAB_DIR", r"d:\Projects\US_RPT_QA\data\vocab")
|
|
|
|
# Vocabulary files to import, in processing order. Each entry is
# (file name inside VOCAB_DIR, vocab_type stored in the table,
#  label used in the progress output, whether the JSON is a
#  {raw_text: correct_text} mapping rather than a plain collection of words).
_VOCAB_SOURCES = (
    ("l3_mapping.json", "L3", "L3 纠错库", True),        # L3 correction pairs
    ("pinyin_map.json", "Pinyin", "Pinyin 映射库", True),  # Pinyin mapping
    ("l1_standard.json", "L1", "L1 标准库", False),       # L1 standard terms
    ("l2_hospital.json", "L2", "L2 本院库", False),       # L2 hospital-specific terms
)

# INSERT IGNORE leaves the statement to skip rows the table rejects as
# duplicates, so cursor.rowcount reflects only the rows actually added.
_INSERT_VOCAB_SQL = (
    "INSERT IGNORE INTO ai_qc_vocabulary "
    "(raw_text, correct_text, vocab_type, status) "
    "VALUES (%s, %s, %s, 'approved')"
)


def _load_vocab_pairs(path, is_mapping):
    """Return the (raw_text, correct_text) pairs stored in the JSON file at *path*."""
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    if is_mapping:
        return list(data.items())
    # Whitelist-style vocabularies store the same word as raw and correct text.
    return [(word, word) for word in data]


def _import_vocab_file(cursor, path, vocab_type, is_mapping):
    """Insert every entry of one vocabulary file; return the summed rowcount."""
    inserted = 0
    for raw, correct in _load_vocab_pairs(path, is_mapping):
        cursor.execute(_INSERT_VOCAB_SQL, (raw, correct, vocab_type))
        inserted += cursor.rowcount
    return inserted


def migrate():
    """Import the vocabulary JSON files from VOCAB_DIR into ai_qc_vocabulary.

    Connects with DB_CONFIG, then processes each file listed in
    _VOCAB_SOURCES in order. Files that do not exist are skipped silently.
    All inserts run in a single transaction: it is committed once every file
    has been processed and rolled back if any step raises.

    Progress and errors are reported with print(); nothing is raised to the
    caller for connection or migration errors, and the function always
    returns None.
    """
    try:
        conn = pymysql.connect(**DB_CONFIG)
    except Exception as e:
        print(f"❌ 无法连接到数据库: {e}")
        return

    try:
        # The context manager closes the cursor even when an insert fails.
        with conn.cursor() as cursor:
            print("🚀 已连接到数据库,准备开始迁移数据...")
            count = 0
            for filename, vocab_type, label, is_mapping in _VOCAB_SOURCES:
                path = os.path.join(VOCAB_DIR, filename)
                if not os.path.exists(path):
                    continue
                count += _import_vocab_file(cursor, path, vocab_type, is_mapping)
                print(f"--- {label}处理完成")

        conn.commit()
        print(f"✅ 迁移任务圆满完成!共成功导入/更新 {count} 条词项到 ai_qc_vocabulary 表。")
    except Exception as e:
        # Report first: if the connection is already broken, rollback() may
        # raise as well and would otherwise hide the original error.
        print(f"❌ 迁移过程中发生错误: {e}")
        conn.rollback()
    finally:
        conn.close()
|
|
|
|
# Script entry point: run the migration only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    migrate()
|
|
|