pdf_code/zzb_data_word/get_keyword.py

72 lines
1.7 KiB
Python

import pandas as pd
import json
import utils
from config_p import MYSQL_HOST,MYSQL_USER,MYSQL_PASSWORD,MYSQL_DB
import mysql.connector
def getId(name):
categorys = [
{
name: "术后康复",
id: 1
},
{
name: "运动损伤康复",
id: 2
},
{
name: "慢病康复",
id: 3
},
{
name: "运动训练",
id: 4
},
{
name: "健康科普",
id: 5
},
]
object_dict = {obj[name]: obj[id] for obj in categorys}
if name in object_dict:
return object_dict[name]
else:
return 6
# 读取 Excel 文件
df = pd.read_excel('/Users/zhengfei/Desktop/book.xlsx', header=0)
# 将 DataFrame 转换为字典列表
data_list = df.to_dict(orient='records')
conn = mysql.connector.connect(
host = 'rm-bp1vns6jjy6yu46lhio.mysql.rds.aliyuncs.com',
user = 'hematiyu',
password = '00a09f971769499f8c0495505ab0922C',
database = 'km'
)
# 创建一个cursor对象来执行SQL语句
cursor = conn.cursor()
for data in data_list:
print(data)
book_name = str(data['书名']).replace('\n', '')
category = str(data['分类'])
category_name = category.split(',')[0]
category = getId(category_name)
keywords = str(data['关键词'])
if keywords == 'nan':
keywords = ''
insert_query = '''
update km_doc set category = {category},keywords = '{keywords}',source = 1 where title = '{book_name}'
'''.format(book_name=book_name, category=category, keywords=keywords)
print(insert_query)
cursor.execute(insert_query)
conn.commit()
cursor.close()
conn.close()