# pdf_code/zzb_data_word/sports_health.py
#
# 63 lines
# 1.8 KiB
# Python

import pandas as pd
import json
import utils
import mysql.connector
# One-off loader: reads every sheet of an Excel workbook of book metadata and
# inserts one row per book into the `ai_chat_book_info` MySQL table.

# NOTE(review): the database host, user and password are hard-coded below and
# therefore live in source control. They should be rotated and loaded from
# configuration or the environment; they are left in place here only so the
# script keeps connecting exactly as before.
conn = mysql.connector.connect(
    host='rm-bp1vns6jjy6yu46lhio.mysql.rds.aliyuncs.com',
    user='hematiyu',
    password='00a09f971769499f8c0495505ab0922C',
    database='ai_chat_mgmt_test',
)
# Cursor used to execute the INSERT statements.
cursor = conn.cursor()
excel_file_path = '/Users/zhengfei/Desktop/healthy_book.xlsx'

# Parameterised INSERT; built once because it does not depend on the sheet.
insert_query = '''
INSERT INTO ai_chat_book_info
(name, publish, author, isbn, pub_time, word_flag, category, keywords)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
'''


def _cell_text(value):
    """Return an Excel cell as text, mapping empty cells (NaN/NaT/None) to ''."""
    return '' if pd.isna(value) else str(value)


def _isbn_text(value):
    """Return the ISBN cell as text without a spurious trailing '.0'.

    pandas reads a numeric column that contains any empty cell as float, so a
    13-digit ISBN would otherwise be stringified as '9787...0.0'.
    """
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return _cell_text(value)


try:
    # Read the Excel workbook; the context manager closes the file handle.
    with pd.ExcelFile(excel_file_path) as xls:
        # Walk every sheet in the workbook.
        for sheet_name in xls.sheet_names:
            # Load the sheet, using its first row as the column header.
            df = pd.read_excel(xls, sheet_name, header=0)
            # Convert the DataFrame to a list of per-row dicts.
            for data in df.to_dict(orient='records'):
                # Rows without a book title are skipped entirely.
                if pd.isna(data['书名']):
                    continue
                insert_query_data = (
                    str(data['书名']),
                    _cell_text(data['出版单位']),
                    # NOTE(review): only '[' is stripped, never ']' — kept
                    # as in the original; confirm this is intended.
                    _cell_text(data['作者']).replace('[', ''),
                    _isbn_text(data['ISBN']),
                    # Keep only the first four characters (the year); an
                    # empty cell becomes '' whether it is NaT or NaN.
                    _cell_text(data['年份'])[:4],
                    _cell_text(data['是否转换为word格式']),
                    _cell_text(data['分类']),
                    _cell_text(data['关键词']),
                )
                cursor.execute(insert_query, insert_query_data)
    # Commit once, after every sheet has been inserted without error.
    conn.commit()
finally:
    # Always release the cursor and connection, even if a read or insert failed.
    cursor.close()
    conn.close()