pdf_code/zzb_data_word/redis_insert.py

199 lines
7.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import mysql.connector
import utils
#from config import MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, MYSQL_DB
import redis_service
import redis
def _split_terms(raw):
    """Split a comma-separated cell into its terms.

    ``None``, an empty string and the literal ``'nan'`` (what ``str()`` yields
    for an empty Excel cell read by pandas) all mean "no terms".
    """
    if raw is None or raw in ('', 'nan'):
        return []
    return raw.split(',')


def _attach(measure_text, term, as_suffix):
    """Join a term to a measure name, after it when ``as_suffix`` else before it."""
    return measure_text + term if as_suffix else term + measure_text


def _iter_name_pairs(show_measure, synonyms, period_terms, change_terms, period_aliases):
    """Yield ``(show_name, parser_name)`` pairs for one measure.

    Period terms are prefixed to the measure name and change terms are
    suffixed. For every synonym the plain combination is yielded first,
    followed by one extra combination per alias of each period expression
    that occurs inside the term.
    """
    for term in period_terms + change_terms:
        # Decide the placement once, by list membership, so that the display
        # name and every parser name agree on where the term goes.
        is_change = term in change_terms
        show_name = _attach(show_measure, term, is_change)
        for synonym in synonyms:
            yield show_name, _attach(synonym, term, is_change)
            for alias_name, alias_values in period_aliases:
                # An empty name would match every term, so skip it.
                if not alias_name or alias_name not in term:
                    continue
                for alias in _split_terms(alias_values):
                    expanded = term.replace(alias_name, alias)
                    yield show_name, _attach(synonym, expanded, is_change)


def process_excel_and_db(input_excel_path1, input_excel_path2, output_file_path):
    """Load measure/period configuration from Excel into MySQL and export name pairs.

    Steps:
      1. Read sheet ``Sheet7`` of ``input_excel_path1`` and insert one row per
         record into ``measure_create_config``.
      2. Read sheet ``Sheet11`` of ``input_excel_path2`` and insert one row per
         record into ``measure_create_period``.
      3. Read both tables back and write ``show_name,parser_name`` lines to
         ``output_file_path`` (UTF-8, overwritten).

    The connection settings come from the module-level names ``MYSQL_HOST``,
    ``MYSQL_USER``, ``MYSQL_PASSWORD`` and ``MYSQL_DB``; in this file they are
    only assigned under the ``__main__`` guard, so the function raises
    ``NameError`` if called without them being defined.

    Args:
        input_excel_path1: Path of the workbook holding the measure definitions.
        input_excel_path2: Path of the workbook holding the period expressions.
        output_file_path: Path of the text file to write.
    """
    # Read both workbooks before touching the database so that a missing or
    # malformed file cannot leave the tables half-loaded.
    measure_rows = pd.read_excel(
        input_excel_path1, sheet_name='Sheet7', header=0
    ).to_dict(orient='records')
    period_rows = pd.read_excel(
        input_excel_path2, sheet_name='Sheet11', header=0
    ).to_dict(orient='records')

    insert_query = '''
    INSERT INTO measure_create_config
    (config_id, meta_measure, same_mean_measure, measure_period, change_type, black_list)
    VALUES (%s, %s, %s, %s, %s, %s)
    '''
    period_insert_query = '''
    INSERT INTO measure_create_period
    (period_name, same_mean_period)
    VALUES (%s, %s)
    '''
    # Columns are named explicitly (same names as in the INSERTs above) so the
    # positional unpacking below does not depend on the table's column order.
    data_query = '''
    SELECT meta_measure, same_mean_measure, measure_period, change_type
    FROM measure_create_config WHERE delete_status = 0
    '''
    period_query = '''
    SELECT period_name, same_mean_period FROM measure_create_period
    '''

    conn = mysql.connector.connect(
        host=MYSQL_HOST,
        user=MYSQL_USER,
        password=MYSQL_PASSWORD,
        database=MYSQL_DB
    )
    try:
        cursor = conn.cursor()
        try:
            # Insert the measure definitions; the row id is the MD5 of the name.
            for row in measure_rows:
                show_measure = str(row['指标'])
                cursor.execute(insert_query, (
                    utils.get_md5(show_measure),
                    show_measure,
                    str(row['同义表述']),
                    str(row['周期']),
                    str(row['变动']),
                    str(row['黑名单词']),
                ))
            conn.commit()

            # Insert the period expressions and their synonyms.
            for row in period_rows:
                cursor.execute(
                    period_insert_query,
                    (str(row['标准表述']), str(row['同义表述'])),
                )
            conn.commit()

            # Read everything back, including rows that were already present.
            cursor.execute(data_query)
            config_records = cursor.fetchall()
            cursor.execute(period_query)
            period_aliases = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

    with open(output_file_path, 'w', encoding='utf-8') as file:
        for show_measure, same_mean_measure, period_measure, change_measure in config_records:
            synonyms = _split_terms(same_mean_measure)
            # NOTE(review): the canonical name is assumed to always count as
            # one of its own synonyms, even when no synonyms are configured
            # -- confirm.
            synonyms.append(show_measure)
            pairs = _iter_name_pairs(
                show_measure,
                synonyms,
                _split_terms(period_measure),
                _split_terms(change_measure),
                period_aliases,
            )
            file.writelines(
                f'{show_name},{parser_name}\n' for show_name, parser_name in pairs
            )
def measure_config_to_db(conn, cursor, file_path):
    """Insert the ``measure,ori_measure`` pairs of a text file into MySQL.

    Each non-empty line of ``file_path`` is split on commas; the first field
    is the measure name and the second the original measure name. One row per
    line is inserted into ``measure_config_third_quarter``, with the ids
    computed by ``utils.get_md5`` from the respective names. All rows are
    committed together once the whole file has been processed.

    Args:
        conn: Open database connection, used for the commit.
        cursor: Cursor on ``conn`` used to run the inserts.
        file_path: Path of the UTF-8 text file to load.
    """
    insert_query = '''
    INSERT INTO measure_config_third_quarter
    (measure_id, measure_name, ori_measure_id, ori_measure_name)
    VALUES (%s, %s, %s, %s)
    '''
    # NOTE: a duplicate check on ori_measure_id was once sketched here but is
    # disabled. The per-line SELECT that fed it has been removed because its
    # result was never used. If the check is revived, load the existing ids
    # into a set once before the loop instead of querying for every line.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            config_list = line.strip().split(',')
            if len(config_list) < 2:
                # Blank or malformed line: nothing to insert.
                continue
            measure = config_list[0]
            ori_measure = config_list[1]
            data_to_insert = (
                utils.get_md5(measure),
                measure,
                utils.get_md5(ori_measure),
                ori_measure,
            )
            cursor.execute(insert_query, data_to_insert)
    conn.commit()
def insert_measure_vector(conn, cursor):
    """Store an embedding vector in Redis for every measure that lacks one.

    Reads ``(ori_measure_id, ori_measure_name)`` rows from
    ``measure_config_1024`` and, for each id that is not yet a field of the
    Redis hash ``measure_config``, embeds the name with
    ``utils.embed_with_str`` and stores the stringified vector under that id.

    The Redis client and ``conn`` are both closed before returning, also when
    an error occurs, so the caller must not reuse ``conn`` afterwards.

    Args:
        conn: Open database connection; closed by this function.
        cursor: Cursor on ``conn`` used to run the query.
    """
    # SECURITY: the Redis host and password are hard-coded here; they should
    # come from configuration instead of source control.
    # Test environment host: 123.60.153.169
    redis_client = redis.Redis(host='192.168.0.172', port=6379, password='Xgf_redis', db=6)
    # NOTE(review): this reads measure_config_1024, whereas
    # measure_config_to_db in this file writes measure_config_third_quarter
    # -- confirm which table is intended.
    select_query = '''
    SELECT ori_measure_id,ori_measure_name FROM measure_config_1024
    '''
    try:
        try:
            cursor.execute(select_query)
            for measure_id, measure_name in cursor.fetchall():
                if redis_client.hexists('measure_config', measure_id):
                    # Already embedded; the stored value is not needed here.
                    continue
                print('新增指标', measure_name)
                vector_obj = utils.embed_with_str(measure_name)
                measure_vector = str(vector_obj.output["embeddings"][0]["embedding"])
                redis_client.hset('measure_config', measure_id, measure_vector)
        finally:
            redis_client.close()
    finally:
        conn.close()
#from config import MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, MYSQL_DB
if __name__ == "__main__":
    # These assignments create module-level globals; process_excel_and_db
    # reads MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD and MYSQL_DB from them.
    # SECURITY: credentials are hard-coded; prefer loading them from config.
    MYSQL_HOST = '121.37.185.246'
    MYSQL_PORT = 3306
    MYSQL_USER = 'financial'
    MYSQL_PASSWORD = 'financial_8000'
    MYSQL_DB = 'financial_report'

    # Single source of truth for the intermediate file shared by both steps.
    file_path = 'out_2022_new_year.txt'

    # The local measure_create_config and measure_create_period tables must
    # be emptied before running this step.
    process_excel_and_db(
        'ttt_1.xlsx',  # measure definition workbook
        'period_1.xlsx',  # period expression workbook
        file_path  # output file
    )

    conn = mysql.connector.connect(
        host=MYSQL_HOST,
        port=MYSQL_PORT,
        user=MYSQL_USER,
        password=MYSQL_PASSWORD,
        database=MYSQL_DB
    )
    cursor = conn.cursor()
    measure_config_to_db(conn, cursor, file_path)
    # insert_measure_vector closes conn when it is done.
    insert_measure_vector(conn, cursor)