pdf_code/zzb_data_prod/excel.py

121 lines
4.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import json
import utils
from config import MYSQL_HOST,MYSQL_USER,MYSQL_PASSWORD,MYSQL_DB
import mysql.connector
# 读取 Excel 文件
df = pd.read_excel('/Users/zhengfei/Desktop/ttt.xlsx', header=0)
# 将 DataFrame 转换为字典列表
data_list = df.to_dict(orient='records')
period_exra_arr =['当期:本期,本报告期,报告期,报告期内,本年度,本期发生额,2023年,2023年全年,2023年金额','上年同期:上期,上年度,2022年,2022年全年,2022年金额','前年同期:2021年,2021年全年,2021年金额','同比变动:同比增减,同比上升,同比下降,变化幅度,变动比例,本期比上年同期增减,本年比上年增减','报告期末:本报告期末,期末,期末数,期末金额,2023年年末,2023年12月31日','年初至报告期末:上年年末,上年末,2022年年末,2022年12月31日','报告期初:期初,期初数,期初金额,2023年1月1日','当期第一季度:第一季度,1-3月,第一季度1-3月,2023年第一季度','当期第二季度:第二季度,4-6月,第二季度4-6月,2023年第二季度','当期第三季度:第三季度,7-9月,第三季度7-9月,2023年第三季度','当期第四季度:第四季度,10-12月,第四季度10-12月,2023年第四季度']
year = 2023
conn = mysql.connector.connect(
host = MYSQL_HOST,
user = MYSQL_USER,
password = MYSQL_PASSWORD,
database = MYSQL_DB
)
# 创建一个cursor对象来执行SQL语句
cursor = conn.cursor()
# insert_query = '''
# INSERT INTO measure_create_config
# (config_id, meta_measure, same_mean_measure, measure_period, change_type, black_list)
# VALUES (%s, %s, %s, %s, %s, %s)
# '''
# for data in data_list:
# show_measure = str(data['指标'])
# same_mean_measure = str(data['同义表述'])
# period_measure = str(data['周期'])
# change_measure = str(data['变动'])
# black_list = str(data['黑名单词'])
# config_id = utils.get_md5(show_measure)
# insert_query_data = (config_id, show_measure, same_mean_measure, period_measure, change_measure, black_list)
# cursor.execute(insert_query, insert_query_data)
# conn.commit()
# 读取 Excel 文件
# df_period = pd.read_excel('/Users/zhengfei/Desktop/period.xlsx', header=0)
# # 将 DataFrame 转换为字典列表
# period_list = df_period.to_dict(orient='records')
# period_insert_query = '''
# INSERT INTO measure_create_period
# (period_name, same_mean_period)
# VALUES (%s, %s)
# '''
# for data in period_list:
# period_name = str(data['标准表述'])
# same_mean_period = str(data['同义表述'])
# insert_query_data = (period_name, same_mean_period)
# cursor.execute(period_insert_query, insert_query_data)
# conn.commit()
data_query = '''
SELECT * FROM measure_create_config where delete_status = 0
'''
period_query = '''
SELECT * FROM measure_create_period
'''
cursor.execute(data_query)
data_list = cursor.fetchall()
cursor.execute(period_query)
period_list = cursor.fetchall()
for data in data_list:
config_id = data[0]
show_measure = data[1]
same_mean_measure = data[2]
period_measure = data[3]
change_measure = data[4]
same_mean_measure_arr = []
period_measure_arr = []
change_measure_arr = []
if same_mean_measure != 'nan' :
same_mean_measure_arr = same_mean_measure.split(',')
if period_measure != 'nan' :
period_measure_arr = period_measure.split(',')
if change_measure != 'nan' :
change_measure_arr = change_measure.split(',')
for c in change_measure_arr:
period_measure_arr.append(c)
for x in period_measure_arr:
if x in change_measure_arr:
show_name = show_measure+x
else:
show_name = x+show_measure
for y in same_mean_measure_arr:
if x in change_measure:
parser_name = y+x
else:
parser_name = x+y
print(f'{show_name},{parser_name}')
for p in period_list:
period_exra_name = p[0]
period_exra_value = p[1]
if x.find(period_exra_name) != -1:
for v in period_exra_value.split(','):
if x in change_measure:
parser_name = y + x.replace(period_exra_name, v)
else:
parser_name = x.replace(period_exra_name, v) + y
print(f'{show_name},{parser_name}')
cursor.close()
conn.close()