199 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			Python
		
	
	
	
		
		
			
		
	
	
			199 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			Python
		
	
	
	
|  | import pandas as pd | |||
|  | import mysql.connector | |||
|  | import utils | |||
|  | #from config import MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, MYSQL_DB | |||
|  | import redis_service | |||
|  | import redis | |||
|  | 
 | |||
def _split_csv_field(value):
    """Split a comma-separated field; NULL and the text 'nan' count as empty.

    'nan' is what ``str()`` produces for an empty Excel cell when the rows
    are inserted, so it is the stored marker for "no value".
    """
    if value is None or value == 'nan':
        return []
    return value.split(',')


def _iter_name_pairs(show_measure, same_mean_measure, period_measure,
                     change_measure, period_rows):
    """Yield ``(show_name, parser_name)`` pairs for one config row.

    Period words are prefixed to the measure name, change words are suffixed.
    For every synonym of the measure one pair is produced per period/change
    word, plus one extra pair per synonym of any standard period expression
    contained in that word.
    """
    # NOTE(review): a measure without synonyms produces no pairs at all,
    # because the measure's own name is only added when synonyms exist.
    # This mirrors the previous behaviour; confirm it is intended.
    synonyms = _split_csv_field(same_mean_measure)
    if synonyms:
        synonyms.append(show_measure)

    change_words = _split_csv_field(change_measure)
    modifiers = _split_csv_field(period_measure) + change_words

    for modifier in modifiers:
        # Decide prefix/suffix once, by exact membership in the change list.
        # The show name and the parser name must agree on this; a substring
        # test against the raw comma-joined string could misclassify a
        # period word that happens to be contained in a change word.
        is_change = modifier in change_words
        if is_change:
            show_name = show_measure + modifier
        else:
            show_name = modifier + show_measure

        for synonym in synonyms:
            if is_change:
                yield show_name, synonym + modifier
            else:
                yield show_name, modifier + synonym

            for row in period_rows:
                period_name = row[0]
                # Skip NULL/empty names: they cannot be meaningfully replaced.
                if not period_name or period_name not in modifier:
                    continue
                for variant in _split_csv_field(row[1]):
                    replaced = modifier.replace(period_name, variant)
                    if is_change:
                        yield show_name, synonym + replaced
                    else:
                        yield show_name, replaced + synonym


def process_excel_and_db(input_excel_path1, input_excel_path2, output_file_path):
    """Load measure/period configuration from Excel into MySQL and expand it.

    Steps:
      1. Read sheet 'Sheet7' of ``input_excel_path1`` (measure config) and
         sheet 'Sheet11' of ``input_excel_path2`` (period synonyms).
      2. Insert the rows into ``measure_create_config`` and
         ``measure_create_period``.
      3. Read both tables back and write every generated
         ``show_name,parser_name`` combination to ``output_file_path``,
         one pair per line, UTF-8 encoded.

    The connection settings come from the module-level names MYSQL_HOST,
    MYSQL_USER, MYSQL_PASSWORD and MYSQL_DB, which in this file are only
    assigned inside the ``__main__`` block.

    Args:
        input_excel_path1: Excel file with the columns 指标, 同义表述, 周期,
            变动 and 黑名单词.
        input_excel_path2: Excel file with the columns 标准表述 and 同义表述.
        output_file_path: Text file to (over)write with the generated pairs.
    """
    # Read both workbooks before touching the database so that a missing or
    # malformed file cannot leave the tables half loaded.
    config_records = pd.read_excel(
        input_excel_path1, sheet_name='Sheet7', header=0
    ).to_dict(orient='records')
    period_records = pd.read_excel(
        input_excel_path2, sheet_name='Sheet11', header=0
    ).to_dict(orient='records')

    insert_query = '''
        INSERT INTO measure_create_config
        (config_id, meta_measure, same_mean_measure, measure_period, change_type, black_list) 
        VALUES (%s, %s, %s, %s, %s, %s)
    '''
    period_insert_query = '''
        INSERT INTO measure_create_period
        (period_name, same_mean_period) 
        VALUES (%s, %s)
    '''
    data_query = '''
        SELECT * FROM measure_create_config WHERE delete_status = 0
    '''
    period_query = '''
        SELECT * FROM measure_create_period
    '''

    conn = mysql.connector.connect(
        host=MYSQL_HOST,
        user=MYSQL_USER,
        password=MYSQL_PASSWORD,
        database=MYSQL_DB
    )
    try:
        cursor = conn.cursor()
        try:
            for record in config_records:
                show_measure = str(record['指标'])
                cursor.execute(insert_query, (
                    utils.get_md5(show_measure),
                    show_measure,
                    str(record['同义表述']),
                    str(record['周期']),
                    str(record['变动']),
                    str(record['黑名单词']),
                ))

            for record in period_records:
                cursor.execute(period_insert_query, (
                    str(record['标准表述']),
                    str(record['同义表述']),
                ))

            # One commit for the whole load instead of one per row.
            conn.commit()

            cursor.execute(data_query)
            config_rows = cursor.fetchall()

            cursor.execute(period_query)
            period_rows = cursor.fetchall()

            # Rows are read positionally from SELECT *: columns 1-4 are used
            # as meta_measure, same_mean_measure, measure_period, change_type
            # and period columns 0-1 as period_name, same_mean_period.
            # Assumes the table column order matches the INSERT lists above
            # - TODO confirm against the schema.
            with open(output_file_path, 'w', encoding='utf-8') as file:
                for row in config_rows:
                    file.writelines(
                        f'{show_name},{parser_name}\n'
                        for show_name, parser_name in _iter_name_pairs(
                            row[1], row[2], row[3], row[4], period_rows
                        )
                    )
        finally:
            cursor.close()
    finally:
        conn.close()
|  | 
 | |||
def measure_config_to_db(conn, cursor, file_path):
    """Insert the ``show_name,parser_name`` pairs of a text file into MySQL.

    Each non-blank line of ``file_path`` is split on ','. The first field is
    stored as ``measure_name`` and the second as ``ori_measure_name`` in
    ``measure_config_third_quarter``; both ids are ``utils.get_md5`` of the
    respective name. Fields after the second are ignored.

    Blank lines are skipped, and lines with fewer than two fields are
    reported and skipped, instead of aborting the import with an IndexError.

    Args:
        conn: Open database connection; used only to commit.
        cursor: Cursor belonging to ``conn``.
        file_path: UTF-8 text file with one ``measure,ori_measure`` per line.
    """
    insert_query = '''
                INSERT INTO measure_config_third_quarter
                (measure_id, measure_name, ori_measure_id, ori_measure_name) 
                VALUES (%s, %s, %s, %s)
                '''

    # NOTE(review): the duplicate check on ori_measure_id was already
    # disabled in this function. The per-line SELECT that fed it fetched the
    # whole table for every input line and discarded the result, so it has
    # been removed rather than kept as dead work.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line_no, line in enumerate(file, start=1):
            stripped = line.strip()
            if not stripped:
                continue

            fields = stripped.split(',')
            if len(fields) < 2:
                print(f'skipping malformed line {line_no}: {stripped!r}')
                continue

            measure, ori_measure = fields[0], fields[1]
            cursor.execute(insert_query, (
                utils.get_md5(measure),
                measure,
                utils.get_md5(ori_measure),
                ori_measure,
            ))

    # One commit for the whole file instead of one per line.
    conn.commit()
|  | 
 | |||
def insert_measure_vector(conn, cursor, *, redis_host='192.168.0.172',
                          redis_port=6379, redis_password='Xgf_redis',
                          redis_db=6):
    """Make sure every measure in MySQL has an embedding in Redis.

    Reads ``ori_measure_id`` / ``ori_measure_name`` from
    ``measure_config_1024`` and, for every id that is not yet a field of the
    Redis hash ``measure_config``, computes an embedding with
    ``utils.embed_with_str`` and stores its string form under that id.
    Ids that already exist are left untouched.

    The Redis client and ``conn`` are closed before returning, also when an
    error occurs. ``cursor`` is not closed here.

    Args:
        conn: Open database connection; closed by this function.
        cursor: Cursor belonging to ``conn``.
        redis_host: Redis server address (test environment: 123.60.153.169).
        redis_port: Redis server port.
        redis_password: Redis password.
        redis_db: Redis database index.
    """
    # NOTE(review): the Redis endpoint and password are still hard-coded as
    # defaults; consider moving them to configuration.
    # NOTE(review): this reads measure_config_1024 while measure_config_to_db
    # writes measure_config_third_quarter - confirm the table is intended.
    redis_client = redis.Redis(
        host=redis_host, port=redis_port, password=redis_password, db=redis_db
    )
    select_query = '''
                SELECT ori_measure_id,ori_measure_name FROM measure_config_1024
                '''
    try:
        cursor.execute(select_query)
        for measure_id, measure_name in cursor.fetchall():
            # An existing vector used to be read and written back unchanged;
            # skipping it saves two round trips per record.
            if redis_client.hexists('measure_config', measure_id):
                continue

            print('新增指标', measure_name)
            vector_obj = utils.embed_with_str(measure_name)
            measure_vector = str(vector_obj.output["embeddings"][0]["embedding"])
            redis_client.hset('measure_config', measure_id, measure_vector)
    finally:
        redis_client.close()
        conn.close()
|  | #from config import MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, MYSQL_DB | |||
if __name__ == "__main__":
    # Connection settings. process_excel_and_db reads MYSQL_HOST, MYSQL_USER,
    # MYSQL_PASSWORD and MYSQL_DB as module-level globals, so they must keep
    # these names.
    # NOTE(review): credentials are hard-coded; consider restoring the
    # commented-out `from config import ...` instead.
    MYSQL_HOST = '121.37.185.246'
    MYSQL_PORT = 3306
    MYSQL_USER = 'financial'
    MYSQL_PASSWORD = 'financial_8000'
    MYSQL_DB = 'financial_report'

    # The local database's measure_create_config and measure_create_period
    # tables must be emptied before running this script.
    file_path = 'out_2022_new_year.txt'  # generated pairs, consumed below

    process_excel_and_db(
        'ttt_1.xlsx',     # measure configuration workbook
        'period_1.xlsx',  # period synonym workbook
        file_path
    )

    conn = mysql.connector.connect(
        host=MYSQL_HOST,
        port=MYSQL_PORT,  # was defined but never passed to connect()
        user=MYSQL_USER,
        password=MYSQL_PASSWORD,
        database=MYSQL_DB
    )
    cursor = conn.cursor()
    measure_config_to_db(conn, cursor, file_path)
    # insert_measure_vector closes conn when it is done.
    insert_measure_vector(conn, cursor)
|  | 
 |