2025-06-11 10:49:55 +08:00
|
|
|
|
# 获取 标签的消息 写入数据库
|
|
|
|
|
|
import pika
|
|
|
|
|
|
import json
|
|
|
|
|
|
import logging, time
|
|
|
|
|
|
from config import *
|
|
|
|
|
|
import pymysql
|
|
|
|
|
|
from elasticsearch import Elasticsearch
|
|
|
|
|
|
import datetime
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
|
|
def message_callback(ch, method, properties, body):
    """Handle one RabbitMQ delivery: persist the tagged news item, then ack.

    The JSON payload is decoded and, unless its ``news_score`` is negative
    (a missing score defaults to -1), passed in turn to ``write_to_mysql``,
    ``write_to_es`` and ``write_to_news``.  The delivery is acknowledged once
    all three calls have returned.

    Args:
        ch: Channel the message arrived on; used to ack/nack the delivery.
        method: Delivery metadata; only ``delivery_tag`` is read.
        properties: Message properties (unused).
        body: Raw message payload, expected to decode to a JSON object.
    """
    try:
        data = json.loads(body)
        # Coerce before comparing: write_to_news() already accepts scores
        # delivered as numeric strings, so a bare `"85" < 0` must not raise
        # TypeError here and get an otherwise valid message rejected.
        news_score = float(data.get('news_score', -1))
        if news_score < 0:
            # Negative (or missing) score: nothing to store, just acknowledge.
            ch.basic_ack(delivery_tag=method.delivery_tag)
            return

        # Store the tag record in MySQL.
        write_to_mysql(data)
        # Mirror the tag record into Elasticsearch.
        write_to_es(data)
        # Promote high-scoring items into the featured-news table.
        write_to_news(data)

        # Manual acknowledgement once every sink has been written.
        ch.basic_ack(delivery_tag=method.delivery_tag)
    except Exception as e:
        print(f"消息处理失败: {str(e)}")
        # Reject WITHOUT requeueing (requeue=False), so a message that fails
        # processing is not redelivered to this consumer in a loop.
        ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)
|
|
|
|
|
|
|
2026-03-05 17:34:29 +08:00
|
|
|
|
def prepare_db_value(value):
    """Convert a payload value into something storable in a DB column.

    * ``None`` stays ``None``.
    * An empty list becomes ``None``.
    * A non-empty list is serialized with ``json.dumps``.
    * Any other value is returned unchanged.
    """
    # Non-list values (including None) pass straight through.
    if not isinstance(value, list):
        return value
    # Empty lists are stored as NULL rather than "[]".
    if not value:
        return None
    return json.dumps(value)
|
|
|
|
|
|
|
2025-06-11 10:49:55 +08:00
|
|
|
|
# Seconds to wait on the featured-news endpoint before giving up.
_JX_REQUEST_TIMEOUT = 30


def write_to_news(data):
    """Promote a high-scoring news item and store its custom fields.

    Items whose ``news_score`` is below 80 are ignored.  Otherwise the
    featured-news HTTP endpoint (``jx_adr`` with its ``news_id`` placeholder
    replaced by the item's id) is called; when it answers with status 200,
    the script-maintained columns of the ``news`` table are updated for the
    row whose ``newsinfo_id`` equals the item's id.

    Args:
        data: Decoded message payload (dict).

    Returns:
        None.  Errors raised while executing the UPDATE are printed, not
        raised; errors from the HTTP call or from opening the MySQL
        connection propagate to the caller.
    """
    news_score = data.get('news_score', 0.0)
    if float(news_score) < 80:  # only items scoring 80 or more are promoted
        return

    # News id carried by the payload.
    news_id = data.get('id', "")
    # str.replace() only accepts strings; an id that arrives as a JSON number
    # would otherwise raise TypeError here.
    adr = jx_adr.replace("news_id", str(news_id))
    print(f"接口地址为{adr}")
    # Without a timeout a stalled endpoint would block the consumer forever.
    response = requests.get(adr, timeout=_JX_REQUEST_TIMEOUT)
    if response.status_code != 200:
        print(f"新闻id:{news_id} 得分:{news_score}, 调用精选接口失败, 错误码:{response.status_code}")
        return
    print(f"新闻id:{news_id} 得分:{news_score}, 调用精选接口成功")

    _update_news_custom_fields(data, news_id)


def _update_news_custom_fields(data, news_id):
    """Write the script-maintained columns of the ``news`` row for *news_id*."""
    # Some custom columns are maintained by this Python script itself
    # (original note by Zhu Sinan).
    conn = pymysql.connect(
        host=MYSQL_HOST_APP,
        port=MYSQL_PORT_APP,
        user=MYSQL_USER_APP,
        password=MYSQL_PASSWORD_APP,
        db=MYSQL_DB_APP,
        charset='utf8mb4',
    )
    try:
        with conn.cursor() as cursor:
            sql = '''UPDATE news
                     set overseas_event = %s,
                         overseas_macro = %s,
                         china_macro = %s,
                         industry_news = %s,
                         company_news = %s,
                         reprint_source = %s,
                         company_name = %s,
                         etf_labels = %s,
                         etf_names = %s,
                         tdx_industry = %s,
                         tdx_industry_confidence = %s,
                         stock_codes = %s,
                         stock_names = %s,
                         political_sensitivity = %s,
                         export_domestic = %s,
                         political_notes = %s,
                         additional_notes = %s,
                         llm_abstract = %s
                     WHERE newsinfo_id = %s '''
            # Order must match the placeholders above.  List-valued fields go
            # through prepare_db_value (JSON text, or NULL when empty).
            values = (
                data.get("overseas_event"),
                data.get("overseas_macro"),
                data.get("china_macro"),
                data.get("industry_news"),
                data.get("company_news"),
                data.get("reprint_source"),
                data.get("company_name"),
                prepare_db_value(data.get("etf_labels", [])),
                prepare_db_value(data.get("etf_names", [])),
                prepare_db_value(data.get("tdx_industry", [])),
                prepare_db_value(data.get("tdx_industry_confidence", [])),
                prepare_db_value(data.get("stock_codes", [])),
                prepare_db_value(data.get("stock_names", [])),
                data.get("political_sensitivity"),
                data.get("export_domestic"),
                data.get("political_notes"),
                data.get("additional_notes"),
                data.get("llm_abstract"),
                news_id,
            )
            cursor.execute(sql, values)
            if cursor.rowcount == 0:
                # The UPDATE affected no row.
                print(f'warning: newsinfo_id={news_id} 不存在,更新 0 行')
            conn.commit()
            print(f"新闻id:{news_id} 海外事件置信度得分:{data.get('overseas_event', None)}, 关联ETF名称列表:{data.get('etf_names', None)}, 审核政治敏感度:{data.get('political_sensitivity', None)}. 自建字段写入精选news表成功!")
    except Exception as e:
        # Best effort: a failed update is reported but does not fail the message.
        print(f"自建字段写入精选news表失败: {str(e)}")
    finally:
        conn.close()
|
|
|
|
|
|
|
2025-06-11 10:49:55 +08:00
|
|
|
|
|
|
|
|
|
|
def write_to_es(data):
    """Write the item's tag record into Elasticsearch.

    Sends a partial-document update to index ``news_info`` for the document
    whose id is ``data['id']``, setting its ``news_tags`` object from the
    payload (missing keys fall back to the defaults given below).

    Args:
        data: Decoded message payload (dict).

    Raises:
        Whatever the Elasticsearch client raises when the update fails; the
        error is not handled here.
    """
    es = Elasticsearch(
        [f"http://{ES_HOST}:{ES_PORT}"],  # scheme is included in the host URL
        basic_auth=(ES_USER, ES_PASSWORD),
    )
    news_id = data.get('id', "")
    # One timestamp for both fields so they cannot differ within one write.
    now = datetime.datetime.now()
    try:
        es.update(
            index="news_info",
            id=news_id,
            doc={
                "news_tags": {
                    "id": news_id,
                    "abstract": data.get('abstract', ""),
                    "title": data.get('title', ""),
                    "rewrite_content": data.get('rewrite_content', ""),
                    "industry_label": data.get('industry_label', []),
                    "industry_confidence": data.get('industry_confidence', []),
                    "industry_score": data.get('industry_score', []),
                    "concept_label": data.get('concept_label', []),
                    "concept_confidence": data.get('concept_confidence', []),
                    "concept_score": data.get('concept_score', []),
                    "public_opinion_score": data.get('public_opinion_score', 10),
                    "China_factor": data.get('China_factor', 0.1),
                    "source": data.get('source', "其他"),
                    "source_impact": data.get('source_impact', 5),
                    "news_score": data.get('news_score', 0.0),
                    "news_id": news_id,
                    "deleted": '0',
                    "create_time": now,
                    "update_time": now,
                }
            },
        )
    finally:
        # A client is built per call, so release its transport every time;
        # otherwise each processed message leaves open connections behind.
        es.close()
    print(f"news_id:{news_id} 得分:{data.get('news_score', 0.0)}, 写入ES成功")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def write_to_mysql(data):
    """Insert the item's tag record into the ``news_tags`` MySQL table.

    List-valued fields are passed through ``prepare_db_value`` (JSON text, or
    NULL when empty); missing keys fall back to the defaults given below.

    Args:
        data: Decoded message payload (dict).

    Returns:
        True when the row was inserted and committed, False when the insert
        failed (the error is printed, not raised).  Errors from opening the
        connection propagate to the caller.
    """
    conn = pymysql.connect(
        host=MYSQL_HOST_APP,
        port=MYSQL_PORT_APP,
        user=MYSQL_USER_APP,
        password=MYSQL_PASSWORD_APP,
        db=MYSQL_DB_APP,
        charset='utf8mb4',
    )
    try:
        with conn.cursor() as cursor:
            sql = """INSERT INTO news_tags
                     (abstract, title, rewrite_content, industry_label, industry_confidence, industry_score, concept_label, concept_confidence, concept_score, public_opinion_score, China_factor, source, source_impact, news_score, news_id, news_score_exp, cluster_score, center_news_id)
                     VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) """
            # Order must match the column list above.
            values = (
                data.get('abstract', ""),
                data.get('title', ""),
                data.get('rewrite_content', ""),
                prepare_db_value(data.get('industry_label', [])),
                prepare_db_value(data.get('industry_confidence', [])),
                prepare_db_value(data.get('industry_score', [])),
                prepare_db_value(data.get('concept_label', [])),
                prepare_db_value(data.get('concept_confidence', [])),
                prepare_db_value(data.get('concept_score', [])),
                data.get('public_opinion_score', 10),
                data.get('China_factor', 0.1),
                data.get('source', "其他"),
                data.get('source_impact', 5),
                data.get('news_score', 0.0),
                data.get('id', ""),
                data.get('news_score_exp', 5.0),
                data.get('cluster_score', 5.0),
                data.get('center_news_id'),
            )
            cursor.execute(sql, values)
            conn.commit()

            news_id = data.get('id', "")
            industry_label = data.get('industry_label', [])
            concept_label = data.get('concept_label', [])
            print(f"{news_id} {industry_label} {concept_label} {data.get('news_score', 0.0)} {data.get('news_score_exp', 5.0)} 写入news_tags 表成功")
    except Exception as e:
        print(f"写入news_tags失败: {str(e)}")
        # Report the failure instead of claiming success.
        return False
    finally:
        conn.close()
    return True
|
|
|
|
|
|
|
|
|
|
|
|
def create_connection():
    """Open and return a blocking RabbitMQ connection to ``localhost``.

    Authenticates with ``mq_user`` / ``mq_password`` and uses a 600-second
    heartbeat, three connection attempts and a 5-second delay between them.
    """
    params = pika.ConnectionParameters(
        host="localhost",
        credentials=pika.PlainCredentials(mq_user, mq_password),
        heartbeat=600,
        connection_attempts=3,
        retry_delay=5,  # seconds between connection attempts
    )
    return pika.BlockingConnection(params)
|
|
|
|
|
|
|
|
|
|
|
|
def start_consumer():
    """Run the RabbitMQ consumer until interrupted, reconnecting on failure.

    Each pass opens a connection, declares the fanout exchange
    ``zzck_llm_exchange`` and the queue ``from_ai_to_mysql``, binds them, and
    consumes with manual acknowledgement via ``message_callback``.  When the
    connection is lost or any other error occurs, the loop sleeps (5, 10 or
    15 seconds depending on the error) and starts over.  ``KeyboardInterrupt``
    ends the loop.
    """
    while True:  # loop instead of recursing, so retries cannot exhaust the stack
        # Reset on every pass: if create_connection() raises, the cleanup in
        # `finally` must not see an unbound name (first pass) or the stale
        # connection object left over from the previous pass.
        connection = None
        try:
            connection = create_connection()
            channel = connection.channel()

            # QoS: at most one unacknowledged message is delivered at a time.
            channel.basic_qos(prefetch_count=1)

            channel.exchange_declare(
                exchange="zzck_llm_exchange",
                exchange_type="fanout",
            )

            # Declared with default arguments only (no durable flag is passed).
            res = channel.queue_declare(queue="from_ai_to_mysql")
            mq_queue = res.method.queue
            channel.queue_bind(
                exchange="zzck_llm_exchange",
                queue=mq_queue,
            )

            # Consume with automatic acknowledgement switched off; the
            # callback acks or nacks each delivery itself.
            channel.basic_consume(
                queue=mq_queue,
                on_message_callback=message_callback,
                auto_ack=False,
            )

            print("消费者已启动,等待消息...")
            channel.start_consuming()

        except pika.exceptions.ConnectionClosedByBroker:
            # The broker closed the connection; possibly a transient error.
            print("连接被代理关闭,将在5秒后重试...")
            time.sleep(5)
        except pika.exceptions.AMQPConnectionError:
            # Could not connect, or the connection failed.
            print("连接失败,将在10秒后重试...")
            time.sleep(10)
        except KeyboardInterrupt:
            print("消费者被用户中断")
            break  # the `finally` clause below closes the connection
        except Exception as e:
            print(f"消费者异常: {str(e)}")
            print("将在15秒后重试...")
            time.sleep(15)
        finally:
            _close_quietly(connection)


def _close_quietly(connection):
    """Close *connection* if it is open, ignoring errors raised while closing."""
    if connection is None:
        return
    try:
        if connection.is_open:
            connection.close()
    except Exception:
        # Best-effort cleanup: a failure to close must not break the retry
        # loop.  Unlike a bare `except:`, this still lets KeyboardInterrupt
        # and SystemExit through.
        pass
|
|
|
|
|
|
|
|
|
|
|
|
# def start_consumer():
|
|
|
|
|
|
# """启动MQ消费者"""
|
|
|
|
|
|
# try:
|
|
|
|
|
|
# credentials = pika.PlainCredentials(mq_user, mq_password)
|
|
|
|
|
|
# connection = pika.BlockingConnection(
|
|
|
|
|
|
# pika.ConnectionParameters(
|
|
|
|
|
|
# host="localhost",
|
|
|
|
|
|
# credentials=credentials,
|
|
|
|
|
|
# heartbeat=600
|
|
|
|
|
|
# )
|
|
|
|
|
|
# )
|
|
|
|
|
|
# channel = connection.channel()
|
|
|
|
|
|
# channel.exchange_declare(
|
|
|
|
|
|
# exchange="zzck_exchange",
|
|
|
|
|
|
# exchange_type="fanout",
|
|
|
|
|
|
# )
|
|
|
|
|
|
|
|
|
|
|
|
# # 声明队列(匹配现有队列类型) queue 的名字可以自定义
|
|
|
|
|
|
# res = channel.queue_declare(
|
|
|
|
|
|
# queue="from_ai_to_mysql"
|
|
|
|
|
|
# )
|
|
|
|
|
|
|
|
|
|
|
|
# mq_queue = res.method.queue
|
|
|
|
|
|
# channel.queue_bind(
|
|
|
|
|
|
# exchange="zzck_llm_exchange",
|
|
|
|
|
|
# queue=mq_queue,
|
|
|
|
|
|
# )
|
|
|
|
|
|
|
|
|
|
|
|
# # 启动消费
|
|
|
|
|
|
# channel.basic_consume(
|
|
|
|
|
|
# queue=mq_queue,
|
|
|
|
|
|
# on_message_callback=message_callback,
|
|
|
|
|
|
|
|
|
|
|
|
# )
|
|
|
|
|
|
|
|
|
|
|
|
# print("消费者已启动,等待消息...")
|
|
|
|
|
|
# channel.start_consuming()
|
|
|
|
|
|
|
|
|
|
|
|
# except Exception as e:
|
|
|
|
|
|
# print(f"消费者启动失败: {str(e)}")
|
|
|
|
|
|
# start_consumer()
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: start consuming only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    start_consumer()
|