# pdf_code/zzb_data_prod/Mil_unit.py
# (Repository-viewer metadata captured with the file: 74 lines, 2.6 KiB, Python)

from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection,MilvusClient
from config import MILVUS_CLIENT
import time
from datetime import datetime, timedelta
from log_config import logger
def create_partition_by_hour(file_id):
    """Make ``partition_<file_id>`` the only non-default partition.

    Connects to the Milvus server at ``MILVUS_CLIENT``, opens the
    ``pdf_measure_v4`` collection, creates the partition named
    ``partition_<file_id>`` if it does not exist yet, and loads it. Every
    other partition except ``_default`` is then released and dropped.

    Despite the function name, partitions are keyed by ``file_id``, not by
    hour.

    Args:
        file_id: Identifier interpolated into the partition name.

    Raises:
        Any exception raised by pymilvus propagates to the caller; the
        ``default`` connection is disconnected in every case.
    """
    collection_name = "pdf_measure_v4"

    # Connect to the Milvus server (registers the "default" alias).
    connections.connect(uri=MILVUS_CLIENT)
    try:
        collection = Collection(collection_name)

        # Create the partition for the current file id if it is missing.
        partition_name = f"partition_{file_id}"
        if not collection.has_partition(partition_name):
            collection.create_partition(partition_name)
            logger.info(f"Created partition: {partition_name}")

        # NOTE(review): the original indentation was lost, so it is unclear
        # whether the load was meant to run only for a newly created
        # partition. Loading unconditionally is the safer reading; confirm.
        collection.partition(partition_name).load()

        # Drop every partition except the default one and the current one.
        # Names are snapshotted first so the loop never iterates over
        # partition handles that have just been dropped.
        keep = {"_default", partition_name}
        stale_names = [p.name for p in collection.partitions if p.name not in keep]
        for name in stale_names:
            # A partition is released before it is dropped.
            collection.partition(name).release()
            collection.drop_partition(name)
            logger.info(f"Partition '{name}' deleted.")
    finally:
        # Always close the connection, even if a Milvus call above failed.
        connections.disconnect("default")
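# Reference only (disabled): one-off script that drops and recreates the
# "pdf_measure_v4" collection. Running it destroys the existing collection data.
# from pymilvus import connections, CollectionSchema, Collection,utility,FieldSchema,DataType
# # Connect to Milvus on server B
# # connections.connect(host='124.70.129.232', port='19530')  # test server
# connections.connect(host='1.94.60.103', port='19530')  # test server
# # Drop the existing collection (the original comment said "get the collection list", which does not match the call below)
# utility.drop_collection("pdf_measure_v4")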
#
# # Define the fields
# fields = [
# FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
# FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=1536),
# FieldSchema(name="table_num", dtype=DataType.INT16),
# FieldSchema(name="table_index", dtype=DataType.INT16),
# FieldSchema(name="measure_name", dtype=DataType.VARCHAR, max_length=200),
# FieldSchema(name="measure_value", dtype=DataType.VARCHAR, max_length=200),
# FieldSchema(name="file_id", dtype=DataType.VARCHAR, max_length=200),
# FieldSchema(name="measure_unit", dtype=DataType.VARCHAR, max_length=200)
# ]
#
# # Define the collection schema
# schema = CollectionSchema(fields=fields, description="My Milvus collection")
#
# # Create the collection
# collection = Collection(name="pdf_measure_v4", schema=schema)
#
# collection = Collection("pdf_measure_v4")
# index_params = {
# "index_type": "IVF_FLAT",
# "metric_type": "COSINE",
# "params": {"nlist": 128}
# }
# collection.create_index(field_name="vector", index_params=index_params)
# collection.load()