pdf_code/milvus_init.py

33 lines
1.4 KiB
Python
Raw Normal View History

2025-09-09 17:39:44 +08:00
from pymilvus import connections, CollectionSchema, Collection,utility,FieldSchema,DataType
# Initialise the Milvus collection used to store PDF measure embeddings.
#
# WARNING: this script is destructive. Any existing collection with the same
# name is dropped (all of its data is lost) and recreated empty with the
# schema and index defined below.

# Name of the collection this script (re)creates from scratch.
COLLECTION_NAME = "pdf_measure_v4"

# Connect to the Milvus instance running on the local machine.
# (A remote test server was previously used instead:
#  host='124.70.129.232', port='19530'.)
connections.connect(host='127.0.0.1', port='19530')

# Drop the old collection only if it is actually there, so that the very
# first run against a fresh Milvus instance does not depend on how
# drop_collection treats a missing collection.
if utility.has_collection(COLLECTION_NAME):
    utility.drop_collection(COLLECTION_NAME)

# Field definitions.
fields = [
    # Primary key; auto_id=True means the value is not supplied on insert.
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    # 1536-dimensional float embedding; this is the field that gets indexed.
    FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=1536),
    FieldSchema(name="table_num", dtype=DataType.INT16),
    FieldSchema(name="table_index", dtype=DataType.INT16),
    FieldSchema(name="measure_name", dtype=DataType.VARCHAR, max_length=200),
    FieldSchema(name="measure_value", dtype=DataType.VARCHAR, max_length=200),
    FieldSchema(name="file_id", dtype=DataType.VARCHAR, max_length=200),
    FieldSchema(name="measure_unit", dtype=DataType.VARCHAR, max_length=200),
]

# Collection schema built from the fields above.
schema = CollectionSchema(fields=fields, description="My Milvus collection")

# Create the collection. The returned handle is reused for indexing and
# loading; there is no need to look the collection up a second time.
collection = Collection(name=COLLECTION_NAME, schema=schema)

# IVF_FLAT index with cosine similarity on the embedding field.
index_params = {
    "index_type": "IVF_FLAT",
    "metric_type": "COSINE",
    "params": {"nlist": 128},
}
collection.create_index(field_name="vector", index_params=index_params)

# Load the collection into memory so it is ready to be searched.
collection.load()