# Requires transformers>=4.51.0
import datetime
import logging
import os
import time
from http import HTTPStatus

import dashscope
import torch
import torch.nn.functional as F
from modelscope import AutoTokenizer, AutoModel
from torch import Tensor

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Read the DashScope API key from the environment instead of hardcoding it in source.
dashscope.api_key = os.environ.get('DASHSCOPE_API_KEY', '')


def embed_with_str(input):
    """Call the DashScope text embedding API, retrying with backoff when rate limited."""
    retry = 0
    max_retry = 5
    t = 0.2
    while retry < max_retry:
        # The Alibaba Cloud endpoint is rate limited, so 429 responses are retried below.
        resp = dashscope.TextEmbedding.call(
            model=dashscope.TextEmbedding.Models.text_embedding_v2,
            input=input)
        if resp.status_code == HTTPStatus.OK:
            return resp
        elif resp.status_code == 429:
            logger.info(f'Rate limited, retrying after {t} seconds')
            time.sleep(t)
            retry += 1
            t += 0.1
        else:
            logger.error(f'Request failed, status code: {resp.status_code}')
            return None
    logger.error('Retry limit exceeded')
    return None


def last_token_pool(last_hidden_states: Tensor,
                    attention_mask: Tensor) -> Tensor:
    # Pool by taking the hidden state of each sequence's last non-padding token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]

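
# A minimal sanity check (illustrative only): with left padding, the last position
# of every row is a real token, so last_token_pool can simply take position -1.
# The tensors below are placeholder values, not part of the embedding pipeline.
_mask = torch.tensor([[0, 1, 1], [1, 1, 1]])
_hidden = torch.randn(2, 3, 4)
assert last_token_pool(_hidden, _mask).shape == (2, 4)
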

def get_detailed_instruct(task_description: str, query: str) -> str:
    # Format a query together with its task instruction, as expected by Qwen3-Embedding.
    return f'Instruct: {task_description}\nQuery:{query}'


# Each query must come with a one-sentence instruction that describes the task
task = 'Given a web search query, retrieve relevant passages that answer the query'

queries = [
    get_detailed_instruct(task, 'What is the capital of China?'),
    get_detailed_instruct(task, 'Explain gravity')
]
# No need to add instruction for retrieval documents
documents = [
    "The capital of China is Beijing.",
    "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun."
]
input_texts = queries + documents

tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen3-Embedding-0.6B', padding_side='left')
model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B')

# We recommend enabling flash_attention_2 for better acceleration and memory saving.
# model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B', attn_implementation="flash_attention_2", torch_dtype=torch.float16).cuda()

print(datetime.datetime.now())
max_length = 8192

# Tokenize the input texts
batch_dict = tokenizer(
    input_texts,
    padding=True,
    truncation=True,
    max_length=max_length,
    return_tensors="pt",
)
batch_dict.to(model.device)
outputs = model(**batch_dict)

embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])

# normalize embeddings
embeddings = F.normalize(embeddings, p=2, dim=1)
print("=========embeddings=========")
print(datetime.datetime.now())

scores = (embeddings[:2] @ embeddings[2:].T)
print(len(embeddings.tolist()[0]))  # embedding dimension of the local model
# Expected similarity scores (queries x documents):
# [[0.7645568251609802, 0.14142508804798126], [0.13549736142158508, 0.5999549627304077]]
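
# Print the query-document score matrix; it should roughly match the expected
# values quoted in the comment above.
print(scores.tolist())
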

vector_obj = embed_with_str(input_texts)
if vector_obj is not None:
    vector = vector_obj.output["embeddings"][0]["embedding"]
    print(len(vector))
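
# Note: the remote text_embedding_v2 service and the local Qwen3-Embedding-0.6B
# model use different embedding dimensions, so their vector spaces are not
# interchangeable; the sketch below only compares the sizes.
if vector_obj is not None:
    print(f"DashScope dim: {len(vector)}, local dim: {embeddings.shape[1]}")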