100 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Python
		
	
	
	
		
		
			
		
	
	
			100 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Python
		
	
	
	
|  | # Requires transformers>=4.51.0 | ||
|  | 
 | ||
import datetime
import logging
import os
import time
from http import HTTPStatus

import dashscope
import torch
import torch.nn.functional as F
from modelscope import AutoTokenizer, AutoModel
from torch import Tensor
|  | 
 | ||
|  | 
 | ||
# SECURITY: credentials must never be committed to source control. The key is
# taken from the DASHSCOPE_API_KEY environment variable instead of a literal.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked and rotated.
dashscope.api_key = os.environ.get('DASHSCOPE_API_KEY')
|  | 
 | ||
logger = logging.getLogger(__name__)


def embed_with_str(input):
    """Request text embeddings from DashScope, retrying while rate-limited.

    The request is sent with the ``text_embedding_v2`` model. When the service
    answers with HTTP 429 the call is retried after a short, linearly growing
    pause (0.2 s, then +0.1 s per retry), up to five attempts in total.

    Args:
        input: Value forwarded unchanged as the ``input`` argument of
            ``dashscope.TextEmbedding.call``. (The name shadows the builtin
            but is kept so existing keyword callers continue to work.)

    Returns:
        The response object when its status code is 200 OK. ``None`` when the
        service answers with any other non-429 status, or when every attempt
        was rate-limited.
    """
    max_retry = 5
    delay = 0.2  # seconds to pause after a rate-limited attempt
    for _ in range(max_retry):
        resp = dashscope.TextEmbedding.call(
            model=dashscope.TextEmbedding.Models.text_embedding_v2,
            input=input)
        if resp.status_code == HTTPStatus.OK:
            return resp
        if resp.status_code != HTTPStatus.TOO_MANY_REQUESTS:
            logger.error(f'请求失败,状态码:{resp.status_code}')
            return None
        # Rate-limited by the service: actually wait before the next attempt,
        # as the log message promises, instead of retrying immediately.
        logger.info(f'触发限流,等待{delay}秒后重试')
        time.sleep(delay)
        delay += 0.1
    logger.error('重试超过上限')
    return None
|  | 
 | ||
def last_token_pool(last_hidden_states: Tensor,
                    attention_mask: Tensor) -> Tensor:
    """Pick one hidden-state vector per row: the one at the last attended position.

    If the final column of ``attention_mask`` sums to the number of rows
    (i.e. every row ends on an attended position, as with left padding --
    assuming a 0/1 mask), the last position of every row is returned directly.
    Otherwise each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Hidden states indexed as ``[row, position, ...]``
            (presumably ``(batch, seq_len, hidden)`` -- confirm with caller).
        attention_mask: Mask indexed as ``[row, position]``.

    Returns:
        The selected hidden state for every row.
    """
    num_rows = attention_mask.shape[0]
    if attention_mask[:, -1].sum() == num_rows:
        # Every sequence ends at the final position; no per-row lookup needed.
        return last_hidden_states[:, -1]

    # Index of the last attended position in each row.
    last_positions = attention_mask.sum(dim=1) - 1
    row_index = torch.arange(last_hidden_states.shape[0],
                             device=last_hidden_states.device)
    return last_hidden_states[row_index, last_positions]
|  | 
 | ||
|  | 
 | ||
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the two-line prompt: an ``Instruct:`` line followed by a ``Query:`` line."""
    instruction_line = f'Instruct: {task_description}'
    query_line = f'Query:{query}'
    return '\n'.join((instruction_line, query_line))
|  | 
 | ||
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a web search query, retrieve relevant passages that answer the query'

queries = [
    get_detailed_instruct(task, 'What is the capital of China?'),
    get_detailed_instruct(task, 'Explain gravity'),
]
# No need to add instruction for retrieval documents
documents = [
    "The capital of China is Beijing.",
    "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.",
]
input_texts = queries + documents

tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen3-Embedding-0.6B', padding_side='left')
model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B')

# We recommend enabling flash_attention_2 for better acceleration and memory saving.
# model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B', attn_implementation="flash_attention_2", torch_dtype=torch.float16).cuda()

# Timestamp printed before the local embedding run (a second one follows it).
print(datetime.datetime.now())
max_length = 8192

# Tokenize the input texts
batch_dict = tokenizer(
    input_texts,
    padding=True,
    truncation=True,
    max_length=max_length,
    return_tensors="pt",
)
batch_dict = batch_dict.to(model.device)

# Inference only: disable autograd so no computation graph is retained.
with torch.no_grad():
    outputs = model(**batch_dict)
    embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
    # normalize embeddings
    embeddings = F.normalize(embeddings, p=2, dim=1)
print("=========embeddings=========")
print(datetime.datetime.now())

# Query-vs-document similarity: rows are queries, columns are documents.
# The split point follows the number of queries instead of a hard-coded 2.
num_queries = len(queries)
scores = embeddings[:num_queries] @ embeddings[num_queries:].T
print(scores.tolist())
# [[0.7645568251609802, 0.14142508804798126], [0.13549736142158508, 0.5999549627304077]]

# Dimensionality of the locally computed embeddings.
print(embeddings.shape[1])

# Same texts through the remote DashScope embedding service, for comparison.
vector_obj = embed_with_str(input_texts)
if vector_obj is None:
    # embed_with_str returns None on a failed or persistently rate-limited
    # request; fail with a clear message instead of an AttributeError.
    raise RuntimeError('DashScope embedding request failed; check the log output for details')
vector = vector_obj.output["embeddings"][0]["embedding"]
print(len(vector))