"""Smoke-test ONNX-exported sentence-embedding models against the original.

Reads conversion_config.json, embeds two semantically similar sentences with
each exported ONNX precision variant and with the original SentenceTransformer
model, and prints the cosine similarity of each pair of embeddings so the
conversion quality can be compared by eye.
"""

# Standard library
import gc
import json

# Third-party
import onnxruntime as rt
import transformers
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
# Conversion settings produced by the export step. Keys read below:
#   model_id, number_of_generated_embeddings, precision_to_filename_map.
with open('conversion_config.json') as json_file:
    # FIX: this assignment must be indented inside the `with` block.
    conversion_config = json.load(json_file)

model_id = conversion_config["model_id"]
# NOTE(review): number_of_generated_embeddings is read but never used in this
# script — presumably consumed by a sibling conversion script; verify.
number_of_generated_embeddings = conversion_config["number_of_generated_embeddings"]
# Maps a precision label (e.g. "fp16") to the ONNX file exported at that precision.
precision_to_filename_map = conversion_config["precision_to_filename_map"]
# Two semantically similar sentences: a faithful conversion should yield a
# high cosine similarity between their embeddings.
sentences_1 = 'How is the weather today?'
sentences_2 = 'What is the current weather like today?'

# FIX: typo "similiarity" -> "similarity" in the user-facing message.
print(f"Testing on cosine similarity between sentences: \n'{sentences_1}'\n'{sentences_2}'\n\n\n")

# Tokenizer files are expected next to this script (the converted model dir).
tokenizer = transformers.AutoTokenizer.from_pretrained("./")
enc1 = tokenizer(sentences_1)
enc2 = tokenizer(sentences_2)
| |
# Embed both sentences with every exported ONNX precision variant.
# FIX: the loop body below must be indented under the `for` statement.
for precision, file_name in precision_to_filename_map.items():
    onnx_session = rt.InferenceSession(file_name)

    # The single-sentence encodings are wrapped in a list to form a batch of
    # one. Output index [1] is assumed to be the pooled sentence embedding
    # (with [0] the per-token hidden states) — TODO confirm against the
    # exported model's output signature.
    embeddings_1_onnx = onnx_session.run(
        None,
        {"input_ids": [enc1.input_ids], "attention_mask": [enc1.attention_mask]},
    )[1][0]
    embeddings_2_onnx = onnx_session.run(
        None,
        {"input_ids": [enc2.input_ids], "attention_mask": [enc2.attention_mask]},
    )[1][0]

    # Release the session before the next iteration loads another model file,
    # keeping peak memory down.
    del onnx_session
    gc.collect()

    # FIX: typo "similiarity" -> "similarity" in the user-facing message.
    print(f'Cosine similarity for ONNX model with precision "{precision}" is {str(cos_sim(embeddings_1_onnx, embeddings_2_onnx))}')

# Baseline: embed the same sentences with the original model for comparison.
# trust_remote_code is needed here for models that ship custom modeling code.
model = SentenceTransformer(model_id, trust_remote_code=True)

# FIX: trust_remote_code is a constructor-only option; SentenceTransformer.encode()
# does not accept it and passing it raises TypeError.
embeddings_1_sentence_transformer = model.encode(sentences_1, normalize_embeddings=True)
embeddings_2_sentence_transformer = model.encode(sentences_2, normalize_embeddings=True)

# FIX: typo "similiarity" -> "similarity" in the user-facing message.
print('Cosine similarity for original sentence transformer model is '
      + str(cos_sim(embeddings_1_sentence_transformer, embeddings_2_sentence_transformer)))