Spaces:
Sleeping
Sleeping
# Build a binary FAISS index from the pre-computed embeddings of a dataset
# saved on disk, and write that index to "conala.index".
#
# Steps: load the dataset from ./vectorized_dataset, read its "embedding"
# column as float32, quantise it to "ubinary" precision, add the result to an
# IndexBinaryFlat, and serialise the index.
import os

import numpy as np
from datasets import load_from_disk
from faiss import IndexBinaryFlat, write_index_binary
from sentence_transformers.quantization import quantize_embeddings

# The dataset directory is resolved against the current working directory,
# so the script must be launched from the folder that contains it.
path_to_vectorised_dataset = os.path.join(os.getcwd(), 'vectorized_dataset')
dataset = load_from_disk(path_to_vectorised_dataset)

embeddings = np.array(dataset["embedding"], dtype=np.float32)
if embeddings.ndim != 2:
    # An empty or ragged "embedding" column does not form a
    # (rows, dimensions) matrix; fail here with a readable message rather
    # than deeper inside the quantisation / FAISS calls.
    raise ValueError(
        "expected a 2-D (rows, dimensions) embedding matrix, "
        f"got shape {embeddings.shape}"
    )

ubinary_embeddings = quantize_embeddings(embeddings, "ubinary")

# A binary index is sized in bits. Derive that size from the quantised array
# instead of hard-coding 384, so the script follows whatever embedding model
# produced the dataset.
# NOTE(review): assumes "ubinary" packs 8 dimensions per uint8 byte, as
# described in the sentence-transformers quantization docs -- confirm.
index = IndexBinaryFlat(ubinary_embeddings.shape[1] * 8)
index.add(ubinary_embeddings)
write_index_binary(index, "conala.index")