-
-
Save sugatoray/6d8fe44bdc5f35e2bc83bcf64b54ff50 to your computer and use it in GitHub Desktop.
Revisions
-
tomaarsen created this gist
Oct 15, 2024. There are no files selected for viewing
# Export a Sentence Transformers model to ONNX (plus optimized and dynamically
# quantized ONNX variants) and to OpenVINO, saving everything to a local folder.
#
# requires sentence_transformers>=3.2.0
from sentence_transformers import (
    SentenceTransformer,
    export_dynamic_quantized_onnx_model,
    export_optimized_onnx_model,
)

# The model to export to ONNX (+ optimize, quantize), OpenVINO
model_id = "mixedbread-ai/mxbai-embed-large-v1"

# Where to save the exported models locally: the model id with every "/"
# swapped for "-".
output_dir = "-".join(model_id.split("/"))

# Load the model on the ONNX backend, passing export=True through to the
# backend via model_kwargs, and write the result to the output folder first.
onnx_model = SentenceTransformer(
    model_id,
    backend="onnx",
    model_kwargs={"export": True},
)
onnx_model.save_pretrained(output_dir)

# One optimized export per optimization config name, all targeting the same
# output folder.
for level in ("O1", "O2", "O3", "O4"):
    export_optimized_onnx_model(onnx_model, optimization_config=level, model_name_or_path=output_dir)

# One dynamically quantized export per quantization config name
# (presumably one per target instruction set -- confirm against the library docs).
for target in ("arm64", "avx2", "avx512", "avx512_vnni"):
    export_dynamic_quantized_onnx_model(onnx_model, quantization_config=target, model_name_or_path=output_dir)

# Finally, load the same model on the OpenVINO backend and save it to the
# same folder.
openvino_model = SentenceTransformer(model_id, backend="openvino")
openvino_model.save_pretrained(output_dir)
# Export a Sentence Transformers model to ONNX (plus optimized and dynamically
# quantized ONNX variants) and to OpenVINO, pushing everything to a Hub repository.
#
# requires sentence_transformers>=3.2.0
from sentence_transformers import (
    SentenceTransformer,
    export_dynamic_quantized_onnx_model,
    export_optimized_onnx_model,
)

# The model to export to ONNX (+ optimize, quantize), OpenVINO
model_id = "mixedbread-ai/mxbai-embed-large-v1"

# The repository to push the ONNX, OpenVINO models to
output_model_id = "tomaarsen/mxbai-embed-large-v1-exported"

# Do we push directly, or create a PR? A PR is useful for reviewing the changes
# before merging or if you don't have write access.
create_pr = False

# Load the model on the ONNX backend, passing export=True through to the
# backend via model_kwargs, and push that base export first.
onnx_model = SentenceTransformer(
    model_id,
    backend="onnx",
    model_kwargs={"export": True},
)
onnx_model.push_to_hub(output_model_id, exist_ok=True, create_pr=create_pr)

# Keyword arguments shared by every optimized/quantized export call below.
_upload_options = {
    "model_name_or_path": output_model_id,
    "push_to_hub": True,
    "create_pr": create_pr,
}

# One optimized export per optimization config name.
for level in ("O1", "O2", "O3", "O4"):
    export_optimized_onnx_model(onnx_model, optimization_config=level, **_upload_options)

# One dynamically quantized export per quantization config name
# (presumably one per target instruction set -- confirm against the library docs).
for target in ("arm64", "avx2", "avx512", "avx512_vnni"):
    export_dynamic_quantized_onnx_model(onnx_model, quantization_config=target, **_upload_options)

# Finally, load the same model on the OpenVINO backend and push it to the
# same repository.
openvino_model = SentenceTransformer(model_id, backend="openvino")
openvino_model.push_to_hub(output_model_id, exist_ok=True, create_pr=create_pr)