Skip to content

Instantly share code, notes, and snippets.

@sugatoray
Forked from tomaarsen/export_locally.py
Created October 15, 2024 14:32
Show Gist options
  • Save sugatoray/6d8fe44bdc5f35e2bc83bcf64b54ff50 to your computer and use it in GitHub Desktop.

Revisions

  1. @tomaarsen tomaarsen created this gist Oct 15, 2024.
    27 changes: 27 additions & 0 deletions export_locally.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,27 @@
    # requires sentence_transformers>=3.2.0
    from sentence_transformers import SentenceTransformer, export_optimized_onnx_model, export_dynamic_quantized_onnx_model

    # The model to export to ONNX (+ optimize, quantize), OpenVINO
    model_id = "mixedbread-ai/mxbai-embed-large-v1"
    # Where to save the exported models locally
    output_dir = model_id.replace("/", "-")

    onnx_model = SentenceTransformer(model_id, backend="onnx", model_kwargs={"export": True})
    onnx_model.save_pretrained(output_dir)

    for optimization_config in ["O1", "O2", "O3", "O4"]:
    export_optimized_onnx_model(
    onnx_model,
    optimization_config=optimization_config,
    model_name_or_path=output_dir,
    )

    for quantization_config in ['arm64', 'avx2', 'avx512', 'avx512_vnni']:
    export_dynamic_quantized_onnx_model(
    onnx_model,
    quantization_config=quantization_config,
    model_name_or_path=output_dir,
    )

    openvino_model = SentenceTransformer(model_id, backend="openvino")
    openvino_model.save_pretrained(output_dir)
    34 changes: 34 additions & 0 deletions export_to_hub.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,34 @@
    # requires sentence_transformers>=3.2.0
    from sentence_transformers import SentenceTransformer, export_optimized_onnx_model, export_dynamic_quantized_onnx_model

    # The model to export to ONNX (+ optimize, quantize), OpenVINO
    model_id = "mixedbread-ai/mxbai-embed-large-v1"
    # The repository to push the ONNX, OpenVINO models to
    output_model_id = "tomaarsen/mxbai-embed-large-v1-exported"
    # Do we push directly, or create a PR? A PR is useful for reviewing the changes
    # before merging or if you don't have write access.
    create_pr = False

    onnx_model = SentenceTransformer(model_id, backend="onnx", model_kwargs={"export": True})
    onnx_model.push_to_hub(output_model_id, exist_ok=True, create_pr=create_pr)

    for optimization_config in ["O1", "O2", "O3", "O4"]:
    export_optimized_onnx_model(
    onnx_model,
    optimization_config=optimization_config,
    model_name_or_path=output_model_id,
    push_to_hub=True,
    create_pr=create_pr,
    )

    for quantization_config in ['arm64', 'avx2', 'avx512', 'avx512_vnni']:
    export_dynamic_quantized_onnx_model(
    onnx_model,
    quantization_config=quantization_config,
    model_name_or_path=output_model_id,
    push_to_hub=True,
    create_pr=create_pr,
    )

    openvino_model = SentenceTransformer(model_id, backend="openvino")
    openvino_model.push_to_hub(output_model_id, exist_ok=True, create_pr=create_pr)