sitex · October 2, 2023 07:57 · sitex · Oct 2, 2023
diff --git a/convert_hf_whisper.py b/convert_hf_whisper.py
 # pip install git+https://github.com/huggingface/transformers --force-reinstall
 # pip install git+https://github.com/bayartsogt-ya/whisper-multiple-hf-datasets

 from multiple_datasets.hub_default_utils import convert_hf_whisper

 # Source checkpoint and destination file for the conversion.
 # NOTE(review): presumably the first argument is a Hugging Face Hub model id (or local path)
 # and the second is the output checkpoint path -- confirm against the library's signature.
 hf_checkpoint = "mitchelldehaven/whisper-large-v2-ru"
 output_checkpoint = "large-v2-ru.pt"
 convert_hf_whisper(hf_checkpoint, output_checkpoint)


 # We also provide a script to convert any Whisper model compatible with the Transformers library, whether an original OpenAI model or a user fine-tuned one.
 # For example, the command below converts the original "large-v2" Whisper model and saves the weights in FP16:

 !pip install transformers[torch]>=4.23

 !ct2-transformers-converter --model openai/whisper-large-v2 --output_dir whisper-large-v2-ct2 \
    --copy_files tokenizer.json --quantization float16

 # The option --model accepts a model name on the Hub or a path to a model directory.
 # If the option --copy_files tokenizer.json is not used, the tokenizer configuration is automatically downloaded when the model is loaded later.
	# pip install git+https://github.com/huggingface/transformers --force-reinstall
	# pip install git+https://github.com/bayartsogt-ya/whisper-multiple-hf-datasets

	from multiple_datasets.hub_default_utils import convert_hf_whisper

	# Source checkpoint and destination file for the conversion.
	# NOTE(review): presumably the first argument is a Hugging Face Hub model id (or local path)
	# and the second is the output checkpoint path -- confirm against the library's signature.
	hf_checkpoint = "mitchelldehaven/whisper-large-v2-ru"
	output_checkpoint = "large-v2-ru.pt"
	convert_hf_whisper(hf_checkpoint, output_checkpoint)


	# We also provide a script to convert any Whisper model compatible with the Transformers library, whether an original OpenAI model or a user fine-tuned one.
	# For example, the command below converts the original "large-v2" Whisper model and saves the weights in FP16:

	!pip install transformers[torch]>=4.23

	!ct2-transformers-converter --model openai/whisper-large-v2 --output_dir whisper-large-v2-ct2 \
	--copy_files tokenizer.json --quantization float16

	# The option --model accepts a model name on the Hub or a path to a model directory.
	# If the option --copy_files tokenizer.json is not used, the tokenizer configuration is automatically downloaded when the model is loaded later.