Update README.md (#4)

- Update README.md (c329f74454be12bf7e475f5406b456f7a5855f96) Co-authored-by: Vaibhav Srivastav <reach-vb@users.noreply.huggingface.co>
2024-10-02 08:21:21 +00:00 · 2024-10-02 08:21:21 +00:00 · bb8db30333
commit bb8db30333
parent 45f16f295c
1 changed files with 7 additions and 7 deletions
--- a/README.md
+++ b/README.md
@ -119,8 +119,8 @@ Whisper is a state-of-the-art model for automatic speech recognition (ASR) and s
 et al. from OpenAI. Trained on >5M hours of labeled data, Whisper demonstrates a strong ability to generalise to many 
 datasets and domains in a zero-shot setting.

-Whisper large-v3-turbo is a distilled version of [Whisper large-v3](https://huggingface.co/openai/whisper-large-v3). In other words, it's the exact same model, except that the number of decoding layers have reduced from 32 to 4.
-As a result, the model is way faster, at the expense of a minor quality degradation.
+Whisper large-v3-turbo is a finetuned version of a pruned [Whisper large-v3](https://huggingface.co/openai/whisper-large-v3). In other words, it's the exact same model, except that the number of decoding layers has been reduced from 32 to 4.
+As a result, the model is way faster, at the expense of a minor quality degradation. You can find more details about it [in this GitHub discussion](https://github.com/openai/whisper/discussions/2363).

 **Disclaimer**: Content for this model card has partly been written by the 🤗 Hugging Face team, and partly copied and 
 pasted from the original model card.
@ -148,7 +148,7 @@ from datasets import load_dataset
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

-model_id = "ylacombe/whisper-large-v3-turbo"
+model_id = "openai/whisper-large-v3-turbo"

 model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
@ -252,7 +252,7 @@ from datasets import Audio, load_dataset
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

-model_id = "ylacombe/whisper-large-v3-turbo"
+model_id = "openai/whisper-large-v3-turbo"

 model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
@ -327,7 +327,7 @@ from datasets import load_dataset
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

-model_id = "ylacombe/whisper-large-v3-turbo"
+model_id = "openai/whisper-large-v3-turbo"

 model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
@ -373,7 +373,7 @@ torch.set_float32_matmul_precision("high")
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

-model_id = "ylacombe/whisper-large-v3-turbo"
+model_id = "openai/whisper-large-v3-turbo"

 model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
@ -472,7 +472,7 @@ checkpoints are summarised in the following table with links to the models on th
 | large    | 1550 M     | x                                                    | [✓](https://huggingface.co/openai/whisper-large)    |
 | large-v2 | 1550 M     | x                                                    | [✓](https://huggingface.co/openai/whisper-large-v2) |
 | large-v3 | 1550 M     | x                                                    | [✓](https://huggingface.co/openai/whisper-large-v3) |
-| large-v3-turbo | 809 M     | x                                                    | [✓](https://huggingface.co/ylacombe/whisper-large-v3-turbo) |
+| large-v3-turbo | 809 M     | x                                                    | [✓](https://huggingface.co/openai/whisper-large-v3-turbo) |


 ## Fine-Tuning