diff --git a/README.md b/README.md index 93abeca..f98e6af 100644 --- a/README.md +++ b/README.md @@ -390,6 +390,7 @@ model = AutoModelForSpeechSeq2Seq.from_pretrained( # Enable static cache and compile the forward pass model.generation_config.cache_implementation = "static" +model.generation_config.max_new_tokens = 256 model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True) processor = AutoProcessor.from_pretrained(model_id) @@ -409,7 +410,7 @@ sample = dataset[0]["audio"] # 2 warmup steps for _ in tqdm(range(2), desc="Warm-up step"): with sdpa_kernel(SDPBackend.MATH): - result = pipe(sample.copy()) + result = pipe(sample.copy(), generate_kwargs={"min_new_tokens": 256, "max_new_tokens": 256}) # fast run with sdpa_kernel(SDPBackend.MATH):