Update README.md
This commit is contained in:
parent
afda370583
commit
06f233fe06
@@ -390,6 +390,7 @@ model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
|||||||
|
|
||||||
# Enable static cache and compile the forward pass
|
# Enable static cache and compile the forward pass
|
||||||
model.generation_config.cache_implementation = "static"
|
model.generation_config.cache_implementation = "static"
|
||||||
|
model.generation_config.max_new_tokens = 256
|
||||||
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
|
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
|
||||||
|
|
||||||
processor = AutoProcessor.from_pretrained(model_id)
|
processor = AutoProcessor.from_pretrained(model_id)
|
||||||
@@ -409,7 +410,7 @@ sample = dataset[0]["audio"]
|
|||||||
# 2 warmup steps
|
# 2 warmup steps
|
||||||
for _ in tqdm(range(2), desc="Warm-up step"):
|
for _ in tqdm(range(2), desc="Warm-up step"):
|
||||||
with sdpa_kernel(SDPBackend.MATH):
|
with sdpa_kernel(SDPBackend.MATH):
|
||||||
result = pipe(sample.copy())
|
result = pipe(sample.copy(), generate_kwargs={"min_new_tokens": 256, "max_new_tokens": 256})
|
||||||
|
|
||||||
# fast run
|
# fast run
|
||||||
with sdpa_kernel(SDPBackend.MATH):
|
with sdpa_kernel(SDPBackend.MATH):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user