wan2-2-fp8da-aoti-preview-2c

Running on Zero

App Files Files Community

r3gm committed on May 15

Commit

7bd100b

verified ·

1 Parent(s): 3bc5207

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -11

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os
 import spaces
 import shutil
 import subprocess
@@ -11,7 +11,6 @@ import time
 import gc
 import uuid
 from tqdm import tqdm
 import cv2
 import numpy as np
 import torch
@@ -331,8 +330,14 @@ torch._dynamo.reset()
 quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
 torch._dynamo.reset()
-aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
-aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
 # pipe.vae.enable_slicing()
 # pipe.vae.enable_tiling()
@@ -419,14 +424,14 @@ def get_inference_duration(
     progress
 ):
     BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
-    BASE_STEP_DURATION = 15
     width, height = resized_image.size
     factor = num_frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
     step_duration = BASE_STEP_DURATION * factor ** 1.5
     gen_time = int(steps) * step_duration
     if guidance_scale > 1:
-        gen_time = gen_time * 1.9
     frame_factor = frame_multiplier // FIXED_FPS
     if frame_factor > 1:
@@ -436,12 +441,12 @@ def get_inference_duration(
     total_time = 15 + gen_time
     if safe_mode:
-        total_time = total_time * 1.20
     return total_time
-@spaces.GPU(duration=get_inference_duration)
 def run_inference(
     resized_image,
     processed_last_image,
@@ -633,7 +638,6 @@ CSS = """
 with gr.Blocks(delete_cache=(3600, 10800)) as demo:
     gr.Markdown(model_title())
-    gr.Markdown("#### ℹ️ **A Note on Performance:** This version prioritizes a straightforward setup over maximum speed, so performance may vary.")
     gr.Markdown("Run Wan 2.2 in just 4-8 steps, fp8 quantization & AoT compilation - compatible with 🧨 diffusers and ZeroGPU")
     with gr.Row():
@@ -649,8 +653,8 @@ with gr.Blocks(delete_cache=(3600, 10800)) as demo:
             )
             safe_mode_checkbox = gr.Checkbox(
                 label="🛠️ Safe Mode",
-                value=False,
-                info="Requests 20% extra processing time to try to prevent unfinished tasks when the server is busy."
             )
             with gr.Accordion("Advanced Settings", open=False):
                 last_image_component = gr.Image(type="pil", label="Last Image (Optional)", sources=["upload", "clipboard"])

+import os; os.system('pip install --upgrade --no-deps spaces')
 import spaces
 import shutil
 import subprocess
 import gc
 import uuid
 from tqdm import tqdm
 import cv2
 import numpy as np
 import torch
 quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
 torch._dynamo.reset()
+spaces.aoti_load(
+    module=pipe.transformer,
+    repo_id='cbensimon/WanTransformer3DModel-sm120-cu130-raa',
+)
+spaces.aoti_load(
+    module=pipe.transformer_2,
+    repo_id='cbensimon/WanTransformer3DModel-sm120-cu130-raa',
+)
 # pipe.vae.enable_slicing()
 # pipe.vae.enable_tiling()
     progress
 ):
     BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
+    BASE_STEP_DURATION = 5.
     width, height = resized_image.size
     factor = num_frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
     step_duration = BASE_STEP_DURATION * factor ** 1.5
     gen_time = int(steps) * step_duration
     if guidance_scale > 1:
+        gen_time = gen_time * 2.4
     frame_factor = frame_multiplier // FIXED_FPS
     if frame_factor > 1:
     total_time = 15 + gen_time
     if safe_mode:
+        total_time = total_time * 1.30
     return total_time
+@spaces.GPU(duration=get_inference_duration, size='xlarge')
 def run_inference(
     resized_image,
     processed_last_image,
 with gr.Blocks(delete_cache=(3600, 10800)) as demo:
     gr.Markdown(model_title())
     gr.Markdown("Run Wan 2.2 in just 4-8 steps, fp8 quantization & AoT compilation - compatible with 🧨 diffusers and ZeroGPU")
     with gr.Row():
             )
             safe_mode_checkbox = gr.Checkbox(
                 label="🛠️ Safe Mode",
+                value=True,
+                info="Requests 30% extra processing time to try to prevent unfinished tasks when the server is busy."
             )
             with gr.Accordion("Advanced Settings", open=False):
                 last_image_component = gr.Image(type="pil", label="Last Image (Optional)", sources=["upload", "clipboard"])