diff --git a/README.md b/README.md index 2c3adfc..b0e721f 100644 --- a/README.md +++ b/README.md @@ -50,16 +50,22 @@ Here are some more advanced examples: [HunyuanDiT](hunyuan_dit) +[Hunyuan Image 2.1](hunyuan_image) + [Chroma](chroma) [Lumina Image 2.0](lumina2) [HiDream](hidream) +[Qwen Image](qwen_image) + [Flux](flux) [Edit/InstructPix2Pix Models](edit_models) +[Omnigen2](omnigen) + [Stable Video Diffusion](video) [Mochi](mochi) @@ -70,7 +76,11 @@ Here are some more advanced examples: [Nvidia Cosmos](cosmos) -[Wan](wan) +[Nvidia Cosmos Predict2](cosmos_predict2) + +[Wan 2.1](wan) + +[Wan 2.2](wan22) [Audio Models](audio) diff --git a/audio/README.md b/audio/README.md index 6972124..97caf63 100644 --- a/audio/README.md +++ b/audio/README.md @@ -1,5 +1,15 @@ # Audio Examples +## ACE Step Model + +Download the [ace_step_v1_3.5b.safetensors](https://huggingface.co/Comfy-Org/ACE-Step_ComfyUI_repackaged/blob/main/all_in_one/ace_step_v1_3.5b.safetensors) and save it to your ComfyUI/models/checkpoints/ directory. + + +The following FLAC audio file contains a workflow; you can download it and load it or drag it onto the ComfyUI interface. + + + + ## Stable Audio Open 1.0 Download the [model.safetensors from this page](https://huggingface.co/google-t5/t5-base/blob/main/model.safetensors) and save it as `t5_base.safetensors` to your ComfyUI/models/text_encoders/ directory. diff --git a/audio/ace_step_example.flac b/audio/ace_step_example.flac new file mode 100644 index 0000000..57b2293 Binary files /dev/null and b/audio/ace_step_example.flac differ diff --git a/chroma/README.md b/chroma/README.md index 9c83fa7..49568f9 100644 --- a/chroma/README.md +++ b/chroma/README.md @@ -4,7 +4,7 @@ This is a model that is modified from [flux](../flux/) and has had some changes To use it you will need one of the t5xxl text encoder model files that you can find in: [this repo](https://huggingface.co/comfyanonymous/flux_text_encoders/tree/main), fp16 is recommended, if you don't have that much memory fp8_scaled are recommended. Put it in the ComfyUI/models/text_encoders/ folder. -You can then download the latest chroma checkpoint from the [official huggingface page](https://huggingface.co/lodestones/Chroma), I recommend grabbing the most recent file. It goes in the ComfyUI/models/diffusion_models/ folder. +You can then download the latest chroma checkpoint from the [official huggingface page](https://huggingface.co/lodestones/Chroma1-HD). It goes in the ComfyUI/models/diffusion_models/ folder.
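If you prefer scripting these downloads, here is a minimal sketch using the `huggingface_hub` library. The exact Chroma1-HD checkpoint filename below is an assumption; check the repo listing for the real file name before running.

```python
# Minimal sketch: fetch the Chroma text encoder and checkpoint into the
# ComfyUI model folders with huggingface_hub. The Chroma1-HD filename is
# a placeholder; verify it on the repo page first.
from pathlib import Path
from huggingface_hub import hf_hub_download

comfyui = Path("ComfyUI")  # adjust to your ComfyUI install location

# t5xxl text encoder (fp16 recommended; fp8_scaled if memory is tight)
hf_hub_download(
    repo_id="comfyanonymous/flux_text_encoders",
    filename="t5xxl_fp16.safetensors",
    local_dir=comfyui / "models" / "text_encoders",
)

hf_hub_download(
    repo_id="lodestones/Chroma1-HD",
    filename="chroma-hd.safetensors",  # assumption: check the repo listing
    local_dir=comfyui / "models" / "diffusion_models",
)
```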
Load or drag this image on ComfyUI to get the example workflow: diff --git a/chroma/chroma_example.png b/chroma/chroma_example.png index a1f4245..77ff321 100644 Binary files a/chroma/chroma_example.png and b/chroma/chroma_example.png differ diff --git a/chroma/fennec_girl_flowers.png b/chroma/fennec_girl_flowers.png new file mode 100644 index 0000000..5f95987 Binary files /dev/null and b/chroma/fennec_girl_flowers.png differ diff --git a/chroma/fennec_girl_hug.png b/chroma/fennec_girl_hug.png new file mode 100644 index 0000000..76c091c Binary files /dev/null and b/chroma/fennec_girl_hug.png differ diff --git a/chroma/fennec_girl_sing.png b/chroma/fennec_girl_sing.png new file mode 100644 index 0000000..a308c4a Binary files /dev/null and b/chroma/fennec_girl_sing.png differ diff --git a/cosmos/README.md b/cosmos/README.md index 5730bfd..11e343b 100644 --- a/cosmos/README.md +++ b/cosmos/README.md @@ -1,4 +1,6 @@ -# Nvidia Cosmos Models +# Original Nvidia Cosmos Models + +For the newer Cosmos models, see [Cosmos Predict2](../cosmos_predict2). [Nvidia Cosmos](https://www.nvidia.com/en-us/ai/cosmos/) is a family of "World Models". ComfyUI currently supports specifically the 7B and 14B text to video diffusion models and the 7B and 14B image to video diffusion models. diff --git a/cosmos_predict2/README.md b/cosmos_predict2/README.md new file mode 100644 index 0000000..bf77457 --- /dev/null +++ b/cosmos_predict2/README.md @@ -0,0 +1,46 @@ +# Nvidia Cosmos Predict2 + +These are a family of text to image and image to video models from Nvidia. + +## Files to Download + +You will first need: + +#### Text encoder and VAE: + +[oldt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/comfyanonymous/cosmos_1.0_text_encoder_and_VAE_ComfyUI/tree/main/text_encoders) goes in: ComfyUI/models/text_encoders/ + +[wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors) goes in: ComfyUI/models/vae/ + + +Note: oldt5_xxl is not the same as the t5xxl used in flux and other models. +oldt5_xxl is t5xxl 1.0, while the one used in flux and others is t5xxl 1.1. + + +You can find all the diffusion models (they go in ComfyUI/models/diffusion_models/) here: [Repackaged safetensors files](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/tree/main) or [Official Nvidia Model Files](https://huggingface.co/collections/nvidia/cosmos-predict2-68028efc052239369a0f2959) + + +## Workflows + +### Text to Image + +This workflow uses the 2B text to image cosmos predict2 model. The file used in the workflow is [cosmos_predict2_2B_t2i.safetensors](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/blob/main/cosmos_predict2_2B_t2i.safetensors); this file goes in: ComfyUI/models/diffusion_models/ + +![Example](cosmos_predict2_2b_t2i_example.png) + +You can load this image in [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the full workflow. + +I think the 2B model is the most interesting one, but you can find the bigger 14B model here: [cosmos_predict2_14B_t2i.safetensors](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/blob/main/cosmos_predict2_14B_t2i.safetensors) and use it in the workflow above. + + +### Image to Video + +These models are pretty picky about the resolution/length of the videos. This workflow is for the 480p models; for the 720p models you will have to set the resolution to 720p or your results might be bad. + +This workflow uses the 2B image to video cosmos predict2 model.
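Because these models are picky about sizes, a quick pre-flight check of the video dimensions can save a wasted run. A minimal sketch, assuming the 480p variants expect roughly 848×480 (the size used in the workflow below) and the 720p variants 1280×720:

```python
# Sanity-check width/height before queueing a Cosmos Predict2 video job.
# The expected sizes are assumptions taken from the example workflow
# (848x480 for the 480p model); adjust them if the official docs differ.
EXPECTED = {
    "480p": (848, 480),
    "720p": (1280, 720),
}

def check_resolution(variant: str, width: int, height: int) -> None:
    exp_w, exp_h = EXPECTED[variant]
    # allow either landscape or portrait orientation of the expected size
    if (width, height) not in {(exp_w, exp_h), (exp_h, exp_w)}:
        raise ValueError(
            f"{variant} models expect {exp_w}x{exp_h} or {exp_h}x{exp_w}, "
            f"got {width}x{height}"
        )

check_resolution("480p", 848, 480)  # matches the image to video workflow
```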
The file used in the workflow is [cosmos_predict2_2B_video2world_480p_16fps.safetensors](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/blob/main/cosmos_predict2_2B_video2world_480p_16fps.safetensors) this file goes in: ComfyUI/models/diffusion_models/ + +![Example](cosmos_predict2_2b_i2v_example.webp) + +[Workflow in Json format](cosmos_predict2_2b_i2v_example.json) + + diff --git a/cosmos_predict2/cosmos_predict2_2b_i2v_example.json b/cosmos_predict2/cosmos_predict2_2b_i2v_example.json new file mode 100644 index 0000000..f7d8bdd --- /dev/null +++ b/cosmos_predict2/cosmos_predict2_2b_i2v_example.json @@ -0,0 +1,548 @@ +{ + "id": "242a6140-7341-49ca-876b-c01366b39b84", + "revision": 0, + "last_node_id": 31, + "last_link_id": 46, + "nodes": [ + { + "id": 10, + "type": "CLIPLoader", + "pos": [ + 0, + 250 + ], + "size": [ + 380, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 34, + 35 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "oldt5_xxl_fp8_e4m3fn_scaled.safetensors", + "cosmos", + "default" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 870, + 180 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 33 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 4 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 6 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 42 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 7 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 788533681999051, + "randomize", + 30, + 4, + "euler", + "simple", + 1 + ] + }, + { + "id": 15, + "type": "VAELoader", + "pos": [ + 80, + 400 + ], + "size": [ + 300, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 17, + 43 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 28, + "type": "CosmosPredict2ImageToVideoLatent", + "pos": [ + 499.99761962890625, + 616.21435546875 + ], + "size": [ + 330.7769470214844, + 170 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 43 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 44 + }, + { + "name": "end_image", + "shape": 7, + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 42 + ] + } + ], + "properties": { + "Node name for S&R": "CosmosPredict2ImageToVideoLatent" + }, + "widgets_values": [ + 848, + 480, + 93, + 1 + ] + }, + { + "id": 29, + "type": "LoadImage", + "pos": [ + 85.3239517211914, + 633.9439697265625 + ], + "size": [ + 274.080078125, + 314 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 44 + ] + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "mountains.png", + "image" + ] + }, + { + "id": 13, + "type": "UNETLoader", + "pos": [ + 0, + 120 + ], + "size": [ + 410, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": 
"MODEL", + "links": [ + 33 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "cosmos_predict2_2B_video2world_480p_16fps.safetensors", + "default" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 420, + 390 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 34 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 6 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1200, + 180 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 7 + }, + { + "name": "vae", + "type": "VAE", + "link": 17 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 45, + 46 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 31, + "type": "SaveWEBM", + "pos": [ + 1890, + 190 + ], + "size": [ + 270, + 274.8302001953125 + ], + "flags": {}, + "order": 10, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 46 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 24 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 420, + 180 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 35 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 4 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "camera moving quickly through the scene timelapse wind" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 30, + "type": "SaveAnimatedWEBP", + "pos": [ + 1440, + 180 + ], + "size": [ + 270, + 366 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 45 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16.000000000000004, + false, + 85, + "default" + ] + } + ], + "links": [ + [ + 4, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 6, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 7, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 17, + 15, + 0, + 8, + 1, + "VAE" + ], + [ + 33, + 13, + 0, + 3, + 0, + "MODEL" + ], + [ + 34, + 10, + 0, + 7, + 0, + "CLIP" + ], + [ + 35, + 10, + 0, + 6, + 0, + "CLIP" + ], + [ + 42, + 28, + 0, + 3, + 3, + "LATENT" + ], + [ + 43, + 15, + 0, + 28, + 0, + "VAE" + ], + [ + 44, + 29, + 0, + 28, + 1, + "IMAGE" + ], + [ + 45, + 8, + 0, + 30, + 0, + "IMAGE" + ], + [ + 46, + 8, + 0, + 31, + 0, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.8390545288824265, + "offset": [ + 61.29088261146557, + 145.33443300197447 + ] + }, + "frontendVersion": "1.21.7" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/cosmos_predict2/cosmos_predict2_2b_i2v_example.webp b/cosmos_predict2/cosmos_predict2_2b_i2v_example.webp new file mode 100644 index 0000000..29d98f8 Binary files /dev/null and b/cosmos_predict2/cosmos_predict2_2b_i2v_example.webp differ diff --git 
a/cosmos_predict2/cosmos_predict2_2b_t2i_example.png b/cosmos_predict2/cosmos_predict2_2b_t2i_example.png new file mode 100644 index 0000000..50cd0a5 Binary files /dev/null and b/cosmos_predict2/cosmos_predict2_2b_t2i_example.png differ diff --git a/flux/README.md b/flux/README.md index 62c860a..621e5cd 100644 --- a/flux/README.md +++ b/flux/README.md @@ -8,9 +8,9 @@ For the easy to use single file versions that you can easily use in [ComfyUI](ht ### Files to download for the regular version -If you don't have t5xxl_fp16.safetensors or clip_l.safetensors already in your ComfyUI/models/text_encoders/ directory you can find them on: [this link.](https://huggingface.co/comfyanonymous/flux_text_encoders/tree/main) You can use t5xxl_fp8_e4m3fn.safetensors instead for lower memory usage but the fp16 one is recommended if you have more than 32GB ram. +If you don't have t5xxl_fp16.safetensors or clip_l.safetensors already in your ComfyUI/models/text_encoders/ directory, you can find them at [this link.](https://huggingface.co/comfyanonymous/flux_text_encoders/tree/main) You can use t5xxl_fp8_e4m3fn_scaled.safetensors instead for lower memory usage, but the fp16 one is recommended if you have more than 32GB of RAM. -The VAE can be found [here](https://huggingface.co/black-forest-labs/FLUX.1-schnell/blob/main/ae.safetensors) and should go in your ComfyUI/models/vae/ folder. +The VAE can be found [here](https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/blob/main/split_files/vae/ae.safetensors) and should go in your ComfyUI/models/vae/ folder. ### Tips if you are running out of memory: @@ -30,7 +30,7 @@ You can then load or drag the following image in ComfyUI to get the workflow: Flux Schnell is a distilled 4 step model. -You can find the Flux Schnell diffusion model weights [here](https://huggingface.co/black-forest-labs/FLUX.1-schnell/blob/main/flux1-schnell.safetensors) this file should go in your: ComfyUI/models/unet/ folder. +You can find the Flux Schnell diffusion model weights [here](https://huggingface.co/black-forest-labs/FLUX.1-schnell); the flux1-schnell.safetensors file should go in your ComfyUI/models/unet/ folder. You can then load or drag the following image in ComfyUI to get the workflow: @@ -64,6 +64,21 @@ You can then load or drag the following image in ComfyUI to get the workflow: The following examples might require that you have some of the regular flux files that you can find links to at the top of this page. +### Flux Kontext (image editing) model + +Download the [flux1-kontext-dev.safetensors](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) model file and put it in your ComfyUI/models/diffusion_models/ folder. There is an alternative fp8 model here: [flux1-dev-kontext_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/flux1-kontext-dev_ComfyUI/blob/main/split_files/diffusion_models/flux1-dev-kontext_fp8_scaled.safetensors) if the other one is too large for you. + +Here's a simple example. You can load or drag the following image in ComfyUI to get the workflow: + +![Example](flux_kontext_example.png) + + +You can find the input image for the above workflow [here](../chroma/fennec_girl_sing.png). + +Here's another more complex example that generates a comic from the above input image: + +![Example](flux_kontext_example_comic.webp) + ### Fill (Inpainting) model Download the [flux1-fill-dev.safetensors](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev) model file and put it in your ComfyUI/models/diffusion_models/ folder.
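The BFL repos (Schnell is open, but the Kontext and Fill dev models are gated) may require an authenticated Hugging Face session before you can download. A minimal sketch, assuming you have accepted the license on the model page and exported an access token as `HF_TOKEN`:

```python
# Sketch: download a gated FLUX model file with an authenticated token.
# Assumes the license was accepted on the model page; `huggingface-cli
# login` works as an alternative to passing the token explicitly.
import os
from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="black-forest-labs/FLUX.1-Fill-dev",
    filename="flux1-fill-dev.safetensors",
    local_dir="ComfyUI/models/diffusion_models",
    token=os.environ["HF_TOKEN"],  # your Hugging Face access token
)
```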
diff --git a/flux/flux_kontext_example.png b/flux/flux_kontext_example.png new file mode 100644 index 0000000..3dea15b Binary files /dev/null and b/flux/flux_kontext_example.png differ diff --git a/flux/flux_kontext_example_comic.webp b/flux/flux_kontext_example_comic.webp new file mode 100644 index 0000000..6586fdd Binary files /dev/null and b/flux/flux_kontext_example_comic.webp differ diff --git a/hidream/README.md b/hidream/README.md index 2c22fbe..cb1ffc6 100644 --- a/hidream/README.md +++ b/hidream/README.md @@ -35,7 +35,17 @@ You can then load up or drag the following image in ComfyUI to get the workflow: ![Example](hidream_full_example.png) +## HiDream e1.1 +This is an edit model; download [hidream_e1_1_bf16.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/diffusion_models/hidream_e1_1_bf16.safetensors) and put it in your ComfyUI/models/diffusion_models/ directory. + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](hidream_e1.1_example.png) + + +
+The following section covers the older HiDream e1 (1.0) edit model. ## HiDream e1 This is an experimental edit model, download [hidream_e1_full_bf16.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/diffusion_models/hidream_e1_full_bf16.safetensors) and put it in your ComfyUI/models/diffusion_models/ directory. @@ -43,3 +53,4 @@ This is an experimental edit model, download [hidream_e1_full_bf16.safetensors]( You can then load up or drag the following image in ComfyUI to get the workflow: ![Example](hidream_e1_example.png) +
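Any of these image-embedded workflows can also be queued programmatically once exported. A minimal sketch against ComfyUI's local HTTP API, assuming the server is running on the default 127.0.0.1:8188 and that the workflow was exported with "Save (API Format)" in the UI (the PNG-embedded form is for loading in the UI, not for the API):

```python
# Sketch: queue an API-format workflow on a locally running ComfyUI.
import json
import urllib.request

with open("workflow_api.json", "r", encoding="utf-8") as f:
    prompt = json.load(f)  # exported via "Save (API Format)" in the UI

req = urllib.request.Request(
    "http://127.0.0.1:8188/prompt",
    data=json.dumps({"prompt": prompt}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode("utf-8"))  # response includes the prompt_id
```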
diff --git a/hidream/hidream_e1.1_example.png b/hidream/hidream_e1.1_example.png new file mode 100644 index 0000000..ebc62c6 Binary files /dev/null and b/hidream/hidream_e1.1_example.png differ diff --git a/hunyuan_image/README.md b/hunyuan_image/README.md new file mode 100644 index 0000000..3b51cfe --- /dev/null +++ b/hunyuan_image/README.md @@ -0,0 +1,26 @@ +# Hunyuan Image 2.1 + +[Hunyuan Image 2.1](https://huggingface.co/tencent/HunyuanImage-2.1) is a powerful diffusion model for image generation. + +## Basic Workflow + +Download the following models and place them in the appropriate ComfyUI directories: + +### Text Encoders +Download and put in your ComfyUI/models/text_encoders directory: +- [byt5_small_glyphxl_fp16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/text_encoders/byt5_small_glyphxl_fp16.safetensors) +- [qwen_2.5_vl_7b.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/text_encoders/qwen_2.5_vl_7b.safetensors) + +### VAE Models +Download and put in your ComfyUI/models/vae directory: +- [hunyuan_image_2.1_vae_fp16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/vae/hunyuan_image_2.1_vae_fp16.safetensors) +- **Optional (for refiner):** [hunyuan_image_refiner_vae_fp16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/vae/hunyuan_image_refiner_vae_fp16.safetensors) + +### Diffusion Models +Download and put in your ComfyUI/models/diffusion_models directory: +- [hunyuanimage2.1_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/diffusion_models/hunyuanimage2.1_bf16.safetensors) +- **Optional (for refiner):** [hunyuanimage2.1_refiner_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/diffusion_models/hunyuanimage2.1_refiner_bf16.safetensors) + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](hunyuan_image_example.png) diff --git a/hunyuan_image/hunyuan_image_example.png b/hunyuan_image/hunyuan_image_example.png new file mode 100644 index 0000000..3dcfbbc Binary files /dev/null and b/hunyuan_image/hunyuan_image_example.png differ diff --git a/omnigen/README.md b/omnigen/README.md new file mode 100644 index 0000000..da334b2 --- /dev/null +++ b/omnigen/README.md @@ -0,0 +1,24 @@ +# Omnigen 2 + +[Omnigen 2](https://github.com/VectorSpaceLab/OmniGen2) is a model that can be used to edit images with text prompts. + +## Files to Download + +You will first need: + +[omnigen2_fp16.safetensors](https://huggingface.co/Comfy-Org/Omnigen2_ComfyUI_repackaged/blob/main/split_files/diffusion_models/omnigen2_fp16.safetensors) goes in: ComfyUI/models/diffusion_models/ + +[qwen_2.5_vl_fp16.safetensors](https://huggingface.co/Comfy-Org/Omnigen2_ComfyUI_repackaged/blob/main/split_files/text_encoders/qwen_2.5_vl_fp16.safetensors) goes in: ComfyUI/models/text_encoders/ + +[ae.safetensors](https://huggingface.co/Comfy-Org/Omnigen2_ComfyUI_repackaged/blob/main/split_files/vae/ae.safetensors), this is the flux VAE that you might already have; it goes in: ComfyUI/models/vae/ + +## Workflows + +This is a basic workflow using an image as a character reference. For multiple image inputs, chain ReferenceLatent nodes together, as sketched below. + +![Example](omnigen2_example.png) + +You can load this image in [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the full workflow.
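Here is a hedged sketch of what that ReferenceLatent chaining looks like as an API-format graph fragment (written as a Python dict). The node ids and the upstream nodes are illustrative only, and it assumes each reference image has already been VAE-encoded into a latent:

```python
# Illustrative API-format fragment: two ReferenceLatent nodes chained so
# the conditioning carries both reference latents. Node ids ("10", "11",
# "12") and the upstream nodes ("1", "20", "21") are placeholders.
graph = {
    "10": {  # text conditioning from the prompt
        "class_type": "CLIPTextEncode",
        "inputs": {"clip": ["1", 0], "text": "make the character wave"},
    },
    "11": {  # attach the first reference latent to the conditioning
        "class_type": "ReferenceLatent",
        "inputs": {"conditioning": ["10", 0], "latent": ["20", 0]},
    },
    "12": {  # chain the second reference onto the first
        "class_type": "ReferenceLatent",
        "inputs": {"conditioning": ["11", 0], "latent": ["21", 0]},
    },
}
```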
+ +You can find the input image [here](../chroma/fennec_girl_sing.png). + diff --git a/omnigen/omnigen2_example.png b/omnigen/omnigen2_example.png new file mode 100644 index 0000000..bf3329d Binary files /dev/null and b/omnigen/omnigen2_example.png differ diff --git a/qwen_image/README.md b/qwen_image/README.md new file mode 100644 index 0000000..460b833 --- /dev/null +++ b/qwen_image/README.md @@ -0,0 +1,42 @@ +# Qwen Image + +[Qwen Image](https://github.com/QwenLM/Qwen-Image) is a 20B diffusion model. + +## Basic Workflow + +Download [qwen_image_fp8_e4m3fn.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/blob/main/split_files/diffusion_models/qwen_image_fp8_e4m3fn.safetensors) and put it in your ComfyUI/models/diffusion_models directory. + +Download [qwen_2.5_vl_7b_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/blob/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors) and put it in your ComfyUI/models/text_encoders directory. + +Download [qwen_image_vae.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/blob/main/split_files/vae/qwen_image_vae.safetensors) and put it in your ComfyUI/models/vae/ directory. + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](qwen_image_basic_example.png) + +## Edit Model v2509 + +Make sure you have downloaded the text encoder and VAE files for the basic workflow above. This model supports up to 3 different image inputs. + +Download [qwen_image_edit_2509_fp8_e4m3fn.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/blob/main/split_files/diffusion_models/qwen_image_edit_2509_fp8_e4m3fn.safetensors) and put it in your ComfyUI/models/diffusion_models directory. + + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](qwen_image_edit_2509_basic_example.png) + +You can find the input image [here](../chroma/fennec_girl_sing.png). + + +## Edit Model (older first version) + +Make sure you have downloaded the text encoder and VAE files for the basic workflow above. + +Download [qwen_image_edit_fp8_e4m3fn.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/blob/main/split_files/diffusion_models/qwen_image_edit_fp8_e4m3fn.safetensors) and put it in your ComfyUI/models/diffusion_models directory. + + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](qwen_image_edit_basic_example.png) + +You can find the input image [here](../chroma/fennec_girl_sing.png). diff --git a/qwen_image/qwen_image_basic_example.png b/qwen_image/qwen_image_basic_example.png new file mode 100644 index 0000000..c6a0537 Binary files /dev/null and b/qwen_image/qwen_image_basic_example.png differ diff --git a/qwen_image/qwen_image_edit_2509_basic_example.png b/qwen_image/qwen_image_edit_2509_basic_example.png new file mode 100644 index 0000000..82895ff Binary files /dev/null and b/qwen_image/qwen_image_edit_2509_basic_example.png differ diff --git a/qwen_image/qwen_image_edit_basic_example.png b/qwen_image/qwen_image_edit_basic_example.png new file mode 100644 index 0000000..db318c0 Binary files /dev/null and b/qwen_image/qwen_image_edit_basic_example.png differ diff --git a/wan/README.md b/wan/README.md index 0c3372d..677184f 100644 --- a/wan/README.md +++ b/wan/README.md @@ -2,6 +2,8 @@ [Wan 2.1](https://github.com/Wan-Video/Wan2.1) is a family of video models.
+For Wan 2.2, see: [Wan 2.2](../wan22) + ## Files to Download You will first need: @@ -51,3 +53,28 @@ The input image can be found on the [flux](../flux) page. Here's the same example with the [720p](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_i2v_720p_14B_fp16.safetensors) model: + + +### VACE Reference Image to Video + +This workflow requires the [wan2.1_vace_14B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_vace_14B_fp16.safetensors) file (put it in: ComfyUI/models/diffusion_models/). + +This example generates a video from a reference image; this is different from generating a video from a start image. You'll notice that the video does not actually contain the reference image but is clearly derived from it. + + + +[Workflow in Json format](vace_reference_to_video.json) + +You can find the input image [here](../chroma/fennec_girl_sing.png); that image contains a [Chroma](../chroma) workflow if you are interested in how it was generated. + +### Image Camera to Video + +This workflow requires the [wan2.1_fun_camera_v1.1_1.3B_bf16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_fun_camera_v1.1_1.3B_bf16.safetensors) file (put it in: ComfyUI/models/diffusion_models/) and +[clip_vision_h.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/clip_vision/clip_vision_h.safetensors), which goes in: ComfyUI/models/clip_vision/ if you don't have it already. + + + + +[Workflow in Json format](camera_image_to_video_wan_example.json) + +The input image can be found on the [flux](../flux) page. diff --git a/wan/camera_image_to_video_wan_example.json b/wan/camera_image_to_video_wan_example.json new file mode 100644 index 0000000..74068e8 --- /dev/null +++ b/wan/camera_image_to_video_wan_example.json @@ -0,0 +1,865 @@ +{ + "id": "fa117b0f-052b-46d1-af50-d1bc60704ed5", + "revision": 0, + "last_node_id": 60, + "last_link_id": 130, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -540, + 170 + ], + "size": [ + 387.0450744628906, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 590, + 480 + ], + "size": [ + 290.6003723144531, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 117 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 900, + 180 + ], + "size": [ + 308.10516357421875, + 262 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 111 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 118 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 119 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 120 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, +
"widgets_values": [ + 1034274237172778, + "randomize", + 20, + 6, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1230, + 180 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1480, + 180 + ], + "size": [ + 620.66796875, + 679.0053100585938 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 90, + "default" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + -140, + 370 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 116 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + -140, + 160 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 115 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and a big fluffy tail wearing a maid outfit" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 51, + "type": "CLIPVisionEncode", + "pos": [ + 350, + 680 + ], + "size": [ + 255.5699462890625, + 78 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 94 + }, + { + "name": "image", + "type": "IMAGE", + "link": 109 + } + ], + "outputs": [ + { + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "slot_index": 0, + "links": [ + 113 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionEncode" + }, + "widgets_values": [ + "none" + ] + }, + { + "id": 52, + "type": "LoadImage", + "pos": [ + -10, + 780 + ], + "size": [ + 315, + 314 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 109, + 114 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "flux_dev_example.png", + "image" + ] + }, + { + "id": 49, + "type": "CLIPVisionLoader", + "pos": [ + 0, + 670 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + 
"slot_index": 0, + "links": [ + 94 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionLoader" + }, + "widgets_values": [ + "clip_vision_h.safetensors" + ] + }, + { + "id": 56, + "type": "WanCameraImageToVideo", + "pos": [ + 590, + 200 + ], + "size": [ + 290, + 230 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 115 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 116 + }, + { + "name": "vae", + "type": "VAE", + "link": 117 + }, + { + "name": "clip_vision_output", + "shape": 7, + "type": "CLIP_VISION_OUTPUT", + "link": 113 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 114 + }, + { + "name": "camera_conditions", + "shape": 7, + "type": "WAN_CAMERA_EMBEDDING", + "link": 124 + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 125 + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 126 + }, + { + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 127 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 118 + ] + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 119 + ] + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 120 + ] + } + ], + "properties": { + "Node name for S&R": "WanCameraImageToVideo" + }, + "widgets_values": [ + 832, + 480, + 81, + 1 + ] + }, + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 600, + 100 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 130 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 111 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2150, + 180 + ], + "size": [ + 315, + 210 + ], + "flags": {}, + "order": 14, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 32 + ] + }, + { + "id": 57, + "type": "WanCameraEmbedding", + "pos": [ + 310, + 300 + ], + "size": [ + 236.8000030517578, + 310 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "camera_embedding", + "type": "WAN_CAMERA_EMBEDDING", + "links": [ + 124 + ] + }, + { + "name": "width", + "type": "INT", + "links": [ + 125 + ] + }, + { + "name": "height", + "type": "INT", + "links": [ + 126 + ] + }, + { + "name": "length", + "type": "INT", + "links": [ + 127 + ] + } + ], + "properties": { + "Node name for S&R": "WanCameraEmbedding" + }, + "widgets_values": [ + "Zoom Out", + 512, + 512, + 81, + 1, + 0.5, + 0.5, + 0.5, + 0.5 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + -540, + 50 + ], + "size": [ + 390, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 130 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.1_fun_camera_v1.1_1.3B_bf16.safetensors", + "default" + ] + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 
0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 49, + 0, + 51, + 0, + "CLIP_VISION" + ], + [ + 109, + 52, + 0, + 51, + 1, + "IMAGE" + ], + [ + 111, + 54, + 0, + 3, + 0, + "MODEL" + ], + [ + 113, + 51, + 0, + 56, + 3, + "CLIP_VISION_OUTPUT" + ], + [ + 114, + 52, + 0, + 56, + 4, + "IMAGE" + ], + [ + 115, + 6, + 0, + 56, + 0, + "CONDITIONING" + ], + [ + 116, + 7, + 0, + 56, + 1, + "CONDITIONING" + ], + [ + 117, + 39, + 0, + 56, + 2, + "VAE" + ], + [ + 118, + 56, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 119, + 56, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 120, + 56, + 2, + 3, + 3, + "LATENT" + ], + [ + 124, + 57, + 0, + 56, + 5, + "WAN_CAMERA_EMBEDDING" + ], + [ + 125, + 57, + 1, + 56, + 6, + "INT" + ], + [ + 126, + 57, + 2, + 56, + 7, + "INT" + ], + [ + 127, + 57, + 3, + 56, + 8, + "INT" + ], + [ + 130, + 37, + 0, + 54, + 0, + "MODEL" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.6934334949441638, + "offset": [ + 570.9293716820114, + 14.391611998548521 + ] + }, + "frontendVersion": "1.20.7" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan/camera_image_to_video_wan_example.webp b/wan/camera_image_to_video_wan_example.webp new file mode 100644 index 0000000..042ac9f Binary files /dev/null and b/wan/camera_image_to_video_wan_example.webp differ diff --git a/wan/vace_reference_to_video.json b/wan/vace_reference_to_video.json new file mode 100644 index 0000000..a2ea2ad --- /dev/null +++ b/wan/vace_reference_to_video.json @@ -0,0 +1,741 @@ +{ + "id": "0898f6a6-2814-4ccd-968a-a2405ee177e7", + "revision": 0, + "last_node_id": 58, + "last_link_id": 124, + "nodes": [ + { + "id": 39, + "type": "VAELoader", + "pos": [ + 866.3932495117188, + 499.18597412109375 + ], + "size": [ + 306.36004638671875, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 114 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 20, + 190 + ], + "size": [ + 390, + 106 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 510, + 70 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 110 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 111 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 113 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + 
"色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 120 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 56, + "type": "TrimVideoLatent", + "pos": [ + 1265.2001953125, + 613.80859375 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 119 + }, + { + "name": "trim_amount", + "type": "INT", + "widget": { + "name": "trim_amount" + }, + "link": 121 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 120 + ] + } + ], + "properties": { + "Node name for S&R": "TrimVideoLatent" + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 70 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.1_vace_14B_fp16.safetensors", + "default" + ] + }, + { + "id": 55, + "type": "WanVaceToVideo", + "pos": [ + 698.0429077148438, + 632.2788696289062 + ], + "size": [ + 270, + 254 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 112 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 113 + }, + { + "name": "vae", + "type": "VAE", + "link": 114 + }, + { + "name": "control_video", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "name": "control_masks", + "shape": 7, + "type": "MASK", + "link": null + }, + { + "name": "reference_image", + "shape": 7, + "type": "IMAGE", + "link": 118 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 115 + ] + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 116 + ] + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 117 + ] + }, + { + "name": "trim_latent", + "type": "INT", + "links": [ + 121 + ] + } + ], + "properties": { + "Node name for S&R": "WanVaceToVideo" + }, + "widgets_values": [ + 768, + 768, + 81, + 1, + 1 + ] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1600, + 190 + ], + "size": [ + 364.4535217285156, + 510.4535217285156 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 90, + "default" + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2060, + 190 + ], + "size": [ + 429.0989685058594, + 523.8981323242188 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 0 + ] + }, + { + "id": 58, + "type": "Note", + "pos": [ + 2509.27587890625, + 
189.5493621826172 + ], + "size": [ + 263.95501708984375, + 155.10342407226562 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "crf 0 means a lossless webm, if you want a lossy once with smaller filesize increase the crf." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 52, + "type": "LoadImage", + "pos": [ + 221.9611358642578, + 734.3540649414062 + ], + "size": [ + 315, + 314 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 118 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": [] + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fennec_girl_sing.png", + "image" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 111 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 115 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 116 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 117 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 119 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 399224011392770, + "randomize", + 20, + 6, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 112 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and a big fluffy tail turning around and dancing and singing on stage like an idol" + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 110, + 37, + 0, + 54, + 0, + "MODEL" + ], + [ + 111, + 54, + 0, + 3, + 0, + "MODEL" + ], + [ + 112, + 6, + 0, + 55, + 0, + "CONDITIONING" + ], + [ + 113, + 7, + 0, + 55, + 1, + "CONDITIONING" + ], + [ + 114, + 39, + 0, + 55, + 2, + "VAE" + ], + [ + 115, + 55, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 116, + 55, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 117, + 55, + 2, + 3, + 3, + "LATENT" + ], + [ + 118, + 52, + 0, + 55, + 5, + "IMAGE" + ], + [ + 119, + 3, + 0, + 56, + 0, + "LATENT" + ], + [ + 120, + 56, + 0, + 8, + 0, + "LATENT" + ], + [ + 121, + 55, + 3, + 56, + 1, + "INT" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.9358232486220777, + "offset": [ + -2.3933794268561357, + -27.125629672645054 + ] + }, + "frontendVersion": "1.19.9" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan/vace_reference_to_video.webp b/wan/vace_reference_to_video.webp new file mode 100644 index 0000000..731f7d1 Binary files /dev/null and b/wan/vace_reference_to_video.webp differ diff --git a/wan22/README.md b/wan22/README.md new file 
mode 100644 index 0000000..7f791eb --- /dev/null +++ b/wan22/README.md @@ -0,0 +1,70 @@ +# Wan 2.2 Models + +[Wan 2.2](https://github.com/Wan-Video/Wan2.2) is a family of video models and the version after [Wan 2.1](../wan). + +Wan 2.2 was initially released with 3 different models: a 5B model that can do both text and image to video, and two 14B models, one for text to video and the other for image to video. + +See also the [Comfy Docs Wan 2.2 page for more workflow examples.](https://docs.comfy.org/tutorials/video/wan/wan2_2) + +## Files to Download + +You will first need: + +#### Text encoder and VAE: + +[umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/tree/main/split_files/text_encoders) goes in: ComfyUI/models/text_encoders/ + +Needed for the 14B models: [wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors) goes in: ComfyUI/models/vae/ + +Needed for the 5B model (NEW): [wan2.2_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan2.2_vae.safetensors) goes in: ComfyUI/models/vae/ + +#### Video Models + +The diffusion models can be found [here](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/tree/main/split_files/diffusion_models) + +These files go in: ComfyUI/models/diffusion_models/ + +## Workflows + +### 5B Model + +This workflow requires the [wan2.2_ti2v_5B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_ti2v_5B_fp16.safetensors) file (put it in: ComfyUI/models/diffusion_models/). + +Make sure you have the [wan2.2 VAE](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan2.2_vae.safetensors) (goes in: ComfyUI/models/vae/). + +#### Text to Video + +![Example](text_to_video_wan22_5B.webp) + +[Workflow in Json format](text_to_video_wan22_5B.json) + + +#### Image to Video + +![Example](image_to_video_wan22_5B.webp) + +[Workflow in Json format](image_to_video_wan22_5B.json) + +You can find the input image [here](../chroma/fennec_girl_hug.png). + +### 14B Model + +Make sure you have the [wan2.1 VAE](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors) (goes in: ComfyUI/models/vae/). + +#### Text to Video + +This workflow requires both the [wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors) and the [wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors) files (put them in: ComfyUI/models/diffusion_models/). + +![Example](text_to_video_wan22_14B.webp) + +[Workflow in Json format](text_to_video_wan22_14B.json) + +#### Image to Video + +This workflow requires both the [wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors) and the [wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors) files (put them in: ComfyUI/models/diffusion_models/).
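The 14B workflows split denoising across the two checkpoints: the high noise model handles the early steps and the low noise model finishes (the example workflow below hands off at step 10 of 20 using two KSamplerAdvanced nodes). A rough sketch of that handoff as an API-format fragment; node ids are placeholders, and several required inputs (seed, cfg, sampler, conditioning) are omitted for brevity:

```python
# Illustrative two-stage sampling from the 14B workflow: the high noise
# model denoises steps 0-10, then the low noise model continues the
# partially denoised latent from step 10 onward. Ids are placeholders.
stages = {
    "57": {  # stage 1: high noise checkpoint, adds the initial noise
        "class_type": "KSamplerAdvanced",
        "inputs": {
            "model": ["high_noise_loader", 0],
            "add_noise": "enable",
            "steps": 20, "start_at_step": 0, "end_at_step": 10,
            "return_with_leftover_noise": "enable",
            # noise_seed, cfg, sampler_name, scheduler, positive,
            # negative and latent_image omitted for brevity
        },
    },
    "58": {  # stage 2: low noise checkpoint finishes the latent
        "class_type": "KSamplerAdvanced",
        "inputs": {
            "model": ["low_noise_loader", 0],
            "add_noise": "disable",
            "steps": 20, "start_at_step": 10, "end_at_step": 10000,
            "return_with_leftover_noise": "disable",
            "latent_image": ["57", 0],
        },
    },
}
```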
+ +![Example](image_to_video_wan22_14B.webp) + +[Workflow in Json format](image_to_video_wan22_14B.json) + +You can find the input image [here](../chroma/fennec_girl_flowers.png) diff --git a/wan22/image_to_video_wan22_14B.json b/wan22/image_to_video_wan22_14B.json new file mode 100644 index 0000000..cea25cd --- /dev/null +++ b/wan22/image_to_video_wan22_14B.json @@ -0,0 +1,876 @@ +{ + "id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", + "revision": 0, + "last_node_id": 60, + "last_link_id": 126, + "nodes": [ + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 98 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 486.4836120605469, + -69.28914642333984 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 110 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 125 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 55, + "type": "ModelSamplingSD3", + "pos": [ + 484.0019836425781, + 54.46213912963867 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 112 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 123 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + }, + { + "id": 58, + "type": "KSamplerAdvanced", + "pos": [ + 1262.509765625, + -26.73247528076172 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 123 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 121 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 122 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 113 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 124 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "disable", + 0, + "fixed", + 20, + 3.5, + "euler", + "simple", + 10, + 10000, + "disable" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 30, + 190 + ], + "size": [ + 360, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 30, + -70 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + 
"type": "MODEL", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 56, + "type": "UNETLoader", + "pos": [ + 30, + 60 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 112 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 30, + 340 + ], + "size": [ + 360, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 99 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 59, + "type": "Note", + "pos": [ + -202.05557250976562, + -57.859466552734375 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses a different diffusion model for the first steps (high noise) vs the last steps (low noise).\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 60, + "type": "Note", + "pos": [ + -200, + 340 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses the wan 2.1 VAE.\n\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1590, + -20 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 124 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2530, + -20 + ], + "size": [ + 763.67041015625, + 885.67041015625 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 13.3333740234375 + ] + }, + { + "id": 57, + "type": "KSamplerAdvanced", + "pos": [ + 893.0060424804688, + -29.923471450805664 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 125 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 118 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 119 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 120 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 113 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "enable", + 99822389587980, + "randomize", + 20, + 3.5, + "euler", + "simple", + 0, + 10, + "enable" + ] + }, + { + "id": 28, 
+ "type": "SaveAnimatedWEBP", + "pos": [ + 1820, + -20 + ], + "size": [ + 674.6224975585938, + 820.6224975585938 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 80, + "default" + ] + }, + { + "id": 50, + "type": "WanImageToVideo", + "pos": [ + 491.7362060546875, + 617.798095703125 + ], + "size": [ + 342.5999755859375, + 210 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 97 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 98 + }, + { + "name": "vae", + "type": "VAE", + "link": 99 + }, + { + "name": "clip_vision_output", + "shape": 7, + "type": "CLIP_VISION_OUTPUT", + "link": null + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 126 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 118, + 121 + ] + }, + { + "name": "negative", + "type": "CONDITIONING", + "slot_index": 1, + "links": [ + 119, + 122 + ] + }, + { + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [ + 120 + ] + } + ], + "properties": { + "Node name for S&R": "WanImageToVideo" + }, + "widgets_values": [ + 768, + 768, + 81, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 97 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl picking up an assault rifle and moving quickly" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 52, + "type": "LoadImage", + "pos": [ + -50, + 550 + ], + "size": [ + 450, + 540 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 126 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fennec_girl_flowers.png", + "image" + ] + } + ], + "links": [ + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 97, + 6, + 0, + 50, + 0, + "CONDITIONING" + ], + [ + 98, + 7, + 0, + 50, + 1, + "CONDITIONING" + ], + [ + 99, + 39, + 0, + 50, + 2, + "VAE" + ], + [ + 110, + 37, + 0, + 54, + 0, + "MODEL" + ], + [ + 112, + 56, + 0, + 55, + 0, + "MODEL" + ], + [ + 113, + 57, + 0, + 58, + 3, + "LATENT" + ], + [ + 118, + 50, + 0, + 57, + 1, + "CONDITIONING" + ], + [ + 119, + 50, + 1, + 57, + 2, + "CONDITIONING" + ], + [ + 120, + 50, + 2, + 57, + 3, + "LATENT" + ], + [ + 121, + 50, + 0, + 58, + 1, + "CONDITIONING" + ], + [ + 122, + 50, + 1, + 58, + 2, + "CONDITIONING" + ], + [ + 123, + 55, + 0, + 58, + 0, + "MODEL" + ], + [ + 124, + 58, + 0, + 8, + 0, + "LATENT" + ], + [ + 125, + 54, + 0, + 57, + 0, + "MODEL" + ], + [ + 126, + 52, + 0, + 50, + 4, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 
1.1167815779425299, + "offset": [ + 229.4669275491141, + 115.0852193902741 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/image_to_video_wan22_14B.webp b/wan22/image_to_video_wan22_14B.webp new file mode 100644 index 0000000..d74baa2 Binary files /dev/null and b/wan22/image_to_video_wan22_14B.webp differ diff --git a/wan22/image_to_video_wan22_5B.json b/wan22/image_to_video_wan22_5B.json new file mode 100644 index 0000000..6160b10 --- /dev/null +++ b/wan22/image_to_video_wan22_5B.json @@ -0,0 +1,624 @@ +{ + "id": "91f6bbe2-ed41-4fd6-bac7-71d5b5864ecb", + "revision": 0, + "last_node_id": 57, + "last_link_id": 106, + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 95 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 46 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 52 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 104 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 869177064731501, + "randomize", + 30, + 5, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1460, + 190 + ], + "size": [ + 870.8511352539062, + 648.4141235351562 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24.000000000000004, + false, + 90, + "default" + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 20, + 340 + ], + "size": [ + 330, + 60 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 105 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan2.2_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 20, + 190 + ], + "size": [ + 380, + 106 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": 
"CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 48, + "type": "ModelSamplingSD3", + "pos": [ + 440, + 60 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 94 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 95 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 60 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 94 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_ti2v_5B_fp16.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2367.213134765625, + 193.6114959716797 + ], + "size": [ + 670, + 650 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 16.111083984375 + ] + }, + { + "id": 57, + "type": "LoadImage", + "pos": [ + 87.407958984375, + 620.4816284179688 + ], + "size": [ + 274.080078125, + 314 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106 + ] + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fennec_girl_hug.png", + "image" + ] + }, + { + "id": 56, + "type": "Note", + "pos": [ + 710.781005859375, + 608.9545288085938 + ], + "size": [ + 320.9936218261719, + 182.6057586669922 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Optimal resolution is: 1280x704 length 121\n\nThe reason it's lower in this workflow is just because I didn't want you to wait too long to get an initial video.\n\nTo get image to video just plug in a start image. For text to video just don't give it a start image." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 55, + "type": "Wan22ImageToVideoLatent", + "pos": [ + 420, + 610 + ], + "size": [ + 271.9126892089844, + 150 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 105 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 106 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 104 + ] + } + ], + "properties": { + "Node name for S&R": "Wan22ImageToVideoLatent" + }, + "widgets_values": [ + 1280, + 704, + 41, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 46 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl with fennec ears and a fluffy tail walking in a beautiful field" + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 46, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 52, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 37, + 0, + 48, + 0, + "MODEL" + ], + [ + 95, + 48, + 0, + 3, + 0, + "MODEL" + ], + [ + 104, + 55, + 0, + 3, + 3, + "LATENT" + ], + [ + 105, + 39, + 0, + 55, + 0, + "VAE" + ], + [ + 106, + 57, + 0, + 55, + 1, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.1167815779425287, + "offset": [ + 3.5210927484772534, + -9.231468990407302 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/image_to_video_wan22_5B.webp b/wan22/image_to_video_wan22_5B.webp new file mode 100644 index 0000000..20281aa Binary files /dev/null and b/wan22/image_to_video_wan22_5B.webp differ diff --git a/wan22/text_to_video_wan22_14B.json b/wan22/text_to_video_wan22_14B.json new file mode 100644 index 0000000..2dc3ca0 --- /dev/null +++ b/wan22/text_to_video_wan22_14B.json @@ -0,0 +1,759 @@ +{ + "id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", + "revision": 0, + "last_node_id": 61, + "last_link_id": 131, + "nodes": [ + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 486.4836120605469, + -69.28914642333984 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 110 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 125 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 58, + "type": "KSamplerAdvanced", + "pos": [ + 1262.509765625, + -26.73247528076172 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 123 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 128 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 130 + }, + { + "name": "latent_image", + "type": 
"LATENT", + "link": 113 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 124 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "disable", + 0, + "fixed", + 20, + 3.5, + "euler", + "simple", + 10, + 10000, + "disable" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 30, + 190 + ], + "size": [ + 360, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 30, + 340 + ], + "size": [ + 360, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 59, + "type": "Note", + "pos": [ + -202.05557250976562, + -57.859466552734375 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses a different diffusion model for the first steps (high noise) vs the last steps (low noise).\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 60, + "type": "Note", + "pos": [ + -200, + 340 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses the wan 2.1 VAE.\n\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1590, + -20 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 124 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1820, + -20 + ], + "size": [ + 674.6224975585938, + 820.6224975585938 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 80, + "default" + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2530, + -20 + ], + "size": [ + 763.67041015625, + 885.67041015625 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 13.3333740234375 + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 129, + 130 + 
] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 56, + "type": "UNETLoader", + "pos": [ + 30, + 60 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 112 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 55, + "type": "ModelSamplingSD3", + "pos": [ + 484.0019836425781, + 54.46213912963867 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 112 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 123 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 30, + -70 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 57, + "type": "KSamplerAdvanced", + "pos": [ + 893.0060424804688, + -29.923471450805664 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 125 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 127 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 129 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 131 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 113 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "enable", + 738226772790037, + "randomize", + 20, + 3.5, + "euler", + "simple", + 0, + 10, + "enable" + ] + }, + { + "id": 61, + "type": "EmptyHunyuanLatentVideo", + "pos": [ + 560, + 620 + ], + "size": [ + 270.0943298339844, + 130 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 131 + ] + } + ], + "properties": { + "Node name for S&R": "EmptyHunyuanLatentVideo" + }, + "widgets_values": [ + 1280, + 704, + 57, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 127, + 128 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a robot is running through a futuristic cyberpunk city with neon signs and darkness with bright HDR lights" + ], + "color": "#232", + "bgcolor": "#353" + 
} + ], + "links": [ + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 110, + 37, + 0, + 54, + 0, + "MODEL" + ], + [ + 112, + 56, + 0, + 55, + 0, + "MODEL" + ], + [ + 113, + 57, + 0, + 58, + 3, + "LATENT" + ], + [ + 123, + 55, + 0, + 58, + 0, + "MODEL" + ], + [ + 124, + 58, + 0, + 8, + 0, + "LATENT" + ], + [ + 125, + 54, + 0, + 57, + 0, + "MODEL" + ], + [ + 127, + 6, + 0, + 57, + 1, + "CONDITIONING" + ], + [ + 128, + 6, + 0, + 58, + 1, + "CONDITIONING" + ], + [ + 129, + 7, + 0, + 57, + 2, + "CONDITIONING" + ], + [ + 130, + 7, + 0, + 58, + 2, + "CONDITIONING" + ], + [ + 131, + 61, + 0, + 57, + 3, + "LATENT" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.1167815779425305, + "offset": [ + 242.9977455078102, + 122.98065462666187 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/text_to_video_wan22_14B.webp b/wan22/text_to_video_wan22_14B.webp new file mode 100644 index 0000000..4d3f6d6 Binary files /dev/null and b/wan22/text_to_video_wan22_14B.webp differ diff --git a/wan22/text_to_video_wan22_5B.json b/wan22/text_to_video_wan22_5B.json new file mode 100644 index 0000000..25dc251 --- /dev/null +++ b/wan22/text_to_video_wan22_5B.json @@ -0,0 +1,579 @@ +{ + "id": "91f6bbe2-ed41-4fd6-bac7-71d5b5864ecb", + "revision": 0, + "last_node_id": 57, + "last_link_id": 106, + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 95 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 46 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 52 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 104 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 285741127119524, + "randomize", + 30, + 5, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 20, + 340 + ], + "size": [ + 330, + 60 + ], + "flags": {}, + "order": 0, + 
"mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 105 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan2.2_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 20, + 190 + ], + "size": [ + 380, + 106 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 48, + "type": "ModelSamplingSD3", + "pos": [ + 440, + 60 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 94 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 95 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 60 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 94 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_ti2v_5B_fp16.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2367.213134765625, + 193.6114959716797 + ], + "size": [ + 670, + 650 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 16.111083984375 + ] + }, + { + "id": 56, + "type": "Note", + "pos": [ + 710.781005859375, + 608.9545288085938 + ], + "size": [ + 320.9936218261719, + 182.6057586669922 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Optimal resolution is: 1280x704 length 121\n\nThe reason it's lower in this workflow is just because I didn't want you to wait too long to get an initial video.\n\nTo get image to video just plug in a start image. For text to video just don't give it a start image." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 55, + "type": "Wan22ImageToVideoLatent", + "pos": [ + 420, + 610 + ], + "size": [ + 271.9126892089844, + 150 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 105 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 104 + ] + } + ], + "properties": { + "Node name for S&R": "Wan22ImageToVideoLatent" + }, + "widgets_values": [ + 1280, + 704, + 41, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 46 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "drone shot of a volcano erupting with a fox walking on it" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1460, + 190 + ], + "size": [ + 870.8511352539062, + 648.4141235351562 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24.000000000000004, + false, + 80, + "default" + ] + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 46, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 52, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 37, + 0, + 48, + 0, + "MODEL" + ], + [ + 95, + 48, + 0, + 3, + 0, + "MODEL" + ], + [ + 104, + 55, + 0, + 3, + 3, + "LATENT" + ], + [ + 105, + 39, + 0, + 55, + 0, + "VAE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.11678157794253, + "offset": [ + 7.041966347099882, + -19.733042401058505 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/text_to_video_wan22_5B.webp b/wan22/text_to_video_wan22_5B.webp new file mode 100644 index 0000000..eafce22 Binary files /dev/null and b/wan22/text_to_video_wan22_5B.webp differ
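A note on what the two `KSamplerAdvanced` nodes in the 14B workflows above encode: a two-pass denoise over a single 20-step schedule (cfg 3.5, euler/simple), where the high-noise model runs steps 0-10 with `add_noise` enabled and leftover noise returned, and the low-noise model finishes from step 10 onward without re-adding noise. Here is a minimal toy sketch of that handoff, using a hypothetical `denoise_range` helper rather than ComfyUI's real sampler API:

```python
# Toy illustration of the two-expert split encoded by the KSamplerAdvanced
# widget values above (20 steps, split at step 10, end_at_step 10000).
# `denoise_range` is a hypothetical stand-in, not a ComfyUI function.

def denoise_range(model, latent, *, steps, start_step, end_step):
    """Euler-style toy update over a sub-range of the schedule;
    `model` is any callable model(latent, sigma) -> noise estimate."""
    for i in range(start_step, min(end_step, steps)):
        sigma = 1.0 - i / steps          # toy noise level at step i
        latent = latent - model(latent, sigma) / steps
    return latent

def sample_wan22_14b(high_noise_model, low_noise_model, noisy_latent):
    steps = 20
    # Pass 1: the high-noise expert handles steps 0-10 and hands off a
    # latent that still carries leftover noise.
    latent = denoise_range(high_noise_model, noisy_latent,
                           steps=steps, start_step=0, end_step=10)
    # Pass 2: the low-noise expert finishes steps 10-20 (the workflow's
    # end_at_step of 10000 just means "run to the end"); the result then
    # goes to VAEDecode.
    return denoise_range(low_noise_model, latent,
                         steps=steps, start_step=10, end_step=10000)
```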
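Every workflow here also wraps the diffusion model in a `ModelSamplingSD3` node with a shift of 8.0. For reference, a sketch of the usual SD3-style sigma shift, which remaps the schedule so more steps are spent at high noise levels; this form is an assumption for illustration, not a reading of ComfyUI's implementation:

```python
def shift_sigma(sigma: float, shift: float = 8.0) -> float:
    """Assumed SD3-style sigma shift: higher shift pushes the sampler
    to spend more of the schedule at high noise levels."""
    return shift * sigma / (1.0 + (shift - 1.0) * sigma)

# Example: a mid-schedule sigma of 0.5 maps to ~0.89 with shift = 8.0,
# while shift = 1.0 would leave it unchanged.
```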
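Finally, the 5B workflows show why one `Wan22ImageToVideoLatent` graph covers both text-to-video and image-to-video: the node's `start_image` input is optional (`"link": null` in the t2v JSON, wired to a `LoadImage` node in the i2v one). A toy sketch of that branching follows; the latent shape and channel count are invented purely for illustration and are not the real Wan 2.2 VAE layout:

```python
from typing import Callable, Optional
import numpy as np

def build_video_latent(vae_encode: Callable[[np.ndarray], np.ndarray],
                       width: int = 1280, height: int = 704,
                       length: int = 41,
                       start_image: Optional[np.ndarray] = None) -> np.ndarray:
    """Build the initial video latent (toy shapes, not the real layout)."""
    latent = np.zeros((length, 16, height // 8, width // 8), dtype=np.float32)
    if start_image is not None:
        # i2v: pin the encoded start frame so the sampler keeps it and
        # only generates the remaining frames.
        latent[0] = vae_encode(start_image)
    # t2v when start_image is None: the sampler noises and denoises the
    # whole latent from scratch.
    return latent
```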