diff --git a/README.md b/README.md
index 4d5dae6..b0e721f 100644
--- a/README.md
+++ b/README.md
@@ -50,10 +50,22 @@ Here are some more advanced examples:
 
 [HunyuanDiT](hunyuan_dit)
 
+[Hunyuan Image 2.1](hunyuan_image)
+
+[Chroma](chroma)
+
+[Lumina Image 2.0](lumina2)
+
+[HiDream](hidream)
+
+[Qwen Image](qwen_image)
+
 [Flux](flux)
 
 [Edit/InstructPix2Pix Models](edit_models)
 
+[Omnigen2](omnigen)
+
 [Stable Video Diffusion](video)
 
 [Mochi](mochi)
@@ -62,8 +74,17 @@ Here are some more advanced examples:
 
 [Hunyuan Video](hunyuan_video)
 
+[Nvidia Cosmos](cosmos)
+
+[Nvidia Cosmos Predict2](cosmos_predict2)
+
+[Wan 2.1](wan)
+
+[Wan 2.2](wan22)
+
 [Audio Models](audio)
 
+[Hunyuan3D 2.0](https://docs.comfy.org/tutorials/3d/hunyuan3D-2)
 
 ## Documentation
diff --git a/audio/README.md b/audio/README.md
index bd558cc..97caf63 100644
--- a/audio/README.md
+++ b/audio/README.md
@@ -1,8 +1,18 @@
 # Audio Examples
 
+## ACE Step Model
+
+Download the [ace_step_v1_3.5b.safetensors](https://huggingface.co/Comfy-Org/ACE-Step_ComfyUI_repackaged/blob/main/all_in_one/ace_step_v1_3.5b.safetensors) and save it to your ComfyUI/models/checkpoints/ directory.
+
+The following flac audio file contains a workflow. You can download it and load it or drag it onto the ComfyUI interface.
+
+
+
 ## Stable Audio Open 1.0
 
-Download the [model.safetensors from this page](https://huggingface.co/google-t5/t5-base/blob/main/model.safetensors) and save it as `t5_base.safetensors` to your ComfyUI/models/clip/ directory.
+Download the [model.safetensors from this page](https://huggingface.co/google-t5/t5-base/blob/main/model.safetensors) and save it as `t5_base.safetensors` to your ComfyUI/models/text_encoders/ directory.
 
 Download the [model.safetensors from this page](https://huggingface.co/stabilityai/stable-audio-open-1.0/tree/main) and save it as `stable_audio_open_1.0.safetensors` to your ComfyUI/models/checkpoints/ directory.
diff --git a/audio/ace_step_example.flac b/audio/ace_step_example.flac
new file mode 100644
index 0000000..57b2293
Binary files /dev/null and b/audio/ace_step_example.flac differ
diff --git a/chroma/README.md b/chroma/README.md
new file mode 100644
index 0000000..49568f9
--- /dev/null
+++ b/chroma/README.md
@@ -0,0 +1,11 @@
+# Chroma
+
+This is a model modified from [flux](../flux/) with some changes to the architecture.
+
+To use it you will need one of the t5xxl text encoder model files, which you can find in [this repo](https://huggingface.co/comfyanonymous/flux_text_encoders/tree/main). The fp16 one is recommended; if you don't have that much memory, the fp8_scaled one is recommended. Put it in the ComfyUI/models/text_encoders/ folder.
+
+You can then download the latest Chroma checkpoint from the [official huggingface page](https://huggingface.co/lodestones/Chroma1-HD). It goes in the ComfyUI/models/diffusion_models/ folder.
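If you'd rather fetch these Chroma files from a script than from the browser, here is a minimal sketch using the `huggingface_hub` package. The repo ids and target folders are the ones given above; the exact checkpoint filename on the Chroma1-HD page is an assumption, so check the repo file listing first.

```python
# Minimal sketch: fetch the Chroma files with huggingface_hub.
# The checkpoint filename below is a placeholder -- verify the real name
# on https://huggingface.co/lodestones/Chroma1-HD before running.
from huggingface_hub import hf_hub_download

# t5xxl text encoder (fp16 recommended, fp8_scaled for lower memory)
hf_hub_download(
    repo_id="comfyanonymous/flux_text_encoders",
    filename="t5xxl_fp16.safetensors",
    local_dir="ComfyUI/models/text_encoders",
)

# Chroma checkpoint -> goes in the diffusion_models folder
hf_hub_download(
    repo_id="lodestones/Chroma1-HD",
    filename="Chroma1-HD.safetensors",  # hypothetical filename, check the repo
    local_dir="ComfyUI/models/diffusion_models",
)
```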
+
+Load or drag this image in ComfyUI to get the example workflow:
+
+![Example](chroma_example.png)
diff --git a/chroma/chroma_example.png b/chroma/chroma_example.png
new file mode 100644
index 0000000..77ff321
Binary files /dev/null and b/chroma/chroma_example.png differ
diff --git a/chroma/fennec_girl_flowers.png b/chroma/fennec_girl_flowers.png
new file mode 100644
index 0000000..5f95987
Binary files /dev/null and b/chroma/fennec_girl_flowers.png differ
diff --git a/chroma/fennec_girl_hug.png b/chroma/fennec_girl_hug.png
new file mode 100644
index 0000000..76c091c
Binary files /dev/null and b/chroma/fennec_girl_hug.png differ
diff --git a/chroma/fennec_girl_sing.png b/chroma/fennec_girl_sing.png
new file mode 100644
index 0000000..a308c4a
Binary files /dev/null and b/chroma/fennec_girl_sing.png differ
diff --git a/cosmos/README.md b/cosmos/README.md
new file mode 100644
index 0000000..11e343b
--- /dev/null
+++ b/cosmos/README.md
@@ -0,0 +1,52 @@
+# Original Nvidia Cosmos Models
+
+For the newer Cosmos models, see [Cosmos Predict2](../cosmos_predict2).
+
+[Nvidia Cosmos](https://www.nvidia.com/en-us/ai/cosmos/) is a family of "World Models". ComfyUI currently supports the 7B and 14B text to video diffusion models and the 7B and 14B image to video diffusion models.
+
+## Files to Download
+
+You will first need:
+
+#### Text encoder and VAE:
+
+[oldt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/comfyanonymous/cosmos_1.0_text_encoder_and_VAE_ComfyUI/tree/main/text_encoders) goes in: ComfyUI/models/text_encoders/
+
+[cosmos_cv8x8x8_1.0.safetensors](https://huggingface.co/comfyanonymous/cosmos_1.0_text_encoder_and_VAE_ComfyUI/blob/main/vae/cosmos_cv8x8x8_1.0.safetensors) goes in: ComfyUI/models/vae/
+
+Note: oldt5_xxl is not the same as the t5xxl used in flux and other models.
+oldt5_xxl is t5xxl 1.0 while the one used in flux and others is t5xxl 1.1.
+
+#### Video Models
+
+The video models can be found [in safetensors format here.](https://huggingface.co/mcmonkey/cosmos-1.0/tree/main)
+
+The workflows on this page use [Cosmos-1_0-Diffusion-7B-Text2World.safetensors](https://huggingface.co/mcmonkey/cosmos-1.0/blob/main/Cosmos-1_0-Diffusion-7B-Text2World.safetensors) and [Cosmos-1_0-Diffusion-7B-Video2World.safetensors](https://huggingface.co/mcmonkey/cosmos-1.0/blob/main/Cosmos-1_0-Diffusion-7B-Video2World.safetensors)
+
+These files go in: ComfyUI/models/diffusion_models
+
+Note: "Text to World" means text to video and "Video to World" means image/video to video.
+
+If you want the original diffusion models in .pt format instead of the repacked safetensors, the official links are: [7B-Text2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-7B-Text2World) [7B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-7B-Video2World) [14B-Text2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-14B-Text2World) [14B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-14B-Video2World)
+
+## Workflows
+
+### Text to Video
+
+This workflow requires the 7B text to video model that you can download above.
+
+![Example](text_to_video_cosmos_7B.webp)
+
+[Workflow in Json format](text_to_video_cosmos_7B.json)
+
+### Image to Video
+
+This model supports generating a video from one or more images. If more than one image is fed in, it will use them all as a guide and continue the motion. You can also do basic interpolation by setting both a start_image and an end_image, which works best if those images are similar to each other.
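To illustrate the interpolation setup, here is a small sketch (outside ComfyUI) that prepares a matching start/end frame pair. The 704-pixel minimum and the preferred length of 121 frames come from the notes embedded in the workflow below; the input file names are placeholders.

```python
# Sketch: prepare similar start/end frames for Cosmos interpolation.
# Per the workflow notes, width/height should be at least 704 and the
# video length that works best is 121 frames.
from PIL import Image

def prepare(path: str, size=(1024, 1024)) -> Image.Image:
    # Resize to the resolution used by the example workflow.
    return Image.open(path).convert("RGB").resize(size, Image.LANCZOS)

start = prepare("start_frame.png")  # wired to start_image
end = prepare("end_frame.png")      # wired to end_image
assert min(start.size) >= 704 and min(end.size) >= 704
start.save("start_image.png")
end.save("end_image.png")
# In the workflow, keep length at 121 on the CosmosImageToVideoLatent node.
```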
+ +This workflow requires the 7B image to video model that you can download above. + +This model is trained primarily on realistic videos but in this example you can see that it also works decently on anime. + +![Example](image_to_video_cosmos_7B.webp) + +[Workflow in Json format](image_to_video_cosmos_7B.json) diff --git a/cosmos/image_to_video_cosmos_7B.json b/cosmos/image_to_video_cosmos_7B.json new file mode 100644 index 0000000..f04c2d0 --- /dev/null +++ b/cosmos/image_to_video_cosmos_7B.json @@ -0,0 +1,729 @@ +{ + "last_node_id": 84, + "last_link_id": 198, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -332.08502197265625, + 231.04571533203125 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 75, + 99 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "oldt5_xxl_fp8_e4m3fn_scaled.safetensors", + "cosmos", + "default" + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1270.6927490234375, + 120.51702117919922 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 87 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 172, + 181 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 61, + "type": "ModelSamplingContinuousEDM", + "pos": [ + 410, + -20 + ], + "size": [ + 327.5999755859375, + 106 + ], + "flags": {}, + "order": 9, + "mode": 4, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 157 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 194 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingContinuousEDM" + }, + "widgets_values": [ + "edm", + 80, + 0.002 + ] + }, + { + "id": 74, + "type": "LTXVConditioning", + "pos": [ + 540, + 150 + ], + "size": [ + 210, + 78 + ], + "flags": {}, + "order": 11, + "mode": 4, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 185 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 186 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 187 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 188 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 80, + "type": "Note", + "pos": [ + 475.15997314453125, + -163.2658233642578 + ], + "size": [ + 266.2419128417969, + 99.78375244140625 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "These pink nodes are \"bypassed\" meaning they don't do anything. To unbypass them: right click -> bypass\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 82, + "type": "Note", + "pos": [ + -1.2908354997634888, + 565.2498168945312 + ], + "size": [ + 312.01824951171875, + 126.14599609375 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "The positive and negative prompts should be long. Short prompts will still generate a coherent video however it might not follow the prompt very well." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 0, + 330 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 186 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality." + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 69, + "type": "SaveAnimatedWEBP", + "pos": [ + 1520, + 120 + ], + "size": [ + 763.5289916992188, + 578.3422241210938 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 172 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 80, + "default" + ] + }, + { + "id": 73, + "type": "SaveAnimatedPNG", + "pos": [ + 2299.842041015625, + 121.30572509765625 + ], + "size": [ + 720.1341552734375, + 829.0499877929688 + ], + "flags": {}, + "order": 15, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 181 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveAnimatedPNG" + }, + "widgets_values": [ + "ComfyUI", + 24, + 4 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 843.46337890625, + 122.69183349609375 + ], + "size": [ + 385.8114318847656, + 262 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 194 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 187 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 188 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 196 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 87 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 959521281192720, + "randomize", + 20, + 6.5, + "res_multistep", + "karras", + 1 + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 950.17138671875, + 453.1830749511719 + ], + "size": [ + 278.68310546875, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 76, + 195 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "cosmos_cv8x8x8_1.0.safetensors" + ] + }, + { + "id": 81, + "type": "Note", + "pos": [ + 480, + 730 + ], + "size": [ + 332.6131591796875, + 168.23121643066406 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model loves a length of 121 and anything too far away from this will result in a bad video.\n\nThe width and height should be equal or bigger to 704\n\nYou can set a start_image, end_image or both 
at the same time." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 7.319890975952148, + -20.895429611206055 + ], + "size": [ + 380, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 157 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "Cosmos-1_0-Diffusion-7B-Video2World.safetensors", + "default" + ] + }, + { + "id": 84, + "type": "LoadImage", + "pos": [ + -6.688927173614502, + 743.3736572265625 + ], + "size": [ + 416.1836242675781, + 366.83038330078125 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 198 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "ComfyUI_256804_.png", + "image" + ] + }, + { + "id": 42, + "type": "CLIPTextEncode", + "pos": [ + 0, + 120 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 99 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 185 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "cute anime girl with massive fennec fox ears and a big fluffy tail long blonde wavy hair blue eyes wearing a pink plaid sweater and a red scarf with a oversized black open coat with a golden circuit board pattern and a long blue maxi skirt and large black boots standing in the beautiful outdoors snow with amazing view mountains forest sky clouds beautiful sunset evening colorful horizon, she is smiling as the evening turns into night\n\n" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 83, + "type": "CosmosImageToVideoLatent", + "pos": [ + 480, + 490 + ], + "size": [ + 315, + 170 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 195 + }, + { + "name": "start_image", + "type": "IMAGE", + "link": 198, + "shape": 7 + }, + { + "name": "end_image", + "type": "IMAGE", + "link": null, + "shape": 7 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 196 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CosmosImageToVideoLatent" + }, + "widgets_values": [ + 1024, + 1024, + 121, + 1 + ] + } + ], + "links": [ + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 87, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 99, + 38, + 0, + 42, + 0, + "CLIP" + ], + [ + 157, + 37, + 0, + 61, + 0, + "MODEL" + ], + [ + 172, + 8, + 0, + 69, + 0, + "IMAGE" + ], + [ + 181, + 8, + 0, + 73, + 0, + "IMAGE" + ], + [ + 185, + 42, + 0, + 74, + 0, + "CONDITIONING" + ], + [ + 186, + 7, + 0, + 74, + 1, + "CONDITIONING" + ], + [ + 187, + 74, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 188, + 74, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 194, + 61, + 0, + 3, + 0, + "MODEL" + ], + [ + 195, + 39, + 0, + 83, + 0, + "VAE" + ], + [ + 196, + 83, + 0, + 3, + 3, + "LATENT" + ], + [ + 198, + 84, + 0, + 83, + 1, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} \ No newline at end of file diff --git a/cosmos/image_to_video_cosmos_7B.webp 
b/cosmos/image_to_video_cosmos_7B.webp new file mode 100644 index 0000000..875ba1b Binary files /dev/null and b/cosmos/image_to_video_cosmos_7B.webp differ diff --git a/cosmos/text_to_video_cosmos_7B.json b/cosmos/text_to_video_cosmos_7B.json new file mode 100644 index 0000000..48e435e --- /dev/null +++ b/cosmos/text_to_video_cosmos_7B.json @@ -0,0 +1,663 @@ +{ + "last_node_id": 82, + "last_link_id": 194, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -332.08502197265625, + 231.04571533203125 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 75, + 99 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "oldt5_xxl_fp8_e4m3fn_scaled.safetensors", + "cosmos", + "default" + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1270.6927490234375, + 120.51702117919922 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 87 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 172, + 181 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 61, + "type": "ModelSamplingContinuousEDM", + "pos": [ + 410, + -20 + ], + "size": [ + 327.5999755859375, + 106 + ], + "flags": {}, + "order": 9, + "mode": 4, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 157 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 194 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingContinuousEDM" + }, + "widgets_values": [ + "edm", + 80, + 0.002 + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 950.17138671875, + 453.1830749511719 + ], + "size": [ + 278.68310546875, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 76 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "cosmos_cv8x8x8_1.0.safetensors" + ] + }, + { + "id": 78, + "type": "EmptyCosmosLatentVideo", + "pos": [ + 473.05047607421875, + 380.00341796875 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 193 + ] + } + ], + "properties": { + "Node name for S&R": "EmptyCosmosLatentVideo" + }, + "widgets_values": [ + 1280, + 704, + 121, + 1 + ] + }, + { + "id": 74, + "type": "LTXVConditioning", + "pos": [ + 540, + 150 + ], + "size": [ + 210, + 78 + ], + "flags": {}, + "order": 10, + "mode": 4, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 185 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 186 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 187 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 188 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 80, + "type": "Note", + "pos": [ + 475.15997314453125, + -163.2658233642578 + ], + "size": [ + 266.2419128417969, + 99.78375244140625 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": 
[], + "properties": {}, + "widgets_values": [ + "These pink nodes are \"bypassed\" meaning they don't do anything. To unbypass them: right click -> bypass\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 70, + -20 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 157 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "Cosmos-1_0-Diffusion-7B-Text2World.safetensors", + "default" + ] + }, + { + "id": 81, + "type": "Note", + "pos": [ + 475.4506530761719, + 570.9951782226562 + ], + "size": [ + 312.01824951171875, + 126.14599609375 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model loves a length of 121 and anything too far away from this will result in a bad video.\n\nThe width and height should be equal or bigger to 704" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 82, + "type": "Note", + "pos": [ + -1.2908354997634888, + 565.2498168945312 + ], + "size": [ + 312.01824951171875, + 126.14599609375 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "The positive and negative prompts should be long. Short prompts will still generate a coherent video however it might not follow the prompt very well." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 42, + "type": "CLIPTextEncode", + "pos": [ + 0, + 120 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 99 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 185 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "A crystalline waterfall stands partially frozen, its edges draped with translucent ice that catches the sunlight in prisms of blue and silver. Below, a half-frozen pool spreads out, bordered by delicate ice formations. Through the fresh snow, a red fox moves gracefully, its russet coat vibrant against the white landscape, leaving perfect star-shaped prints behind as steam rises from its breath in the crisp winter air. 
The scene is wrapped in snow-muffled silence, broken only by the gentle murmur of water still flowing beneath the ice.\n\n" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 0, + 330 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 186 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality." + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 69, + "type": "SaveAnimatedWEBP", + "pos": [ + 1520, + 120 + ], + "size": [ + 763.5289916992188, + 578.3422241210938 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 172 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 80, + "default" + ] + }, + { + "id": 73, + "type": "SaveAnimatedPNG", + "pos": [ + 2299.842041015625, + 121.30572509765625 + ], + "size": [ + 720.1341552734375, + 829.0499877929688 + ], + "flags": {}, + "order": 14, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 181 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveAnimatedPNG" + }, + "widgets_values": [ + "ComfyUI", + 24, + 4 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 843.46337890625, + 122.69183349609375 + ], + "size": [ + 385.8114318847656, + 262 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 194 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 187 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 188 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 193 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 87 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 959521281192720, + "randomize", + 20, + 6.5, + "res_multistep", + "karras", + 1 + ] + } + ], + "links": [ + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 87, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 99, + 38, + 0, + 42, + 0, + "CLIP" + ], + [ + 157, + 37, + 0, + 61, + 0, + "MODEL" + ], + [ + 172, + 8, + 0, + 69, + 0, + "IMAGE" + ], + [ + 181, + 8, + 0, + 73, + 0, + "IMAGE" + ], + [ + 185, + 42, + 0, + 74, + 0, + "CONDITIONING" + ], + [ + 186, + 7, + 0, + 74, + 1, + "CONDITIONING" + ], + [ + 187, + 74, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 188, + 74, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 193, + 78, + 0, + 3, + 3, + "LATENT" + ], + [ + 194, + 61, + 0, + 3, + 0, + "MODEL" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.3513057093105383, + 
"offset": [ + 347.8132028514172, + 200.3286418889474 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/cosmos/text_to_video_cosmos_7B.webp b/cosmos/text_to_video_cosmos_7B.webp new file mode 100644 index 0000000..18530d1 Binary files /dev/null and b/cosmos/text_to_video_cosmos_7B.webp differ diff --git a/cosmos_predict2/README.md b/cosmos_predict2/README.md new file mode 100644 index 0000000..bf77457 --- /dev/null +++ b/cosmos_predict2/README.md @@ -0,0 +1,46 @@ +# Nvidia Cosmos Predict2 + +These are a family of text to image and image to video models from Nvidia. + +## Files to Download + +You will first need: + +#### Text encoder and VAE: + +[oldt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/comfyanonymous/cosmos_1.0_text_encoder_and_VAE_ComfyUI/tree/main/text_encoders) goes in: ComfyUI/models/text_encoders/ + +[wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors) goes in: ComfyUI/models/vae/ + + +Note: oldt5_xxl is not the same as the t5xxl used in flux and other models. +oldt5_xxl is t5xxl 1.0 while the one used in flux and others is t5xxl 1.1 + + +You can find all the diffusion models (go in ComfyUI/models/diffusion_models/) here: [Repackaged safetensors files](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/tree/main) or [Official Nvidia Model Files](https://huggingface.co/collections/nvidia/cosmos-predict2-68028efc052239369a0f2959) + + +## Workflows + +### Text to Image + +This workflow uses the 2B text to image cosmos predict2 model. The file used in the workflow is [cosmos_predict2_2B_t2i.safetensors](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/blob/main/cosmos_predict2_2B_t2i.safetensors) this file goes in: ComfyUI/models/diffusion_models/ + +![Example](cosmos_predict2_2b_t2i_example.png) + +You can load this image in [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the full workflow. + +I think the 2B model is the most interesting one but you can find the bigger 14B model here: [cosmos_predict2_14B_t2i.safetensors](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/blob/main/cosmos_predict2_14B_t2i.safetensors) and use it in the workflow above. + + +### Image to Video + +These models are pretty picky about the resolution/length of the videos. This workflow is for the 480p models, for the 720p models you will have to set the resolution to 720p or your results might be bad. + +This workflow uses the 2B image to video cosmos predict2 model. 
The file used in the workflow is [cosmos_predict2_2B_video2world_480p_16fps.safetensors](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/blob/main/cosmos_predict2_2B_video2world_480p_16fps.safetensors) this file goes in: ComfyUI/models/diffusion_models/ + +![Example](cosmos_predict2_2b_i2v_example.webp) + +[Workflow in Json format](cosmos_predict2_2b_i2v_example.json) + + diff --git a/cosmos_predict2/cosmos_predict2_2b_i2v_example.json b/cosmos_predict2/cosmos_predict2_2b_i2v_example.json new file mode 100644 index 0000000..f7d8bdd --- /dev/null +++ b/cosmos_predict2/cosmos_predict2_2b_i2v_example.json @@ -0,0 +1,548 @@ +{ + "id": "242a6140-7341-49ca-876b-c01366b39b84", + "revision": 0, + "last_node_id": 31, + "last_link_id": 46, + "nodes": [ + { + "id": 10, + "type": "CLIPLoader", + "pos": [ + 0, + 250 + ], + "size": [ + 380, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 34, + 35 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "oldt5_xxl_fp8_e4m3fn_scaled.safetensors", + "cosmos", + "default" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 870, + 180 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 33 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 4 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 6 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 42 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 7 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 788533681999051, + "randomize", + 30, + 4, + "euler", + "simple", + 1 + ] + }, + { + "id": 15, + "type": "VAELoader", + "pos": [ + 80, + 400 + ], + "size": [ + 300, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 17, + 43 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 28, + "type": "CosmosPredict2ImageToVideoLatent", + "pos": [ + 499.99761962890625, + 616.21435546875 + ], + "size": [ + 330.7769470214844, + 170 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 43 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 44 + }, + { + "name": "end_image", + "shape": 7, + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 42 + ] + } + ], + "properties": { + "Node name for S&R": "CosmosPredict2ImageToVideoLatent" + }, + "widgets_values": [ + 848, + 480, + 93, + 1 + ] + }, + { + "id": 29, + "type": "LoadImage", + "pos": [ + 85.3239517211914, + 633.9439697265625 + ], + "size": [ + 274.080078125, + 314 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 44 + ] + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "mountains.png", + "image" + ] + }, + { + "id": 13, + "type": "UNETLoader", + "pos": [ + 0, + 120 + ], + "size": [ + 410, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": 
"MODEL", + "links": [ + 33 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "cosmos_predict2_2B_video2world_480p_16fps.safetensors", + "default" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 420, + 390 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 34 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 6 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1200, + 180 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 7 + }, + { + "name": "vae", + "type": "VAE", + "link": 17 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 45, + 46 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 31, + "type": "SaveWEBM", + "pos": [ + 1890, + 190 + ], + "size": [ + 270, + 274.8302001953125 + ], + "flags": {}, + "order": 10, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 46 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 24 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 420, + 180 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 35 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 4 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "camera moving quickly through the scene timelapse wind" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 30, + "type": "SaveAnimatedWEBP", + "pos": [ + 1440, + 180 + ], + "size": [ + 270, + 366 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 45 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16.000000000000004, + false, + 85, + "default" + ] + } + ], + "links": [ + [ + 4, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 6, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 7, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 17, + 15, + 0, + 8, + 1, + "VAE" + ], + [ + 33, + 13, + 0, + 3, + 0, + "MODEL" + ], + [ + 34, + 10, + 0, + 7, + 0, + "CLIP" + ], + [ + 35, + 10, + 0, + 6, + 0, + "CLIP" + ], + [ + 42, + 28, + 0, + 3, + 3, + "LATENT" + ], + [ + 43, + 15, + 0, + 28, + 0, + "VAE" + ], + [ + 44, + 29, + 0, + 28, + 1, + "IMAGE" + ], + [ + 45, + 8, + 0, + 30, + 0, + "IMAGE" + ], + [ + 46, + 8, + 0, + 31, + 0, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.8390545288824265, + "offset": [ + 61.29088261146557, + 145.33443300197447 + ] + }, + "frontendVersion": "1.21.7" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/cosmos_predict2/cosmos_predict2_2b_i2v_example.webp b/cosmos_predict2/cosmos_predict2_2b_i2v_example.webp new file mode 100644 index 0000000..29d98f8 Binary files /dev/null and b/cosmos_predict2/cosmos_predict2_2b_i2v_example.webp differ diff --git 
a/cosmos_predict2/cosmos_predict2_2b_t2i_example.png b/cosmos_predict2/cosmos_predict2_2b_t2i_example.png
new file mode 100644
index 0000000..50cd0a5
Binary files /dev/null and b/cosmos_predict2/cosmos_predict2_2b_t2i_example.png differ
diff --git a/flux/README.md b/flux/README.md
index 2f4a339..621e5cd 100644
--- a/flux/README.md
+++ b/flux/README.md
@@ -8,9 +8,9 @@ For the easy to use single file versions that you can easily use in [ComfyUI](ht
 
 ### Files to download for the regular version
 
-If you don't have t5xxl_fp16.safetensors or clip_l.safetensors already in your ComfyUI/models/clip/ directory you can find them on: [this link.](https://huggingface.co/comfyanonymous/flux_text_encoders/tree/main) You can use t5xxl_fp8_e4m3fn.safetensors instead for lower memory usage but the fp16 one is recommended if you have more than 32GB ram.
+If you don't have t5xxl_fp16.safetensors or clip_l.safetensors already in your ComfyUI/models/text_encoders/ directory, you can find them at [this link.](https://huggingface.co/comfyanonymous/flux_text_encoders/tree/main) You can use t5xxl_fp8_e4m3fn_scaled.safetensors instead for lower memory usage, but the fp16 one is recommended if you have more than 32GB of RAM.
 
-The VAE can be found [here](https://huggingface.co/black-forest-labs/FLUX.1-schnell/blob/main/ae.safetensors) and should go in your ComfyUI/models/vae/ folder.
+The VAE can be found [here](https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/blob/main/split_files/vae/ae.safetensors) and should go in your ComfyUI/models/vae/ folder.
 
 ### Tips if you are running out of memory:
 
@@ -30,7 +30,7 @@ You can then load or drag the following image in ComfyUI to get the workflow:
 
 Flux Schnell is a distilled 4 step model.
 
-You can find the Flux Schnell diffusion model weights [here](https://huggingface.co/black-forest-labs/FLUX.1-schnell/blob/main/flux1-schnell.safetensors) this file should go in your: ComfyUI/models/unet/ folder.
+You can find the Flux Schnell diffusion model weights [here](https://huggingface.co/black-forest-labs/FLUX.1-schnell); the flux1-schnell.safetensors file should go in your ComfyUI/models/unet/ folder.
 
 You can then load or drag the following image in ComfyUI to get the workflow:
 
@@ -64,6 +64,21 @@ You can then load or drag the following image in ComfyUI to get the workflow:
 
 The following examples might require that you have some of the regular flux files that you can find links to at the top of this page.
 
+### Flux Kontext (image editing) model
+
+Download the [flux1-kontext-dev.safetensors](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) model file and put it in your ComfyUI/models/diffusion_models/ folder. There is an alternative fp8 model here: [flux1-dev-kontext_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/flux1-kontext-dev_ComfyUI/blob/main/split_files/diffusion_models/flux1-dev-kontext_fp8_scaled.safetensors) if the other one is too large for you.
+
+Here's a simple example. You can load or drag the following image in ComfyUI to get the workflow:
+
+![Example](flux_kontext_example.png)
+
+You can find the input image for the above workflow [here](../chroma/fennec_girl_sing.png).
+
+Here's another, more complex example that generates a comic from the above input image:
+
+![Example](flux_kontext_example_comic.webp)
+
 ### Fill (Inpainting) model
 
 Download the [flux1-fill-dev.safetensors](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev) model file and put it in your ComfyUI/models/diffusion_models/ folder.
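A side note on how the "load or drag the image" mechanism on this page works: ComfyUI embeds the workflow JSON in the PNG's text metadata, so you can also extract it yourself. A minimal sketch with Pillow, assuming a standard ComfyUI-saved PNG whose metadata has not been stripped:

```python
# Sketch: read the workflow JSON that ComfyUI embeds in a PNG.
# ComfyUI-saved PNGs usually carry a "workflow" key (the UI graph) and a
# "prompt" key (the API-format graph) in their text chunks.
import json
from PIL import Image

img = Image.open("flux_kontext_example.png")
workflow = img.info.get("workflow")  # what drag-and-drop loads into the UI

if workflow:
    graph = json.loads(workflow)
    print(f"Embedded workflow has {len(graph['nodes'])} nodes")
else:
    print("No embedded workflow (metadata may have been stripped)")
```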
diff --git a/flux/flux_kontext_example.png b/flux/flux_kontext_example.png
new file mode 100644
index 0000000..3dea15b
Binary files /dev/null and b/flux/flux_kontext_example.png differ
diff --git a/flux/flux_kontext_example_comic.webp b/flux/flux_kontext_example_comic.webp
new file mode 100644
index 0000000..6586fdd
Binary files /dev/null and b/flux/flux_kontext_example_comic.webp differ
diff --git a/hidream/README.md b/hidream/README.md
new file mode 100644
index 0000000..cb1ffc6
--- /dev/null
+++ b/hidream/README.md
@@ -0,0 +1,56 @@
+# HiDream
+
+[HiDream I1](https://github.com/HiDream-ai/HiDream-I1) is a state-of-the-art image diffusion model.
+
+## Files to Download
+
+Download the text encoder files:
+
+* [clip_l_hidream.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/text_encoders/clip_l_hidream.safetensors)
+* [clip_g_hidream.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/text_encoders/clip_g_hidream.safetensors)
+* [t5xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/text_encoders/t5xxl_fp8_e4m3fn_scaled.safetensors)
+* [llama_3.1_8b_instruct_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/text_encoders/llama_3.1_8b_instruct_fp8_scaled.safetensors)
+
+Put these 4 files in your ComfyUI/models/text_encoders directory.
+
+You can find them all [here](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/tree/main/split_files/text_encoders). You might already have t5xxl downloaded.
+
+The VAE can be found [here](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/vae/ae.safetensors) and should go in your ComfyUI/models/vae/ folder. This is the Flux VAE, so you might already have it.
+
+The diffusion models can be found in this [folder](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/tree/main/split_files/diffusion_models).
+
+## HiDream dev Workflow
+
+Download [hidream_i1_dev_bf16.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/diffusion_models/hidream_i1_dev_bf16.safetensors) and put it in your ComfyUI/models/diffusion_models/ directory.
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](hidream_dev_example.png)
+
+## HiDream full Workflow
+
+Download [hidream_i1_full_fp16.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/diffusion_models/hidream_i1_full_fp16.safetensors) and put it in your ComfyUI/models/diffusion_models/ directory.
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](hidream_full_example.png)
+
+## HiDream e1.1
+
+This is an edit model. Download [hidream_e1_1_bf16.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/diffusion_models/hidream_e1_1_bf16.safetensors) and put it in your ComfyUI/models/diffusion_models/ directory.
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](hidream_e1.1_example.png)
+
+## HiDream e1
+
+This is the old, experimental HiDream 1.0 edit model. Download [hidream_e1_full_bf16.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/diffusion_models/hidream_e1_full_bf16.safetensors) and put it in your ComfyUI/models/diffusion_models/ directory.
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](hidream_e1_example.png)
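Since every HiDream file above lives in the same repackaged repo, here is a short sketch that pulls the four text encoders listed in Files to Download into place with `huggingface_hub`; the repo id, subfolder, and filenames are the ones given on this page.

```python
# Sketch: fetch the four HiDream text encoders into
# ComfyUI/models/text_encoders. hf_hub_download returns a cache path;
# we copy each file flat into the target folder.
import shutil
from pathlib import Path
from huggingface_hub import hf_hub_download

dest = Path("ComfyUI/models/text_encoders")
dest.mkdir(parents=True, exist_ok=True)

encoders = [
    "clip_l_hidream.safetensors",
    "clip_g_hidream.safetensors",
    "t5xxl_fp8_e4m3fn_scaled.safetensors",
    "llama_3.1_8b_instruct_fp8_scaled.safetensors",
]
for name in encoders:
    cached = hf_hub_download(
        repo_id="Comfy-Org/HiDream-I1_ComfyUI",
        filename=f"split_files/text_encoders/{name}",
    )
    shutil.copy(cached, dest / name)
```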
diff --git a/hidream/hidream_dev_example.png b/hidream/hidream_dev_example.png new file mode 100644 index 0000000..db79354 Binary files /dev/null and b/hidream/hidream_dev_example.png differ diff --git a/hidream/hidream_e1.1_example.png b/hidream/hidream_e1.1_example.png new file mode 100644 index 0000000..ebc62c6 Binary files /dev/null and b/hidream/hidream_e1.1_example.png differ diff --git a/hidream/hidream_e1_example.png b/hidream/hidream_e1_example.png new file mode 100644 index 0000000..e595e22 Binary files /dev/null and b/hidream/hidream_e1_example.png differ diff --git a/hidream/hidream_full_example.png b/hidream/hidream_full_example.png new file mode 100644 index 0000000..224f4d3 Binary files /dev/null and b/hidream/hidream_full_example.png differ diff --git a/hunyuan_dit/README.md b/hunyuan_dit/README.md index 4aed81f..17589fe 100644 --- a/hunyuan_dit/README.md +++ b/hunyuan_dit/README.md @@ -4,9 +4,9 @@ Hunyuan DiT is a diffusion model that understands both english and chinese. ## Hunyuan DiT 1.2 -Download [hunyuan_dit_1.2.safetensors](https://huggingface.co/comfyanonymous/hunyuan_dit_comfyui/blob/main/hunyuan_dit_1.2.safetensors) and put it in your ComfyUI/checkpoints directory. +Download [hunyuan_dit_1.2.safetensors](https://huggingface.co/comfyanonymous/hunyuan_dit_comfyui/blob/main/hunyuan_dit_1.2.safetensors) and put it in your ComfyUI/models/checkpoints directory. -You can then load up the following image in ComfyUI to get the workflow: +You can then load up or drag the following image in ComfyUI to get the workflow: ![Example](hunyuan_dit_1.2_example.png) diff --git a/hunyuan_image/README.md b/hunyuan_image/README.md new file mode 100644 index 0000000..3b51cfe --- /dev/null +++ b/hunyuan_image/README.md @@ -0,0 +1,26 @@ +# Hunyuan Image 2.1 + +[Hunyuan Image 2.1](https://huggingface.co/tencent/HunyuanImage-2.1) is a powerful diffusion model for image generation. 
+ +## Basic Workflow + +Download the following models and place them in the appropriate ComfyUI directories: + +### Text Encoders +Download and put in your ComfyUI/models/text_encoders directory: +- [byt5_small_glyphxl_fp16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/text_encoders/byt5_small_glyphxl_fp16.safetensors) +- [qwen_2.5_vl_7b.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/text_encoders/qwen_2.5_vl_7b.safetensors) + +### VAE Models +Download and put in your ComfyUI/models/vae directory: +- [hunyuan_image_2.1_vae_fp16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/vae/hunyuan_image_2.1_vae_fp16.safetensors) +- **Optional (for refiner):** [hunyuan_image_refiner_vae_fp16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/vae/hunyuan_image_refiner_vae_fp16.safetensors) + +### Diffusion Models +Download and put in your ComfyUI/models/diffusion_models directory: +- [hunyuanimage2.1_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/diffusion_models/hunyuanimage2.1_bf16.safetensors) +- **Optional (for refiner):** [hunyuanimage2.1_refiner_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/diffusion_models/hunyuanimage2.1_refiner_bf16.safetensors) + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](hunyuan_image_example.png) diff --git a/hunyuan_image/hunyuan_image_example.png b/hunyuan_image/hunyuan_image_example.png new file mode 100644 index 0000000..3dcfbbc Binary files /dev/null and b/hunyuan_image/hunyuan_image_example.png differ diff --git a/hunyuan_video/README.md b/hunyuan_video/README.md index 05c8ebb..0f936c1 100644 --- a/hunyuan_video/README.md +++ b/hunyuan_video/README.md @@ -3,14 +3,14 @@ [Hunyuan Video](https://huggingface.co/tencent/HunyuanVideo) is a text to video model. -Download the [hunyuan_video_t2v_720p_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/diffusion_models) file and put it in your ComfyUI/models/diffusion_models folder. - Download the clip_l.safetensors and llava_llama3_fp8_scaled.safetensors files from [here](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/text_encoders) and put them in your ComfyUI/models/text_encoders directory. Download the [hunyuan_video_vae_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/vae) file and put it in your ComfyUI/models/vae folder. ### Text to Video +Download the [hunyuan_video_t2v_720p_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/diffusion_models) file and put it in your ComfyUI/models/diffusion_models folder. + This model can also generate still images by setting the video length to 1. ![Example](hunyuan_video_text_to_video.webp) @@ -18,3 +18,33 @@ This model can also generate still images by setting the video length to 1. [Workflow in Json format](hunyuan_video_text_to_video.json) You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. 
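If you want to run a workflow like this headlessly instead of through the browser UI, ComfyUI exposes a small HTTP API. A minimal sketch of queueing a job on a local instance, assuming the workflow was exported with the "Save (API Format)" option (the UI-format JSON linked above has a different shape and is not accepted by /prompt as-is):

```python
# Sketch: queue an API-format workflow on a local ComfyUI server.
# Assumes ComfyUI is listening on the default 127.0.0.1:8188.
import json
import urllib.request

# Hypothetical API-format export of the text-to-video workflow above.
with open("hunyuan_video_text_to_video_api.json") as f:
    prompt_graph = json.load(f)

payload = json.dumps({"prompt": prompt_graph}).encode("utf-8")
req = urllib.request.Request(
    "http://127.0.0.1:8188/prompt",
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read()))  # includes the queued prompt_id
```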
+ +### Image to Video + +Download the [llava_llama3_vision.safetensors](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/blob/main/split_files/clip_vision/llava_llama3_vision.safetensors) file and put it in your ComfyUI/models/clip_vision/ folder. + +There are two different models you can choose from which give different results. + +#### v1 "concat" + +This first model follows the guiding image less than the other one but might give better movement. + +Download the [hunyuan_video_image_to_video_720p_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/diffusion_models) file and put it in your ComfyUI/models/diffusion_models/ folder. + +![Example](hunyuan_video_image_to_video.webp) + +[Workflow in Json format](hunyuan_video_image_to_video.json) + +You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. The input image can be found on the [flux](../flux) page. + +#### v2 "replace" + +This second model follows the guiding image very closely but seems to be a bit less dynamic than the first one. + +Download the [hunyuan_video_v2_replace_image_to_video_720p_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/diffusion_models) file and put it in your ComfyUI/models/diffusion_models/ folder. + +![Example](hunyuan_video_image_to_video_v2.webp) + +[Workflow in Json format](hunyuan_video_image_to_video_v2.json) + +You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. The input image can be found on the [flux](../flux) page. diff --git a/hunyuan_video/hunyuan_video_image_to_video.json b/hunyuan_video/hunyuan_video_image_to_video.json new file mode 100644 index 0000000..0a8508c --- /dev/null +++ b/hunyuan_video/hunyuan_video_image_to_video.json @@ -0,0 +1,1142 @@ +{ + "last_node_id": 89, + "last_link_id": 230, + "nodes": [ + { + "id": 22, + "type": "BasicGuider", + "pos": [ + 600, + 0 + ], + "size": [ + 222.3482666015625, + 46 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 195, + "slot_index": 0 + }, + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 129, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "GUIDER", + "type": "GUIDER", + "shape": 3, + "links": [ + 30 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BasicGuider" + }, + "widgets_values": [] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1150, + 90 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 19, + "mode": 2, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 181 + }, + { + "name": "vae", + "type": "VAE", + "link": 206 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 13, + "type": "SamplerCustomAdvanced", + "pos": [ + 860, + 200 + ], + "size": [ + 272.3617858886719, + 124.53733825683594 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "name": "noise", + "type": "NOISE", + "link": 37, + "slot_index": 0 + }, + { + "name": "guider", + "type": "GUIDER", + "link": 30, + "slot_index": 1 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 19, + "slot_index": 2 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 20, + "slot_index": 3 + }, + 
{ + "name": "latent_image", + "type": "LATENT", + "link": 216, + "slot_index": 4 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "shape": 3, + "links": [ + 181, + 210 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "shape": 3, + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustomAdvanced" + }, + "widgets_values": [] + }, + { + "id": 74, + "type": "Note", + "pos": [ + 1147.7459716796875, + 405.0789489746094 + ], + "size": [ + 210, + 170 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Use the tiled decode node by default because most people will need it.\n\nLower the tile_size and overlap if you run out of memory." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 26, + "type": "FluxGuidance", + "pos": [ + 514.2149047851562, + 86.77685546875 + ], + "size": [ + 317.4000244140625, + 58 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 225 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "shape": 3, + "links": [ + 129 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "FluxGuidance" + }, + "widgets_values": [ + 6 + ], + "color": "#233", + "bgcolor": "#355" + }, + { + "id": 67, + "type": "ModelSamplingSD3", + "pos": [ + 360, + 0 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 209 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 195 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 7 + ] + }, + { + "id": 17, + "type": "BasicScheduler", + "pos": [ + 510, + 660 + ], + "size": [ + 315, + 106 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 190, + "slot_index": 0 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "shape": 3, + "links": [ + 20 + ] + } + ], + "properties": { + "Node name for S&R": "BasicScheduler" + }, + "widgets_values": [ + "simple", + 20, + 1 + ] + }, + { + "id": 16, + "type": "KSamplerSelect", + "pos": [ + 520, + 550 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "shape": 3, + "links": [ + 19 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 78, + "type": "HunyuanImageToVideo", + "pos": [ + 510, + 820 + ], + "size": [ + 315, + 170 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 218 + }, + { + "name": "vae", + "type": "VAE", + "link": 223 + }, + { + "name": "start_image", + "type": "IMAGE", + "shape": 7, + "link": 222 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 225 + ], + "slot_index": 0 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 216 + ] + } + ], + "properties": { + "Node name for S&R": "HunyuanImageToVideo" + }, + "widgets_values": [ + 720, + 720, + 57, + 1 + ] + }, + { + "id": 73, + "type": "VAEDecodeTiled", + "pos": [ + 1150, + 200 + ], + "size": [ + 210, + 150 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 210 
+ }, + { + "name": "vae", + "type": "VAE", + "link": 211 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 230 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecodeTiled" + }, + "widgets_values": [ + 256, + 64, + 64, + 8 + ] + }, + { + "id": 75, + "type": "SaveAnimatedWEBP", + "pos": [ + 1640, + 200 + ], + "size": [ + 621.495361328125, + 587.12451171875 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 228 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 90, + "default" + ] + }, + { + "id": 84, + "type": "SaveWEBM", + "pos": [ + 2280, + 200 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 23, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 229 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 12 + ] + }, + { + "id": 87, + "type": "Note", + "pos": [ + 1410, + 340 + ], + "size": [ + 210, + 170 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "The official code removes the first 4 frames because they are sometimes bad. You can bypass (CTRL-B) this node if you don't want this." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 80, + "type": "TextEncodeHunyuanVideo_ImageToVideo", + "pos": [ + 390, + 180 + ], + "size": [ + 441, + 200 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 224 + }, + { + "name": "clip_vision_output", + "type": "CLIP_VISION_OUTPUT", + "link": 219 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 218 + ] + } + ], + "title": "Text Encode Hunyuan Video (ImageToVideo)", + "properties": { + "Node name for S&R": "TextEncodeHunyuanVideo_ImageToVideo" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and a big fluffy tail wearing a maid outfit running" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 81, + "type": "CLIPVisionEncode", + "pos": [ + 200, + 530 + ], + "size": [ + 253.60000610351562, + 78 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 220 + }, + { + "name": "image", + "type": "IMAGE", + "link": 221 + } + ], + "outputs": [ + { + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "links": [ + 219 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionEncode" + }, + "widgets_values": [ + "none" + ] + }, + { + "id": 89, + "type": "Note", + "pos": [ + 190, + 660 + ], + "size": [ + 260, + 210 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Select your image here. The image is fed to both the text encoder and directly to the model.\n\nYou can set the resolution and length of the video using the HunyuanImageToVideo node." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 83, + "type": "LoadImage", + "pos": [ + -190, + 700 + ], + "size": [ + 365.4132080078125, + 471.8512268066406 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 221, + 222 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "flux_dev_example.png", + "image" + ] + }, + { + "id": 82, + "type": "CLIPVisionLoader", + "pos": [ + -190, + 580 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 220 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionLoader" + }, + "widgets_values": [ + "llava_llama3_vision.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 10, + "type": "VAELoader", + "pos": [ + -190, + 470 + ], + "size": [ + 350, + 60 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "shape": 3, + "links": [ + 206, + 211, + 223 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "hunyuan_video_vae_bf16.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 11, + "type": "DualCLIPLoader", + "pos": [ + -190, + 290 + ], + "size": [ + 350, + 122 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "shape": 3, + "links": [ + 224 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "DualCLIPLoader" + }, + "widgets_values": [ + "clip_l.safetensors", + "llava_llama3_fp8_scaled.safetensors", + "hunyuan_video", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 12, + "type": "UNETLoader", + "pos": [ + -190, + 160 + ], + "size": [ + 404.6181640625, + 82 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "shape": 3, + "links": [ + 190, + 209 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "hunyuan_video_image_to_video_720p_bf16.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 77, + "type": "Note", + "pos": [ + -140, + 0 + ], + "size": [ + 350, + 110 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Select a fp8 weight_dtype if you are running out of memory." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 86, + "type": "ImageFromBatch", + "pos": [ + 1410, + 200 + ], + "size": [ + 210, + 82 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 230 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 228, + 229 + ] + } + ], + "properties": { + "Node name for S&R": "ImageFromBatch" + }, + "widgets_values": [ + 4, + 4096 + ] + }, + { + "id": 25, + "type": "RandomNoise", + "pos": [ + 520, + 420 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "NOISE", + "type": "NOISE", + "shape": 3, + "links": [ + 37 + ] + } + ], + "properties": { + "Node name for S&R": "RandomNoise" + }, + "widgets_values": [ + 187330947843879, + "randomize" + ], + "color": "#2a363b", + "bgcolor": "#3f5159" + } + ], + "links": [ + [ + 19, + 16, + 0, + 13, + 2, + "SAMPLER" + ], + [ + 20, + 17, + 0, + 13, + 3, + "SIGMAS" + ], + [ + 30, + 22, + 0, + 13, + 1, + "GUIDER" + ], + [ + 37, + 25, + 0, + 13, + 0, + "NOISE" + ], + [ + 129, + 26, + 0, + 22, + 1, + "CONDITIONING" + ], + [ + 181, + 13, + 0, + 8, + 0, + "LATENT" + ], + [ + 190, + 12, + 0, + 17, + 0, + "MODEL" + ], + [ + 195, + 67, + 0, + 22, + 0, + "MODEL" + ], + [ + 206, + 10, + 0, + 8, + 1, + "VAE" + ], + [ + 209, + 12, + 0, + 67, + 0, + "MODEL" + ], + [ + 210, + 13, + 0, + 73, + 0, + "LATENT" + ], + [ + 211, + 10, + 0, + 73, + 1, + "VAE" + ], + [ + 216, + 78, + 1, + 13, + 4, + "LATENT" + ], + [ + 218, + 80, + 0, + 78, + 0, + "CONDITIONING" + ], + [ + 219, + 81, + 0, + 80, + 1, + "CLIP_VISION_OUTPUT" + ], + [ + 220, + 82, + 0, + 81, + 0, + "CLIP_VISION" + ], + [ + 221, + 83, + 0, + 81, + 1, + "IMAGE" + ], + [ + 222, + 83, + 0, + 78, + 2, + "IMAGE" + ], + [ + 223, + 10, + 0, + 78, + 1, + "VAE" + ], + [ + 224, + 11, + 0, + 80, + 0, + "CLIP" + ], + [ + 225, + 78, + 0, + 26, + 0, + "CONDITIONING" + ], + [ + 228, + 86, + 0, + 75, + 0, + "IMAGE" + ], + [ + 229, + 86, + 0, + 84, + 0, + "IMAGE" + ], + [ + 230, + 73, + 0, + 86, + 0, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "groupNodes": {} + }, + "models": [ + { + "name": "llava_llama3_vision.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/resolve/main/split_files/clip_vision/llava_llama3_vision.safetensors?download=true", + "directory": "clip_vision" + }, + { + "name": "clip_l.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/resolve/main/split_files/text_encoders/clip_l.safetensors?download=true", + "directory": "text_encoders" + }, + { + "name": "llava_llama3_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/resolve/main/split_files/text_encoders/llava_llama3_fp8_scaled.safetensors?download=true", + "directory": "text_encoders" + }, + { + "name": "hunyuan_video_vae_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/resolve/main/split_files/vae/hunyuan_video_vae_bf16.safetensors?download=true", + "directory": "vae" + }, + { + "name": "hunyuan_video_image_to_video_720p_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/resolve/main/split_files/diffusion_models/hunyuan_video_image_to_video_720p_bf16.safetensors?download=true", + "directory": "diffusion_models" + } + ], + "version": 0.4 +} diff --git a/hunyuan_video/hunyuan_video_image_to_video.webp 
b/hunyuan_video/hunyuan_video_image_to_video.webp new file mode 100644 index 0000000..772ddfd Binary files /dev/null and b/hunyuan_video/hunyuan_video_image_to_video.webp differ diff --git a/hunyuan_video/hunyuan_video_image_to_video_v2.json b/hunyuan_video/hunyuan_video_image_to_video_v2.json new file mode 100644 index 0000000..2900399 --- /dev/null +++ b/hunyuan_video/hunyuan_video_image_to_video_v2.json @@ -0,0 +1,1048 @@ +{ + "last_node_id": 90, + "last_link_id": 233, + "nodes": [ + { + "id": 22, + "type": "BasicGuider", + "pos": [ + 600, + 0 + ], + "size": [ + 222.3482666015625, + 46 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 195, + "slot_index": 0 + }, + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 129, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "GUIDER", + "type": "GUIDER", + "shape": 3, + "links": [ + 30 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BasicGuider" + }, + "widgets_values": [] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1150, + 90 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 18, + "mode": 2, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 181 + }, + { + "name": "vae", + "type": "VAE", + "link": 206 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 13, + "type": "SamplerCustomAdvanced", + "pos": [ + 860, + 200 + ], + "size": [ + 272.3617858886719, + 124.53733825683594 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "name": "noise", + "type": "NOISE", + "link": 37, + "slot_index": 0 + }, + { + "name": "guider", + "type": "GUIDER", + "link": 30, + "slot_index": 1 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 19, + "slot_index": 2 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 20, + "slot_index": 3 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 216, + "slot_index": 4 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "shape": 3, + "links": [ + 181, + 210 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "shape": 3, + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustomAdvanced" + }, + "widgets_values": [] + }, + { + "id": 74, + "type": "Note", + "pos": [ + 1147.7459716796875, + 405.0789489746094 + ], + "size": [ + 210, + 170 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Use the tiled decode node by default because most people will need it.\n\nLower the tile_size and overlap if you run out of memory." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 26, + "type": "FluxGuidance", + "pos": [ + 514.2149047851562, + 86.77685546875 + ], + "size": [ + 317.4000244140625, + 58 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 225 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "shape": 3, + "links": [ + 129 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "FluxGuidance" + }, + "widgets_values": [ + 6 + ], + "color": "#233", + "bgcolor": "#355" + }, + { + "id": 67, + "type": "ModelSamplingSD3", + "pos": [ + 360, + 0 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 209 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 195 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 7 + ] + }, + { + "id": 17, + "type": "BasicScheduler", + "pos": [ + 510, + 660 + ], + "size": [ + 315, + 106 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 190, + "slot_index": 0 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "shape": 3, + "links": [ + 20 + ] + } + ], + "properties": { + "Node name for S&R": "BasicScheduler" + }, + "widgets_values": [ + "simple", + 20, + 1 + ] + }, + { + "id": 16, + "type": "KSamplerSelect", + "pos": [ + 520, + 550 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "shape": 3, + "links": [ + 19 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 81, + "type": "CLIPVisionEncode", + "pos": [ + 200, + 530 + ], + "size": [ + 253.60000610351562, + 78 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 220 + }, + { + "name": "image", + "type": "IMAGE", + "link": 221 + } + ], + "outputs": [ + { + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "links": [ + 219 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionEncode" + }, + "widgets_values": [ + "none" + ] + }, + { + "id": 89, + "type": "Note", + "pos": [ + 190, + 660 + ], + "size": [ + 260, + 210 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Select your image here. The image is fed to both the text encoder and directly to the model.\n\nYou can set the resolution and length of the video using the HunyuanImageToVideo node." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 83, + "type": "LoadImage", + "pos": [ + -190, + 700 + ], + "size": [ + 365.4132080078125, + 471.8512268066406 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 221, + 222 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "flux_dev_example.png", + "image" + ] + }, + { + "id": 82, + "type": "CLIPVisionLoader", + "pos": [ + -190, + 580 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 220 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionLoader" + }, + "widgets_values": [ + "llava_llama3_vision.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 10, + "type": "VAELoader", + "pos": [ + -190, + 470 + ], + "size": [ + 350, + 60 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "shape": 3, + "links": [ + 206, + 211, + 223 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "hunyuan_video_vae_bf16.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 11, + "type": "DualCLIPLoader", + "pos": [ + -190, + 290 + ], + "size": [ + 350, + 122 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "shape": 3, + "links": [ + 224 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "DualCLIPLoader" + }, + "widgets_values": [ + "clip_l.safetensors", + "llava_llama3_fp8_scaled.safetensors", + "hunyuan_video", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 77, + "type": "Note", + "pos": [ + -140, + 0 + ], + "size": [ + 350, + 110 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Select a fp8 weight_dtype if you are running out of memory." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 78, + "type": "HunyuanImageToVideo", + "pos": [ + 510, + 820 + ], + "size": [ + 315, + 194 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 218 + }, + { + "name": "vae", + "type": "VAE", + "link": 223 + }, + { + "name": "start_image", + "type": "IMAGE", + "shape": 7, + "link": 222 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 225 + ], + "slot_index": 0 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 216 + ] + } + ], + "properties": { + "Node name for S&R": "HunyuanImageToVideo" + }, + "widgets_values": [ + 720, + 720, + 57, + 1, + "v2 (replace)" + ] + }, + { + "id": 73, + "type": "VAEDecodeTiled", + "pos": [ + 1150, + 200 + ], + "size": [ + 210, + 150 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 210 + }, + { + "name": "vae", + "type": "VAE", + "link": 211 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 232, + 233 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecodeTiled" + }, + "widgets_values": [ + 256, + 64, + 64, + 8 + ] + }, + { + "id": 75, + "type": "SaveAnimatedWEBP", + "pos": [ + 1390, + 200 + ], + "size": [ + 621.495361328125, + 587.12451171875 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 232 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 90, + "default" + ] + }, + { + "id": 84, + "type": "SaveWEBM", + "pos": [ + 2030, + 200 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 21, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 233 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 12 + ] + }, + { + "id": 80, + "type": "TextEncodeHunyuanVideo_ImageToVideo", + "pos": [ + 390, + 180 + ], + "size": [ + 441, + 200 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 224 + }, + { + "name": "clip_vision_output", + "type": "CLIP_VISION_OUTPUT", + "link": 219 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 218 + ] + } + ], + "title": "Text Encode Hunyuan Video (ImageToVideo)", + "properties": { + "Node name for S&R": "TextEncodeHunyuanVideo_ImageToVideo" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and a big fluffy tail wearing a maid outfit walking forward", + 4 + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 25, + "type": "RandomNoise", + "pos": [ + 520, + 420 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "NOISE", + "type": "NOISE", + "shape": 3, + "links": [ + 37 + ] + } + ], + "properties": { + "Node name for S&R": "RandomNoise" + }, + "widgets_values": [ + 72275780843998, + "randomize" + ], + "color": "#2a363b", + "bgcolor": "#3f5159" + }, + { + "id": 12, + "type": "UNETLoader", + "pos": [ + -190, + 160 + ], + "size": [ + 404.6181640625, + 82 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "shape": 3, + "links": [ + 190, + 209 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + 
"widgets_values": [ + "hunyuan_video_v2_replace_image_to_video_720p_bf16.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + } + ], + "links": [ + [ + 19, + 16, + 0, + 13, + 2, + "SAMPLER" + ], + [ + 20, + 17, + 0, + 13, + 3, + "SIGMAS" + ], + [ + 30, + 22, + 0, + 13, + 1, + "GUIDER" + ], + [ + 37, + 25, + 0, + 13, + 0, + "NOISE" + ], + [ + 129, + 26, + 0, + 22, + 1, + "CONDITIONING" + ], + [ + 181, + 13, + 0, + 8, + 0, + "LATENT" + ], + [ + 190, + 12, + 0, + 17, + 0, + "MODEL" + ], + [ + 195, + 67, + 0, + 22, + 0, + "MODEL" + ], + [ + 206, + 10, + 0, + 8, + 1, + "VAE" + ], + [ + 209, + 12, + 0, + 67, + 0, + "MODEL" + ], + [ + 210, + 13, + 0, + 73, + 0, + "LATENT" + ], + [ + 211, + 10, + 0, + 73, + 1, + "VAE" + ], + [ + 216, + 78, + 1, + 13, + 4, + "LATENT" + ], + [ + 218, + 80, + 0, + 78, + 0, + "CONDITIONING" + ], + [ + 219, + 81, + 0, + 80, + 1, + "CLIP_VISION_OUTPUT" + ], + [ + 220, + 82, + 0, + 81, + 0, + "CLIP_VISION" + ], + [ + 221, + 83, + 0, + 81, + 1, + "IMAGE" + ], + [ + 222, + 83, + 0, + 78, + 2, + "IMAGE" + ], + [ + 223, + 10, + 0, + 78, + 1, + "VAE" + ], + [ + 224, + 11, + 0, + 80, + 0, + "CLIP" + ], + [ + 225, + 78, + 0, + 26, + 0, + "CONDITIONING" + ], + [ + 232, + 73, + 0, + 75, + 0, + "IMAGE" + ], + [ + 233, + 73, + 0, + 84, + 0, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "groupNodes": {} + }, + "version": 0.4 +} \ No newline at end of file diff --git a/hunyuan_video/hunyuan_video_image_to_video_v2.webp b/hunyuan_video/hunyuan_video_image_to_video_v2.webp new file mode 100644 index 0000000..fc3dda7 Binary files /dev/null and b/hunyuan_video/hunyuan_video_image_to_video_v2.webp differ diff --git a/ltxv/README.md b/ltxv/README.md index ea14a16..e97ab27 100644 --- a/ltxv/README.md +++ b/ltxv/README.md @@ -4,28 +4,39 @@ The important thing with this model is to give it long descriptive prompts. -Download the [ltx-video-2b-v0.9.safetensors](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.safetensors) file and put it in your ComfyUI/models/checkpoints folder. +Download the [ltx-video-2b-v0.9.5.safetensors](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.5.safetensors) file and put it in your ComfyUI/models/checkpoints folder. If you don't have it already downloaded you can download the [t5xxl_fp16.safetensors](https://huggingface.co/Comfy-Org/mochi_preview_repackaged/blob/main/split_files/text_encoders/t5xxl_fp16.safetensors) file and put it in your ComfyUI/models/text_encoders/ folder. -### Text to Video +### Image to Video -![Example](ltxv_text_to_video.webp) +Input image: + -[Workflow in Json format](ltxv_text_to_video.json) +#### Simple img2vid workflow with start image only: -You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. +![Example](ltxv_image_to_video_simple.0.9.5.webp) -### Image to Video +[Workflow in Json format](ltxv_image_to_video_simple.0.9.5.json) -[Input image](https://commons.wikimedia.org/wiki/File:Havelock_Island,_Mangrove_tree_on_the_beach,_Andaman_Islands.jpg): - -Workflow: +#### More complex img2vid workflow with multiple guiding images: -![Example](ltxv_image_to_video.webp) +![Example](ltxv_image_to_video.0.9.5.webp) -[Workflow in Json format](ltxv_image_to_video.json) +[Workflow in Json format](ltxv_image_to_video.0.9.5.json) You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. 
+### Text to Video + +![Example](ltxv_text_to_video_0.9.5.webp) + +[Workflow in Json format](ltxv_text_to_video_0.9.5.json) + +You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. + + +[Old ltxv examples](README_old.md) + + diff --git a/ltxv/README_old.md b/ltxv/README_old.md new file mode 100644 index 0000000..ea1b077 --- /dev/null +++ b/ltxv/README_old.md @@ -0,0 +1,31 @@ +# Lightricks LTX-Video Model + +[LTX-Video](https://huggingface.co/Lightricks/LTX-Video) is a very efficient video model by lightricks. + +The important thing with this model is to give it long descriptive prompts. + +Download the [ltx-video-2b-v0.9.1.safetensors](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.1.safetensors) or old [ltx-video-2b-v0.9.safetensors](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.safetensors) file and put it in your ComfyUI/models/checkpoints folder. + +If you don't have it already downloaded you can download the [t5xxl_fp16.safetensors](https://huggingface.co/Comfy-Org/mochi_preview_repackaged/blob/main/split_files/text_encoders/t5xxl_fp16.safetensors) file and put it in your ComfyUI/models/text_encoders/ folder. + +### Text to Video + +![Example](ltxv_text_to_video.webp) + +[Workflow in Json format](ltxv_text_to_video.json) + +You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. + +### Image to Video + +[Input image](https://commons.wikimedia.org/wiki/File:Havelock_Island,_Mangrove_tree_on_the_beach,_Andaman_Islands.jpg): + + +Workflow: + +![Example](ltxv_image_to_video.webp) + +[Workflow in Json format](ltxv_image_to_video.json) + +You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. + diff --git a/ltxv/fox.jpg b/ltxv/fox.jpg new file mode 100644 index 0000000..bb91607 Binary files /dev/null and b/ltxv/fox.jpg differ diff --git a/ltxv/ltxv_image_to_video.0.9.5.json b/ltxv/ltxv_image_to_video.0.9.5.json new file mode 100644 index 0000000..91fb7f5 --- /dev/null +++ b/ltxv/ltxv_image_to_video.0.9.5.json @@ -0,0 +1,1158 @@ +{ + "last_node_id": 94, + "last_link_id": 234, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 60, + 190 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "t5xxl_fp16.safetensors", + "ltxv", + "default" + ] + }, + { + "id": 76, + "type": "Note", + "pos": [ + 40, + 350 + ], + "size": [ + 360, + 200 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model needs long descriptive prompts, if the prompt is too short the quality will suffer greatly." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 420, + 390 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 195 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 44, + "type": "CheckpointLoaderSimple", + "pos": [ + 520, + 30 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 181 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": null + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 87, + 196, + 207 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "ltx-video-2b-v0.9.5.safetensors" + ] + }, + { + "id": 71, + "type": "LTXVScheduler", + "pos": [ + 880, + 290 + ], + "size": [ + 315, + 154 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "latent", + "type": "LATENT", + "shape": 7, + "link": 228 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 182 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVScheduler" + }, + "widgets_values": [ + 30, + 2.05, + 0.95, + true, + 0.1 + ] + }, + { + "id": 79, + "type": "LTXVAddGuide", + "pos": [ + 900, + 670 + ], + "size": [ + 315, + 162 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 194 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 195 + }, + { + "name": "vae", + "type": "VAE", + "link": 196 + }, + { + "name": "latent", + "type": "LATENT", + "link": 193 + }, + { + "name": "image", + "type": "IMAGE", + "link": 203 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 210 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 211 + ], + "slot_index": 1 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 212, + 228 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LTXVAddGuide" + }, + "widgets_values": [ + 0, + 1 + ] + }, + { + "id": 81, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 560, + 790 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 193 + ] + } + ], + "properties": { + "Node name for S&R": "EmptyLTXVLatentVideo" + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 92, + "type": "LTXVPreprocess", + "pos": [ + 560, + 1070 + ], + "size": [ + 275.9266662597656, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 229 + } + ], + "outputs": [ + { + "name": "output_image", + "type": "IMAGE", + "links": [ + 230 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVPreprocess" + }, + "widgets_values": [ + 35 + ] + }, + { + "id": 73, + 
"type": "KSamplerSelect", + "pos": [ + 880, + 190 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 172 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 85, + "type": "LoadImage", + "pos": [ + 150, + 1070 + ], + "size": [ + 385.15606689453125, + 333.3305358886719 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 229 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "sunset.png", + "image" + ] + }, + { + "id": 84, + "type": "LTXVAddGuide", + "pos": [ + 1240, + 670 + ], + "size": [ + 315, + 162 + ], + "flags": {}, + "order": 13, + "mode": 4, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 210 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 211 + }, + { + "name": "vae", + "type": "VAE", + "link": 207 + }, + { + "name": "latent", + "type": "LATENT", + "link": 212 + }, + { + "name": "image", + "type": "IMAGE", + "link": 230 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 213 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 214 + ], + "slot_index": 1 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 215 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LTXVAddGuide" + }, + "widgets_values": [ + -1, + 1 + ] + }, + { + "id": 78, + "type": "LoadImage", + "pos": [ + 150, + 670 + ], + "size": [ + 385.15606689453125, + 333.3305358886719 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 226 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fox.jpg", + "image" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 420, + 180 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 194 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "A red fox moving gracefully, its russet coat vibrant against the white landscape, leaving perfect star-shaped prints behind as steam rises from its breath in the crisp winter air. The scene is wrapped in snow-muffled silence, broken only by the gentle murmur of water still flowing beneath the ice." 
+ ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 82, + "type": "LTXVPreprocess", + "pos": [ + 570, + 670 + ], + "size": [ + 275.9266662597656, + 58 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 226 + } + ], + "outputs": [ + { + "name": "output_image", + "type": "IMAGE", + "links": [ + 203 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVPreprocess" + }, + "widgets_values": [ + 40 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1740, + 30 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 232 + }, + { + "name": "vae", + "type": "VAE", + "link": 87 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106, + 217 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 86, + "type": "SaveWEBM", + "pos": [ + 2480, + 30 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 20, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 217 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 12 + ] + }, + { + "id": 93, + "type": "Note", + "pos": [ + 1239.5482177734375, + 880.7611083984375 + ], + "size": [ + 310, + 150 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "These nodes can be chained together to set multiple guiding images.\n\nIn this case -1 means the last frame so you can unbypass this node (CTRL-B) if you want to predict from the last frame instead of the first." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 41, + "type": "SaveAnimatedWEBP", + "pos": [ + 1970, + 30 + ], + "size": [ + 493.98468017578125, + 481.28692626953125 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 106 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 90, + "default" + ] + }, + { + "id": 72, + "type": "SamplerCustom", + "pos": [ + 1201, + 32 + ], + "size": [ + 355.20001220703125, + 230 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 181 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 199 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 167 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 172 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 182 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 215 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "links": [ + 231 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustom" + }, + "widgets_values": [ + true, + 397166166231987, + "randomize", + 3 + ] + }, + { + "id": 94, + "type": "LTXVCropGuides", + "pos": [ + 1552.4571533203125, + -99.19783020019531 + ], + "size": [ + 216.59999084472656, + 66 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 233 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 234 + }, + { + "name": "latent", + "type": "LATENT", + "link": 231 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": null + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": null + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 232 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LTXVCropGuides" + }, + "widgets_values": [] + }, + { + "id": 69, + "type": "LTXVConditioning", + "pos": [ + 920, + 60 + ], + "size": [ + 223.8660125732422, + 78 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 213 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 214 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 199, + 233 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 167, + 234 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 25 + ] + } + ], + "links": [ + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 87, + 44, + 2, + 8, + 1, + "VAE" + ], + [ + 106, + 8, + 0, + 41, + 0, + "IMAGE" + ], + [ + 167, + 69, + 1, + 72, + 2, + "CONDITIONING" + ], + [ + 172, + 73, + 0, + 72, + 3, + "SAMPLER" + ], + [ + 181, + 44, + 0, + 72, + 0, + "MODEL" + ], + [ + 182, + 71, + 0, + 72, + 4, + "SIGMAS" + ], + [ + 193, + 81, + 0, + 79, + 3, + "LATENT" + ], + [ + 194, + 6, + 0, + 79, + 0, + "CONDITIONING" + ], + [ + 195, + 7, + 0, + 79, + 1, + "CONDITIONING" + ], + [ + 196, + 44, + 2, + 79, + 2, + "VAE" + ], + [ + 199, + 69, + 0, + 72, + 1, + "CONDITIONING" + ], + [ + 203, + 82, + 0, + 79, + 4, + "IMAGE" + ], + [ + 207, + 44, + 2, + 84, + 2, + "VAE" + ], + [ + 210, + 79, + 0, + 84, + 0, + "CONDITIONING" + ], + [ + 211, + 
79, + 1, + 84, + 1, + "CONDITIONING" + ], + [ + 212, + 79, + 2, + 84, + 3, + "LATENT" + ], + [ + 213, + 84, + 0, + 69, + 0, + "CONDITIONING" + ], + [ + 214, + 84, + 1, + 69, + 1, + "CONDITIONING" + ], + [ + 215, + 84, + 2, + 72, + 5, + "LATENT" + ], + [ + 217, + 8, + 0, + 86, + 0, + "IMAGE" + ], + [ + 226, + 78, + 0, + 82, + 0, + "IMAGE" + ], + [ + 228, + 79, + 2, + 71, + 0, + "LATENT" + ], + [ + 229, + 85, + 0, + 92, + 0, + "IMAGE" + ], + [ + 230, + 92, + 0, + 84, + 4, + "IMAGE" + ], + [ + 231, + 72, + 0, + 94, + 2, + "LATENT" + ], + [ + 232, + 94, + 2, + 8, + 0, + "LATENT" + ], + [ + 233, + 69, + 0, + 94, + 0, + "CONDITIONING" + ], + [ + 234, + 69, + 1, + 94, + 1, + "CONDITIONING" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} \ No newline at end of file diff --git a/ltxv/ltxv_image_to_video.0.9.5.webp b/ltxv/ltxv_image_to_video.0.9.5.webp new file mode 100644 index 0000000..8ed52b0 Binary files /dev/null and b/ltxv/ltxv_image_to_video.0.9.5.webp differ diff --git a/ltxv/ltxv_image_to_video_simple.0.9.5.json b/ltxv/ltxv_image_to_video_simple.0.9.5.json new file mode 100644 index 0000000..547d5cb --- /dev/null +++ b/ltxv/ltxv_image_to_video_simple.0.9.5.json @@ -0,0 +1,808 @@ +{ + "last_node_id": 95, + "last_link_id": 250, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 60, + 190 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "t5xxl_fp16.safetensors", + "ltxv", + "default" + ] + }, + { + "id": 76, + "type": "Note", + "pos": [ + 40, + 350 + ], + "size": [ + 360, + 200 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model needs long descriptive prompts, if the prompt is too short the quality will suffer greatly." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 44, + "type": "CheckpointLoaderSimple", + "pos": [ + 520, + 30 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 181 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": null + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 87, + 250 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "ltx-video-2b-v0.9.5.safetensors" + ] + }, + { + "id": 71, + "type": "LTXVScheduler", + "pos": [ + 880, + 290 + ], + "size": [ + 315, + 154 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "latent", + "type": "LATENT", + "shape": 7, + "link": 249 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 182 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVScheduler" + }, + "widgets_values": [ + 30, + 2.05, + 0.95, + true, + 0.1 + ] + }, + { + "id": 73, + "type": "KSamplerSelect", + "pos": [ + 880, + 190 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 172 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 78, + "type": "LoadImage", + "pos": [ + 150, + 670 + ], + "size": [ + 385.15606689453125, + 333.3305358886719 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 226 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fox.jpg", + "image" + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1740, + 30 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 235 + }, + { + "name": "vae", + "type": "VAE", + "link": 87 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106, + 217 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 86, + "type": "SaveWEBM", + "pos": [ + 2480, + 30 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 14, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 217 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 12 + ] + }, + { + "id": 41, + "type": "SaveAnimatedWEBP", + "pos": [ + 1970, + 30 + ], + "size": [ + 493.98468017578125, + 481.28692626953125 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 106 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 90, + "default" + ] + }, + { + "id": 69, + "type": "LTXVConditioning", + "pos": [ + 920, + 60 + ], + "size": [ + 223.8660125732422, + 78 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 245 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 246 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 199 + ], + "slot_index": 0 + 
}, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 167 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 420, + 180 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 239 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "A red fox moving gracefully, its russet coat vibrant against the white landscape, leaving perfect star-shaped prints behind as steam rises from its breath in the crisp winter air. The scene is wrapped in snow-muffled silence, broken only by the gentle murmur of water still flowing beneath the ice." + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 420, + 390 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 240 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 82, + "type": "LTXVPreprocess", + "pos": [ + 570, + 670 + ], + "size": [ + 275.9266662597656, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 226 + } + ], + "outputs": [ + { + "name": "output_image", + "type": "IMAGE", + "links": [ + 248 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVPreprocess" + }, + "widgets_values": [ + 40 + ] + }, + { + "id": 95, + "type": "LTXVImgToVideo", + "pos": [ + 888.8251342773438, + 608.7010498046875 + ], + "size": [ + 315, + 190 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 239 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 240 + }, + { + "name": "vae", + "type": "VAE", + "link": 250 + }, + { + "name": "image", + "type": "IMAGE", + "link": 248 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 245 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 246 + ], + "slot_index": 1 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 247, + 249 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LTXVImgToVideo" + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 72, + "type": "SamplerCustom", + "pos": [ + 1201, + 32 + ], + "size": [ + 355.20001220703125, + 230 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 181 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 199 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 167 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 172 + }, + { + "name": 
"sigmas", + "type": "SIGMAS", + "link": 182 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 247 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "links": [ + 235 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustom" + }, + "widgets_values": [ + true, + 1092847494041144, + "randomize", + 3 + ] + } + ], + "links": [ + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 87, + 44, + 2, + 8, + 1, + "VAE" + ], + [ + 106, + 8, + 0, + 41, + 0, + "IMAGE" + ], + [ + 167, + 69, + 1, + 72, + 2, + "CONDITIONING" + ], + [ + 172, + 73, + 0, + 72, + 3, + "SAMPLER" + ], + [ + 181, + 44, + 0, + 72, + 0, + "MODEL" + ], + [ + 182, + 71, + 0, + 72, + 4, + "SIGMAS" + ], + [ + 199, + 69, + 0, + 72, + 1, + "CONDITIONING" + ], + [ + 217, + 8, + 0, + 86, + 0, + "IMAGE" + ], + [ + 226, + 78, + 0, + 82, + 0, + "IMAGE" + ], + [ + 235, + 72, + 0, + 8, + 0, + "LATENT" + ], + [ + 239, + 6, + 0, + 95, + 0, + "CONDITIONING" + ], + [ + 240, + 7, + 0, + 95, + 1, + "CONDITIONING" + ], + [ + 245, + 95, + 0, + 69, + 0, + "CONDITIONING" + ], + [ + 246, + 95, + 1, + 69, + 1, + "CONDITIONING" + ], + [ + 247, + 95, + 2, + 72, + 5, + "LATENT" + ], + [ + 248, + 82, + 0, + 95, + 3, + "IMAGE" + ], + [ + 249, + 95, + 2, + 71, + 0, + "LATENT" + ], + [ + 250, + 44, + 2, + 95, + 2, + "VAE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} \ No newline at end of file diff --git a/ltxv/ltxv_image_to_video_simple.0.9.5.webp b/ltxv/ltxv_image_to_video_simple.0.9.5.webp new file mode 100644 index 0000000..d623f7b Binary files /dev/null and b/ltxv/ltxv_image_to_video_simple.0.9.5.webp differ diff --git a/ltxv/ltxv_text_to_video_0.9.5.json b/ltxv/ltxv_text_to_video_0.9.5.json new file mode 100644 index 0000000..3243ade --- /dev/null +++ b/ltxv/ltxv_text_to_video_0.9.5.json @@ -0,0 +1,654 @@ +{ + "last_node_id": 77, + "last_link_id": 183, + "nodes": [ + { + "id": 69, + "type": "LTXVConditioning", + "pos": [ + 920, + 60 + ], + "size": [ + 223.8660125732422, + 78 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 169 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 170 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 166 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 167 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 71, + "type": "LTXVScheduler", + "pos": [ + 856, + 531 + ], + "size": [ + 315, + 154 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "latent", + "type": "LATENT", + "shape": 7, + "link": 168 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 182 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVScheduler" + }, + "widgets_values": [ + 30, + 2.05, + 0.95, + true, + 0.1 + ] + }, + { + "id": 76, + "type": "Note", + "pos": [ + 40, + 350 + ], + "size": [ + 360, + 200 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model needs long descriptive prompts, if the prompt is too short the quality will suffer greatly." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 44, + "type": "CheckpointLoaderSimple", + "pos": [ + 520, + 30 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 181 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": null + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 87 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "ltx-video-2b-v0.9.5.safetensors" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 60, + 190 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "t5xxl_fp16.safetensors", + "ltxv", + "default" + ] + }, + { + "id": 72, + "type": "SamplerCustom", + "pos": [ + 1201, + 32 + ], + "size": [ + 355.20001220703125, + 230 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 181 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 166 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 167 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 172 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 182 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 175 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "links": [ + 171 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustom" + }, + "widgets_values": [ + true, + 552872474466407, + "randomize", + 3 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1600, + 30 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 171 + }, + { + "name": "vae", + "type": "VAE", + "link": 87 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106, + 183 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 77, + "type": "SaveWEBM", + "pos": [ + 2530, + 30 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 12, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 183 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 18 + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 420, + 500 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 170 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 41, + "type": "SaveAnimatedWEBP", + "pos": [ + 1830, + 30 + ], + "size": [ + 680, + 610 + ], + 
"flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 106 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 95, + "default" + ] + }, + { + "id": 73, + "type": "KSamplerSelect", + "pos": [ + 860, + 420 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 172 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "res_multistep" + ] + }, + { + "id": 70, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 860, + 240 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 168, + 175 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLTXVLatentVideo" + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 422.44207763671875, + 184.3018035888672 + ], + "size": [ + 430, + 270 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 169 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "A drone quickly rises through a bank of morning fog, revealing a pristine alpine lake surrounded by snow-capped mountains. The camera glides forward over the glassy water, capturing perfect reflections of the peaks. As it continues, the perspective shifts to reveal a lone wooden cabin with a curl of smoke from its chimney, nestled among tall pines at the lake's edge. The final shot tracks upward rapidly, transitioning from intimate to epic as the full mountain range comes into view, bathed in the golden light of sunrise breaking through scattered clouds." + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 87, + 44, + 2, + 8, + 1, + "VAE" + ], + [ + 106, + 8, + 0, + 41, + 0, + "IMAGE" + ], + [ + 166, + 69, + 0, + 72, + 1, + "CONDITIONING" + ], + [ + 167, + 69, + 1, + 72, + 2, + "CONDITIONING" + ], + [ + 168, + 70, + 0, + 71, + 0, + "LATENT" + ], + [ + 169, + 6, + 0, + 69, + 0, + "CONDITIONING" + ], + [ + 170, + 7, + 0, + 69, + 1, + "CONDITIONING" + ], + [ + 171, + 72, + 0, + 8, + 0, + "LATENT" + ], + [ + 172, + 73, + 0, + 72, + 3, + "SAMPLER" + ], + [ + 175, + 70, + 0, + 72, + 5, + "LATENT" + ], + [ + 181, + 44, + 0, + 72, + 0, + "MODEL" + ], + [ + 182, + 71, + 0, + 72, + 4, + "SIGMAS" + ], + [ + 183, + 8, + 0, + 77, + 0, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} \ No newline at end of file diff --git a/ltxv/ltxv_text_to_video_0.9.5.webp b/ltxv/ltxv_text_to_video_0.9.5.webp new file mode 100644 index 0000000..604e813 Binary files /dev/null and b/ltxv/ltxv_text_to_video_0.9.5.webp differ diff --git a/lumina2/README.md b/lumina2/README.md new file mode 100644 index 0000000..f942ecd --- /dev/null +++ b/lumina2/README.md @@ -0,0 +1,12 @@ +# Lumina Image 2.0 + +[Lumina Image 2.0](https://github.com/Alpha-VLLM/Lumina-Image-2.0) is a interesting diffusion model that uses gemma 2 2B for its text encoder. 
+
+## Basic Workflow
+
+Download [lumina_2.safetensors](https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/blob/main/all_in_one/lumina_2.safetensors) and put it in your ComfyUI/models/checkpoints directory.
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](lumina2_basic_example.png)
+
diff --git a/lumina2/lumina2_basic_example.png b/lumina2/lumina2_basic_example.png
new file mode 100644
index 0000000..a2e85cd
Binary files /dev/null and b/lumina2/lumina2_basic_example.png differ
diff --git a/omnigen/README.md b/omnigen/README.md
new file mode 100644
index 0000000..da334b2
--- /dev/null
+++ b/omnigen/README.md
@@ -0,0 +1,24 @@
+# Omnigen 2
+
+[Omnigen 2](https://github.com/VectorSpaceLab/OmniGen2) is a model that can be used to edit images with text prompts.
+
+## Files to Download
+
+You will first need:
+
+[omnigen2_fp16.safetensors](https://huggingface.co/Comfy-Org/Omnigen2_ComfyUI_repackaged/blob/main/split_files/diffusion_models/omnigen2_fp16.safetensors) goes in: ComfyUI/models/diffusion_models/
+
+[qwen_2.5_vl_fp16.safetensors](https://huggingface.co/Comfy-Org/Omnigen2_ComfyUI_repackaged/blob/main/split_files/text_encoders/qwen_2.5_vl_fp16.safetensors) goes in: ComfyUI/models/text_encoders/
+
+[ae.safetensors](https://huggingface.co/Comfy-Org/Omnigen2_ComfyUI_repackaged/blob/main/split_files/vae/ae.safetensors) is the flux VAE that you might already have; it goes in: ComfyUI/models/vae/
+
+## Workflows
+
+This is a basic workflow using an image as a character reference. For multiple image inputs, chain ReferenceLatent nodes together.
+
+![Example](omnigen2_example.png)
+
+You can load this image in [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the full workflow.
+
+You can find the input image [here](../chroma/fennec_girl_sing.png)
+
diff --git a/omnigen/omnigen2_example.png b/omnigen/omnigen2_example.png
new file mode 100644
index 0000000..bf3329d
Binary files /dev/null and b/omnigen/omnigen2_example.png differ
diff --git a/qwen_image/README.md b/qwen_image/README.md
new file mode 100644
index 0000000..460b833
--- /dev/null
+++ b/qwen_image/README.md
@@ -0,0 +1,42 @@
+# Qwen Image
+
+[Qwen Image](https://github.com/QwenLM/Qwen-Image) is a 20B diffusion model.
+
+## Basic Workflow
+
+Download [qwen_image_fp8_e4m3fn.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/blob/main/split_files/diffusion_models/qwen_image_fp8_e4m3fn.safetensors) and put it in your ComfyUI/models/diffusion_models directory.
+
+Download [qwen_2.5_vl_7b_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/blob/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors) and put it in your ComfyUI/models/text_encoders directory.
+
+Download [qwen_image_vae.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/blob/main/split_files/vae/qwen_image_vae.safetensors) and put it in your ComfyUI/models/vae/ directory.
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](qwen_image_basic_example.png)
+
+## Edit Model v2509
+
+Make sure you downloaded the text encoder and vae files for the basic workflow above. This model supports up to 3 different image inputs.
+
+Download [qwen_image_edit_2509_fp8_e4m3fn.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/blob/main/split_files/diffusion_models/qwen_image_edit_2509_fp8_e4m3fn.safetensors) and put it in your ComfyUI/models/diffusion_models directory.
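+
+If you would rather script these downloads, here is a minimal sketch using the `huggingface_hub` package (assuming it is installed and that your install lives in a folder called ComfyUI; the repo ids and file paths are taken from the links above):
+
+```python
+import shutil
+from pathlib import Path
+from huggingface_hub import hf_hub_download
+
+COMFYUI = Path("ComfyUI")  # assumed install location, adjust for your setup
+
+# (repo id, file inside the repo, ComfyUI models subfolder)
+FILES = [
+    ("Comfy-Org/Qwen-Image-Edit_ComfyUI",
+     "split_files/diffusion_models/qwen_image_edit_2509_fp8_e4m3fn.safetensors",
+     "diffusion_models"),
+    ("Comfy-Org/Qwen-Image_ComfyUI",
+     "split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors",
+     "text_encoders"),
+    ("Comfy-Org/Qwen-Image_ComfyUI",
+     "split_files/vae/qwen_image_vae.safetensors",
+     "vae"),
+]
+
+for repo_id, filename, subdir in FILES:
+    cached = hf_hub_download(repo_id=repo_id, filename=filename)  # downloads to the HF cache
+    dest = COMFYUI / "models" / subdir / Path(filename).name
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    shutil.copy(cached, dest)  # put the file where ComfyUI looks for it
+```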
+ + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](qwen_image_edit_2509_basic_example.png) + +You can find the input image [here](../chroma/fennec_girl_sing.png) + + +## Edit Model (older first version) + +Make sure you downloaded the text encoder and VAE files for the basic workflow above. + +Download [qwen_image_edit_fp8_e4m3fn.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/blob/main/split_files/diffusion_models/qwen_image_edit_fp8_e4m3fn.safetensors) and put it in your ComfyUI/models/diffusion_models directory. + + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](qwen_image_edit_basic_example.png) + +You can find the input image [here](../chroma/fennec_girl_sing.png) diff --git a/qwen_image/qwen_image_basic_example.png b/qwen_image/qwen_image_basic_example.png new file mode 100644 index 0000000..c6a0537 Binary files /dev/null and b/qwen_image/qwen_image_basic_example.png differ diff --git a/qwen_image/qwen_image_edit_2509_basic_example.png b/qwen_image/qwen_image_edit_2509_basic_example.png new file mode 100644 index 0000000..82895ff Binary files /dev/null and b/qwen_image/qwen_image_edit_2509_basic_example.png differ diff --git a/qwen_image/qwen_image_edit_basic_example.png b/qwen_image/qwen_image_edit_basic_example.png new file mode 100644 index 0000000..db318c0 Binary files /dev/null and b/qwen_image/qwen_image_edit_basic_example.png differ diff --git a/sd3/README.md b/sd3/README.md index d373f9b..067aa3e 100644 --- a/sd3/README.md +++ b/sd3/README.md @@ -2,7 +2,7 @@ ## SD3.5 -The first step is downloading the text encoder files if you don't have them already from SD3, Flux or other models: ([clip_l.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_l.safetensors), [clip_g.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_g.safetensors) and t5xxl) if you don't have them already in your ComfyUI/models/clip/ folder. For the t5xxl I recommend [t5xxl_fp16.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/t5xxl_fp16.safetensors) if you have more than 32GB ram or [t5xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/t5xxl_fp8_e4m3fn_scaled.safetensors) if you don't. +The first step, if you don't have them already from SD3, Flux or other models, is downloading the text encoder files ([clip_l.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_l.safetensors), [clip_g.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_g.safetensors) and t5xxl) into your ComfyUI/models/text_encoders/ folder. For the t5xxl I recommend [t5xxl_fp16.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/t5xxl_fp16.safetensors) if you have more than 32GB of RAM or [t5xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/t5xxl_fp8_e4m3fn_scaled.safetensors) if you don't. The SD3.5 model family contains a large 8B model and a medium 2.5B model. The medium model will be faster and take less memory but might have less complex understanding of some concepts. I recommend downloading both and experimenting with how each of them responds to your prompts.
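+
+As a toy illustration of that recommendation (fp16 with more than 32GB of RAM, fp8 otherwise), a small helper could pick the t5xxl file for you; this sketch assumes the third-party psutil package (`pip install psutil`):
+
+```python
+import psutil  # assumption: psutil is installed; used only to read total RAM
+
+ram_gb = psutil.virtual_memory().total / 2**30
+t5 = "t5xxl_fp16.safetensors" if ram_gb > 32 else "t5xxl_fp8_e4m3fn_scaled.safetensors"
+print(f"{ram_gb:.0f} GB of RAM -> {t5}")
+```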
diff --git a/sd3/README_old.md b/sd3/README_old.md index f947393..2b02561 100644 --- a/sd3/README_old.md +++ b/sd3/README_old.md @@ -6,7 +6,7 @@ Here is a very basic example how to use it: ![Example](sd3_simple_example.png) -The [sd3_medium.safetensors](https://huggingface.co/stabilityai/stable-diffusion-3-medium/tree/main) file does not contain text encoder/CLIP weights so you must load them separately to use that file. Download the text encoder weights from the [text_encoders directory](https://huggingface.co/stabilityai/stable-diffusion-3-medium/tree/main) and put them in your ComfyUI/models/clip/ directory. sd3_medium.safetensors should be put in your ComfyUI/models/checkpoints/ directory. +The [sd3_medium.safetensors](https://huggingface.co/stabilityai/stable-diffusion-3-medium/tree/main) file does not contain text encoder/CLIP weights so you must load them separately to use that file. Download the text encoder weights from the [text_encoders directory](https://huggingface.co/stabilityai/stable-diffusion-3-medium/tree/main) and put them in your ComfyUI/models/text_encoders/ directory. sd3_medium.safetensors should be put in your ComfyUI/models/checkpoints/ directory. Here is a basic example how to use it: diff --git a/wan/README.md b/wan/README.md new file mode 100644 index 0000000..677184f --- /dev/null +++ b/wan/README.md @@ -0,0 +1,80 @@ +# Wan 2.1 Models + +[Wan 2.1](https://github.com/Wan-Video/Wan2.1) is a family of video models. + +For Wan 2.2 see: [Wan 2.2](../wan22) + +## Files to Download + +You will first need: + +#### Text encoder and VAE: + +[umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/tree/main/split_files/text_encoders) goes in: ComfyUI/models/text_encoders/ + +[wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors) goes in: ComfyUI/models/vae/ + + +#### Video Models + +The diffusion models can be found [here](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/tree/main/split_files/diffusion_models). + +Note: The fp16 versions are recommended over the bf16 versions as they will give better results. + +Quality rank (highest to lowest): fp16 > bf16 > fp8_scaled > fp8_e4m3fn + +These files go in: ComfyUI/models/diffusion_models/ + +These examples use the 16-bit files but you can use the fp8 ones instead if you don't have enough memory. + +## Workflows + +### Text to Video + +This workflow requires the [wan2.1_t2v_1.3B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_t2v_1.3B_fp16.safetensors) file (put it in: ComfyUI/models/diffusion_models/). You can also use it with the 14B model. + +![Example](text_to_video_wan.webp) + +[Workflow in Json format](text_to_video_wan.json) + +### Image to Video + +This workflow requires the [wan2.1_i2v_480p_14B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_i2v_480p_14B_fp16.safetensors) file (put it in: ComfyUI/models/diffusion_models/) and +[clip_vision_h.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/clip_vision/clip_vision_h.safetensors) which goes in: ComfyUI/models/clip_vision/ + +Note: this example only generates 33 frames at 512x512 to keep it accessible; the model can do more than that. The 720p model is pretty good if you have the hardware/patience to run it.
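+
+The example images and Json files on this page are meant to be loaded through the UI, but ComfyUI also ships a small HTTP API if you want to queue a workflow from a script. A minimal sketch (assumptions: a local server on the default port 8188, and a workflow re-exported with "Save (API Format)" -- the graph-format Json files in this repo can't be posted directly):
+
+```python
+import json
+import urllib.request
+
+# Hypothetical file name: export your own workflow in API format first.
+with open("image_to_video_wan_api.json") as f:
+    prompt = json.load(f)
+
+req = urllib.request.Request(
+    "http://127.0.0.1:8188/prompt",
+    data=json.dumps({"prompt": prompt}).encode("utf-8"),
+    headers={"Content-Type": "application/json"},
+)
+print(urllib.request.urlopen(req).read().decode())  # response includes the queued prompt_id
+```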
+ + + +[Workflow in Json format](image_to_video_wan_example.json) + +The input image can be found on the [flux](../flux) page. + +Here's the same example with the [720p](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_i2v_720p_14B_fp16.safetensors) model: + + + + +### VACE Reference Image to Video + +This workflow requires the [wan2.1_vace_14B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_vace_14B_fp16.safetensors) file (put it in: ComfyUI/models/diffusion_models/). + +This example generates a video from a reference image, which is different from generating a video from a start image. You'll notice that the video does not actually contain the reference image but is clearly derived from it. + + + +[Workflow in Json format](vace_reference_to_video.json) + +You can find the input image [here](../chroma/fennec_girl_sing.png); that image contains a [Chroma](../chroma) workflow if you are interested in how it was generated. + +### Camera Image to Video + +This workflow requires the [wan2.1_fun_camera_v1.1_1.3B_bf16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_fun_camera_v1.1_1.3B_bf16.safetensors) file (put it in: ComfyUI/models/diffusion_models/) and +[clip_vision_h.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/clip_vision/clip_vision_h.safetensors) which goes in: ComfyUI/models/clip_vision/ if you don't have it already. + + + + +[Workflow in Json format](camera_image_to_video_wan_example.json) + +The input image can be found on the [flux](../flux) page. diff --git a/wan/camera_image_to_video_wan_example.json b/wan/camera_image_to_video_wan_example.json new file mode 100644 index 0000000..74068e8 --- /dev/null +++ b/wan/camera_image_to_video_wan_example.json @@ -0,0 +1,865 @@ +{ + "id": "fa117b0f-052b-46d1-af50-d1bc60704ed5", + "revision": 0, + "last_node_id": 60, + "last_link_id": 130, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -540, + 170 + ], + "size": [ + 387.0450744628906, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 590, + 480 + ], + "size": [ + 290.6003723144531, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 117 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 900, + 180 + ], + "size": [ + 308.10516357421875, + 262 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 111 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 118 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 119 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 120 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [
1034274237172778, + "randomize", + 20, + 6, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1230, + 180 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1480, + 180 + ], + "size": [ + 620.66796875, + 679.0053100585938 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 90, + "default" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + -140, + 370 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 116 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + -140, + 160 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 115 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and a big fluffy tail wearing a maid outfit" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 51, + "type": "CLIPVisionEncode", + "pos": [ + 350, + 680 + ], + "size": [ + 255.5699462890625, + 78 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 94 + }, + { + "name": "image", + "type": "IMAGE", + "link": 109 + } + ], + "outputs": [ + { + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "slot_index": 0, + "links": [ + 113 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionEncode" + }, + "widgets_values": [ + "none" + ] + }, + { + "id": 52, + "type": "LoadImage", + "pos": [ + -10, + 780 + ], + "size": [ + 315, + 314 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 109, + 114 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "flux_dev_example.png", + "image" + ] + }, + { + "id": 49, + "type": "CLIPVisionLoader", + "pos": [ + 0, + 670 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "slot_index": 0, + "links": [ + 
94 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionLoader" + }, + "widgets_values": [ + "clip_vision_h.safetensors" + ] + }, + { + "id": 56, + "type": "WanCameraImageToVideo", + "pos": [ + 590, + 200 + ], + "size": [ + 290, + 230 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 115 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 116 + }, + { + "name": "vae", + "type": "VAE", + "link": 117 + }, + { + "name": "clip_vision_output", + "shape": 7, + "type": "CLIP_VISION_OUTPUT", + "link": 113 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 114 + }, + { + "name": "camera_conditions", + "shape": 7, + "type": "WAN_CAMERA_EMBEDDING", + "link": 124 + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 125 + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 126 + }, + { + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 127 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 118 + ] + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 119 + ] + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 120 + ] + } + ], + "properties": { + "Node name for S&R": "WanCameraImageToVideo" + }, + "widgets_values": [ + 832, + 480, + 81, + 1 + ] + }, + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 600, + 100 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 130 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 111 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2150, + 180 + ], + "size": [ + 315, + 210 + ], + "flags": {}, + "order": 14, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 32 + ] + }, + { + "id": 57, + "type": "WanCameraEmbedding", + "pos": [ + 310, + 300 + ], + "size": [ + 236.8000030517578, + 310 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "camera_embedding", + "type": "WAN_CAMERA_EMBEDDING", + "links": [ + 124 + ] + }, + { + "name": "width", + "type": "INT", + "links": [ + 125 + ] + }, + { + "name": "height", + "type": "INT", + "links": [ + 126 + ] + }, + { + "name": "length", + "type": "INT", + "links": [ + 127 + ] + } + ], + "properties": { + "Node name for S&R": "WanCameraEmbedding" + }, + "widgets_values": [ + "Zoom Out", + 512, + 512, + 81, + 1, + 0.5, + 0.5, + 0.5, + 0.5 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + -540, + 50 + ], + "size": [ + 390, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 130 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.1_fun_camera_v1.1_1.3B_bf16.safetensors", + "default" + ] + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ 
+ 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 49, + 0, + 51, + 0, + "CLIP_VISION" + ], + [ + 109, + 52, + 0, + 51, + 1, + "IMAGE" + ], + [ + 111, + 54, + 0, + 3, + 0, + "MODEL" + ], + [ + 113, + 51, + 0, + 56, + 3, + "CLIP_VISION_OUTPUT" + ], + [ + 114, + 52, + 0, + 56, + 4, + "IMAGE" + ], + [ + 115, + 6, + 0, + 56, + 0, + "CONDITIONING" + ], + [ + 116, + 7, + 0, + 56, + 1, + "CONDITIONING" + ], + [ + 117, + 39, + 0, + 56, + 2, + "VAE" + ], + [ + 118, + 56, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 119, + 56, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 120, + 56, + 2, + 3, + 3, + "LATENT" + ], + [ + 124, + 57, + 0, + 56, + 5, + "WAN_CAMERA_EMBEDDING" + ], + [ + 125, + 57, + 1, + 56, + 6, + "INT" + ], + [ + 126, + 57, + 2, + 56, + 7, + "INT" + ], + [ + 127, + 57, + 3, + 56, + 8, + "INT" + ], + [ + 130, + 37, + 0, + 54, + 0, + "MODEL" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.6934334949441638, + "offset": [ + 570.9293716820114, + 14.391611998548521 + ] + }, + "frontendVersion": "1.20.7" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan/camera_image_to_video_wan_example.webp b/wan/camera_image_to_video_wan_example.webp new file mode 100644 index 0000000..042ac9f Binary files /dev/null and b/wan/camera_image_to_video_wan_example.webp differ diff --git a/wan/image_to_video_wan_720p_example.webp b/wan/image_to_video_wan_720p_example.webp new file mode 100644 index 0000000..567042f Binary files /dev/null and b/wan/image_to_video_wan_720p_example.webp differ diff --git a/wan/image_to_video_wan_example.json b/wan/image_to_video_wan_example.json new file mode 100644 index 0000000..1bccc0f --- /dev/null +++ b/wan/image_to_video_wan_example.json @@ -0,0 +1,743 @@ +{ + "last_node_id": 54, + "last_link_id": 111, + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 56, + 93 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 866.3932495117188, + 499.18597412109375 + ], + "size": [ + 306.36004638671875, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 76, + 99 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1460, + 190 + ], + "size": [ + 870.8511352539062, + 643.7430419921875 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 90, + "default" + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2367.213134765625, + 193.6114959716797 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 13, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 32 + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + 
"pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 98 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 50, + "type": "WanImageToVideo", + "pos": [ + 673.0507202148438, + 627.272705078125 + ], + "size": [ + 342.5999755859375, + 210 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 97 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 98 + }, + { + "name": "vae", + "type": "VAE", + "link": 99 + }, + { + "name": "clip_vision_output", + "type": "CLIP_VISION_OUTPUT", + "shape": 7, + "link": 107 + }, + { + "name": "start_image", + "type": "IMAGE", + "shape": 7, + "link": 106 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 101 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 102 + ], + "slot_index": 1 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 103 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "WanImageToVideo" + }, + "widgets_values": [ + 512, + 512, + 33, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 97 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and a big fluffy tail wearing a maid outfit turning around" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 111 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 101 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 102 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 103 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 35 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 987948718394761, + "randomize", + 20, + 6, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 49, + "type": "CLIPVisionLoader", + "pos": [ + 20, + 640 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 94 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPVisionLoader" + }, + "widgets_values": [ + "clip_vision_h.safetensors" + ] + }, + { + "id": 51, + "type": "CLIPVisionEncode", + "pos": [ + 360, + 640 + ], + "size": [ + 253.60000610351562, + 78 + ], + "flags": 
{}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 94 + }, + { + "name": "image", + "type": "IMAGE", + "link": 109 + } + ], + "outputs": [ + { + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "links": [ + 107 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPVisionEncode" + }, + "widgets_values": [ + "none" + ] + }, + { + "id": 52, + "type": "LoadImage", + "pos": [ + 20, + 760 + ], + "size": [ + 315, + 314 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106, + 109 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "flux_dev_example.png", + "image" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 20, + 190 + ], + "size": [ + 390, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 70 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 110 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.1_i2v_480p_14B_fp16.safetensors", + "default" + ] + }, + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 510, + 70 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 110 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 111 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 49, + 0, + 51, + 0, + "CLIP_VISION" + ], + [ + 97, + 6, + 0, + 50, + 0, + "CONDITIONING" + ], + [ + 98, + 7, + 0, + 50, + 1, + "CONDITIONING" + ], + [ + 99, + 39, + 0, + 50, + 2, + "VAE" + ], + [ + 101, + 50, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 102, + 50, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 103, + 50, + 2, + 3, + 3, + "LATENT" + ], + [ + 106, + 52, + 0, + 50, + 4, + "IMAGE" + ], + [ + 107, + 51, + 0, + 50, + 3, + "CLIP_VISION_OUTPUT" + ], + [ + 109, + 52, + 0, + 51, + 1, + "IMAGE" + ], + [ + 110, + 37, + 0, + 54, + 0, + "MODEL" + ], + [ + 111, + 54, + 0, + 3, + 0, + "MODEL" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.015255979947749, + "offset": [ + 4.576817595742521, + -17.69629597715313 + ] + } + }, + "version": 0.4 +} diff --git a/wan/image_to_video_wan_example.webp b/wan/image_to_video_wan_example.webp new file mode 100644 index 0000000..26433ad Binary files /dev/null and b/wan/image_to_video_wan_example.webp differ diff --git a/wan/text_to_video_wan.json b/wan/text_to_video_wan.json new file mode 100644 index 
0000000..3427190 --- /dev/null +++ b/wan/text_to_video_wan.json @@ -0,0 +1,528 @@ +{ + "last_node_id": 48, + "last_link_id": 95, + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 56, + 93 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 866.3932495117188, + 499.18597412109375 + ], + "size": [ + 306.36004638671875, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 76 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1460, + 190 + ], + "size": [ + 870.8511352539062, + 643.7430419921875 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 90, + "default", + "" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 52 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 12.94982624053955, + 184.6981658935547 + ], + "size": [ + 390, + 82 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 40, + "type": "EmptyHunyuanLatentVideo", + "pos": [ + 520, + 620 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 91 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyHunyuanLatentVideo" + }, + "widgets_values": [ + 832, + 480, + 33, + 1 + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2367.213134765625, + 193.6114959716797 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 10, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 32 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 95 
+ }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 46 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 52 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 91 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 35 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 82628696717253, + "randomize", + 30, + 6, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 48, + "type": "ModelSamplingSD3", + "pos": [ + 440, + 50 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 94 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 95 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 40 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 94 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.1_t2v_1.3B_fp16.safetensors", + "default" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 46 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a fox moving quickly in a beautiful winter scenery nature trees mountains daytime tracking camera" + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 46, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 52, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 91, + 40, + 0, + 3, + 3, + "LATENT" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 37, + 0, + 48, + 0, + "MODEL" + ], + [ + 95, + 48, + 0, + 3, + 0, + "MODEL" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.1167815779425205, + "offset": [ + -5.675057867608515, + 8.013751263058214 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan/text_to_video_wan.webp b/wan/text_to_video_wan.webp new file mode 100644 index 0000000..f21a370 Binary files /dev/null and b/wan/text_to_video_wan.webp differ diff --git a/wan/vace_reference_to_video.json b/wan/vace_reference_to_video.json new file mode 100644 index 0000000..a2ea2ad --- /dev/null +++ b/wan/vace_reference_to_video.json @@ -0,0 +1,741 @@ +{ + "id": "0898f6a6-2814-4ccd-968a-a2405ee177e7", + "revision": 0, + "last_node_id": 58, + "last_link_id": 124, + "nodes": [ + { + "id": 39, + "type": "VAELoader", + "pos": [ + 866.3932495117188, + 499.18597412109375 + ], + "size": [ + 306.36004638671875, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 114 + ] + } + ], + "properties": { + 
"Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 20, + 190 + ], + "size": [ + 390, + 106 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 510, + 70 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 110 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 111 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 113 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 120 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 56, + "type": "TrimVideoLatent", + "pos": [ + 1265.2001953125, + 613.80859375 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 119 + }, + { + "name": "trim_amount", + "type": "INT", + "widget": { + "name": "trim_amount" + }, + "link": 121 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 120 + ] + } + ], + "properties": { + "Node name for S&R": "TrimVideoLatent" + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 70 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.1_vace_14B_fp16.safetensors", + "default" + ] + }, + { + "id": 55, + "type": "WanVaceToVideo", + "pos": [ + 698.0429077148438, + 632.2788696289062 + ], + "size": [ + 270, + 254 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 112 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 113 + }, + { + "name": "vae", + "type": "VAE", + "link": 114 + }, + { + "name": "control_video", + "shape": 7, + "type": "IMAGE", + "link": null + }, 
+ { + "name": "control_masks", + "shape": 7, + "type": "MASK", + "link": null + }, + { + "name": "reference_image", + "shape": 7, + "type": "IMAGE", + "link": 118 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 115 + ] + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 116 + ] + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 117 + ] + }, + { + "name": "trim_latent", + "type": "INT", + "links": [ + 121 + ] + } + ], + "properties": { + "Node name for S&R": "WanVaceToVideo" + }, + "widgets_values": [ + 768, + 768, + 81, + 1, + 1 + ] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1600, + 190 + ], + "size": [ + 364.4535217285156, + 510.4535217285156 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 90, + "default" + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2060, + 190 + ], + "size": [ + 429.0989685058594, + 523.8981323242188 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 0 + ] + }, + { + "id": 58, + "type": "Note", + "pos": [ + 2509.27587890625, + 189.5493621826172 + ], + "size": [ + 263.95501708984375, + 155.10342407226562 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "crf 0 means a lossless webm, if you want a lossy once with smaller filesize increase the crf." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 52, + "type": "LoadImage", + "pos": [ + 221.9611358642578, + 734.3540649414062 + ], + "size": [ + 315, + 314 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 118 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": [] + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fennec_girl_sing.png", + "image" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 111 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 115 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 116 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 117 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 119 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 399224011392770, + "randomize", + 20, + 6, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 112 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and 
a big fluffy tail turning around and dancing and singing on stage like an idol" + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 110, + 37, + 0, + 54, + 0, + "MODEL" + ], + [ + 111, + 54, + 0, + 3, + 0, + "MODEL" + ], + [ + 112, + 6, + 0, + 55, + 0, + "CONDITIONING" + ], + [ + 113, + 7, + 0, + 55, + 1, + "CONDITIONING" + ], + [ + 114, + 39, + 0, + 55, + 2, + "VAE" + ], + [ + 115, + 55, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 116, + 55, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 117, + 55, + 2, + 3, + 3, + "LATENT" + ], + [ + 118, + 52, + 0, + 55, + 5, + "IMAGE" + ], + [ + 119, + 3, + 0, + 56, + 0, + "LATENT" + ], + [ + 120, + 56, + 0, + 8, + 0, + "LATENT" + ], + [ + 121, + 55, + 3, + 56, + 1, + "INT" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.9358232486220777, + "offset": [ + -2.3933794268561357, + -27.125629672645054 + ] + }, + "frontendVersion": "1.19.9" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan/vace_reference_to_video.webp b/wan/vace_reference_to_video.webp new file mode 100644 index 0000000..731f7d1 Binary files /dev/null and b/wan/vace_reference_to_video.webp differ diff --git a/wan22/README.md b/wan22/README.md new file mode 100644 index 0000000..7f791eb --- /dev/null +++ b/wan22/README.md @@ -0,0 +1,70 @@ +# Wan 2.2 Models + +[Wan 2.2](https://github.com/Wan-Video/Wan2.2) is a family of video models and the version after [Wan 2.1](../wan). + +Wan 2.2 was initially released with 3 different models: a 5B model that can do both text and image to video, and two 14B models, one for text to video and the other for image to video. + +See also the [Comfy Docs Wan 2.2 page](https://docs.comfy.org/tutorials/video/wan/wan2_2) for more workflow examples. + +## Files to Download + +You will first need: + +#### Text encoder and VAE: + +[umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/tree/main/split_files/text_encoders) goes in: ComfyUI/models/text_encoders/ + +Needed for the 14B models: [wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors) goes in: ComfyUI/models/vae/ + +Needed for the 5B model (NEW): [wan2.2_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan2.2_vae.safetensors) goes in: ComfyUI/models/vae/ + +#### Video Models + +The diffusion models can be found [here](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/tree/main/split_files/diffusion_models). + +These files go in: ComfyUI/models/diffusion_models/ + +## Workflows + +### 5B Model + +This workflow requires the [wan2.2_ti2v_5B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_ti2v_5B_fp16.safetensors) file (put it in: ComfyUI/models/diffusion_models/).
+ +Make sure you have the [wan2.2 VAE](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan2.2_vae.safetensors) (goes in: ComfyUI/models/vae/) + +#### Text to Video + +![Example](text_to_video_wan22_5B.webp) + +[Workflow in Json format](text_to_video_wan22_5B.json) + + +#### Image to Video + +![Example](image_to_video_wan22_5B.webp) + +[Workflow in Json format](image_to_video_wan22_5B.json) + +You can find the input image [here](../chroma/fennec_girl_hug.png) + +### 14B Model + +Make sure you have the [wan2.1 VAE](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors) (goes in: ComfyUI/models/vae/) + +#### Text to Video + +This workflow requires both the [wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors) and the [wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors) files (put them in: ComfyUI/models/diffusion_models/). + +![Example](text_to_video_wan22_14B.webp) + +[Workflow in Json format](text_to_video_wan22_14B.json) + +#### Image to Video + +This workflow requires both the [wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors) and the [wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors) files (put them in: ComfyUI/models/diffusion_models/).
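+
+These 14B workflows run two diffusion models in sequence: the high noise model handles the first sampling steps and the low noise model the last ones (the two KSamplerAdvanced nodes in the Json split a 20 step schedule at step 10). A runnable toy sketch of that bookkeeping, with stand-in functions instead of the real models:
+
+```python
+def high_noise_model(latent, step):  # stand-in for wan2.2_i2v_high_noise_14B
+    return latent - 0.07 * latent    # pretend denoising update
+
+def low_noise_model(latent, step):   # stand-in for wan2.2_i2v_low_noise_14B
+    return latent - 0.03 * latent
+
+TOTAL_STEPS, SPLIT = 20, 10  # matches the workflow's start/end step settings
+latent = 1.0                 # toy scalar standing in for the video latent
+for step in range(TOTAL_STEPS):
+    model = high_noise_model if step < SPLIT else low_noise_model
+    latent = model(latent, step)
+print(f"final toy latent after the two-stage schedule: {latent:.4f}")
+```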
+ +![Example](image_to_video_wan22_14B.webp) + +[Workflow in Json format](image_to_video_wan22_14B.json) + +You can find the input image [here](../chroma/fennec_girl_flowers.png) diff --git a/wan22/image_to_video_wan22_14B.json b/wan22/image_to_video_wan22_14B.json new file mode 100644 index 0000000..cea25cd --- /dev/null +++ b/wan22/image_to_video_wan22_14B.json @@ -0,0 +1,876 @@ +{ + "id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", + "revision": 0, + "last_node_id": 60, + "last_link_id": 126, + "nodes": [ + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 98 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 486.4836120605469, + -69.28914642333984 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 110 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 125 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 55, + "type": "ModelSamplingSD3", + "pos": [ + 484.0019836425781, + 54.46213912963867 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 112 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 123 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + }, + { + "id": 58, + "type": "KSamplerAdvanced", + "pos": [ + 1262.509765625, + -26.73247528076172 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 123 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 121 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 122 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 113 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 124 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "disable", + 0, + "fixed", + 20, + 3.5, + "euler", + "simple", + 10, + 10000, + "disable" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 30, + 190 + ], + "size": [ + 360, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 30, + -70 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + 
"type": "MODEL", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 56, + "type": "UNETLoader", + "pos": [ + 30, + 60 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 112 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 30, + 340 + ], + "size": [ + 360, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 99 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 59, + "type": "Note", + "pos": [ + -202.05557250976562, + -57.859466552734375 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses a different diffusion model for the first steps (high noise) vs the last steps (low noise).\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 60, + "type": "Note", + "pos": [ + -200, + 340 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses the wan 2.1 VAE.\n\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1590, + -20 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 124 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2530, + -20 + ], + "size": [ + 763.67041015625, + 885.67041015625 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 13.3333740234375 + ] + }, + { + "id": 57, + "type": "KSamplerAdvanced", + "pos": [ + 893.0060424804688, + -29.923471450805664 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 125 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 118 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 119 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 120 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 113 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "enable", + 99822389587980, + "randomize", + 20, + 3.5, + "euler", + "simple", + 0, + 10, + "enable" + ] + }, + { + "id": 28, 
+ "type": "SaveAnimatedWEBP", + "pos": [ + 1820, + -20 + ], + "size": [ + 674.6224975585938, + 820.6224975585938 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 80, + "default" + ] + }, + { + "id": 50, + "type": "WanImageToVideo", + "pos": [ + 491.7362060546875, + 617.798095703125 + ], + "size": [ + 342.5999755859375, + 210 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 97 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 98 + }, + { + "name": "vae", + "type": "VAE", + "link": 99 + }, + { + "name": "clip_vision_output", + "shape": 7, + "type": "CLIP_VISION_OUTPUT", + "link": null + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 126 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 118, + 121 + ] + }, + { + "name": "negative", + "type": "CONDITIONING", + "slot_index": 1, + "links": [ + 119, + 122 + ] + }, + { + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [ + 120 + ] + } + ], + "properties": { + "Node name for S&R": "WanImageToVideo" + }, + "widgets_values": [ + 768, + 768, + 81, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 97 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl picking up an assault rifle and moving quickly" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 52, + "type": "LoadImage", + "pos": [ + -50, + 550 + ], + "size": [ + 450, + 540 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 126 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fennec_girl_flowers.png", + "image" + ] + } + ], + "links": [ + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 97, + 6, + 0, + 50, + 0, + "CONDITIONING" + ], + [ + 98, + 7, + 0, + 50, + 1, + "CONDITIONING" + ], + [ + 99, + 39, + 0, + 50, + 2, + "VAE" + ], + [ + 110, + 37, + 0, + 54, + 0, + "MODEL" + ], + [ + 112, + 56, + 0, + 55, + 0, + "MODEL" + ], + [ + 113, + 57, + 0, + 58, + 3, + "LATENT" + ], + [ + 118, + 50, + 0, + 57, + 1, + "CONDITIONING" + ], + [ + 119, + 50, + 1, + 57, + 2, + "CONDITIONING" + ], + [ + 120, + 50, + 2, + 57, + 3, + "LATENT" + ], + [ + 121, + 50, + 0, + 58, + 1, + "CONDITIONING" + ], + [ + 122, + 50, + 1, + 58, + 2, + "CONDITIONING" + ], + [ + 123, + 55, + 0, + 58, + 0, + "MODEL" + ], + [ + 124, + 58, + 0, + 8, + 0, + "LATENT" + ], + [ + 125, + 54, + 0, + 57, + 0, + "MODEL" + ], + [ + 126, + 52, + 0, + 50, + 4, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 
1.1167815779425299, + "offset": [ + 229.4669275491141, + 115.0852193902741 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/image_to_video_wan22_14B.webp b/wan22/image_to_video_wan22_14B.webp new file mode 100644 index 0000000..d74baa2 Binary files /dev/null and b/wan22/image_to_video_wan22_14B.webp differ diff --git a/wan22/image_to_video_wan22_5B.json b/wan22/image_to_video_wan22_5B.json new file mode 100644 index 0000000..6160b10 --- /dev/null +++ b/wan22/image_to_video_wan22_5B.json @@ -0,0 +1,624 @@ +{ + "id": "91f6bbe2-ed41-4fd6-bac7-71d5b5864ecb", + "revision": 0, + "last_node_id": 57, + "last_link_id": 106, + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 95 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 46 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 52 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 104 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 869177064731501, + "randomize", + 30, + 5, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1460, + 190 + ], + "size": [ + 870.8511352539062, + 648.4141235351562 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24.000000000000004, + false, + 90, + "default" + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 20, + 340 + ], + "size": [ + 330, + 60 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 105 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan2.2_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 20, + 190 + ], + "size": [ + 380, + 106 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": 
"CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 48, + "type": "ModelSamplingSD3", + "pos": [ + 440, + 60 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 94 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 95 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 60 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 94 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_ti2v_5B_fp16.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2367.213134765625, + 193.6114959716797 + ], + "size": [ + 670, + 650 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 16.111083984375 + ] + }, + { + "id": 57, + "type": "LoadImage", + "pos": [ + 87.407958984375, + 620.4816284179688 + ], + "size": [ + 274.080078125, + 314 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106 + ] + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fennec_girl_hug.png", + "image" + ] + }, + { + "id": 56, + "type": "Note", + "pos": [ + 710.781005859375, + 608.9545288085938 + ], + "size": [ + 320.9936218261719, + 182.6057586669922 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Optimal resolution is: 1280x704 length 121\n\nThe reason it's lower in this workflow is just because I didn't want you to wait too long to get an initial video.\n\nTo get image to video just plug in a start image. For text to video just don't give it a start image." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 55, + "type": "Wan22ImageToVideoLatent", + "pos": [ + 420, + 610 + ], + "size": [ + 271.9126892089844, + 150 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 105 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 106 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 104 + ] + } + ], + "properties": { + "Node name for S&R": "Wan22ImageToVideoLatent" + }, + "widgets_values": [ + 1280, + 704, + 41, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 46 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl with fennec ears and a fluffy tail walking in a beautiful field" + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 46, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 52, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 37, + 0, + 48, + 0, + "MODEL" + ], + [ + 95, + 48, + 0, + 3, + 0, + "MODEL" + ], + [ + 104, + 55, + 0, + 3, + 3, + "LATENT" + ], + [ + 105, + 39, + 0, + 55, + 0, + "VAE" + ], + [ + 106, + 57, + 0, + 55, + 1, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.1167815779425287, + "offset": [ + 3.5210927484772534, + -9.231468990407302 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/image_to_video_wan22_5B.webp b/wan22/image_to_video_wan22_5B.webp new file mode 100644 index 0000000..20281aa Binary files /dev/null and b/wan22/image_to_video_wan22_5B.webp differ diff --git a/wan22/text_to_video_wan22_14B.json b/wan22/text_to_video_wan22_14B.json new file mode 100644 index 0000000..2dc3ca0 --- /dev/null +++ b/wan22/text_to_video_wan22_14B.json @@ -0,0 +1,759 @@ +{ + "id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", + "revision": 0, + "last_node_id": 61, + "last_link_id": 131, + "nodes": [ + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 486.4836120605469, + -69.28914642333984 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 110 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 125 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 58, + "type": "KSamplerAdvanced", + "pos": [ + 1262.509765625, + -26.73247528076172 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 123 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 128 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 130 + }, + { + "name": "latent_image", + "type": 
"LATENT", + "link": 113 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 124 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "disable", + 0, + "fixed", + 20, + 3.5, + "euler", + "simple", + 10, + 10000, + "disable" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 30, + 190 + ], + "size": [ + 360, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 30, + 340 + ], + "size": [ + 360, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 59, + "type": "Note", + "pos": [ + -202.05557250976562, + -57.859466552734375 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses a different diffusion model for the first steps (high noise) vs the last steps (low noise).\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 60, + "type": "Note", + "pos": [ + -200, + 340 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses the wan 2.1 VAE.\n\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1590, + -20 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 124 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1820, + -20 + ], + "size": [ + 674.6224975585938, + 820.6224975585938 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 80, + "default" + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2530, + -20 + ], + "size": [ + 763.67041015625, + 885.67041015625 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 13.3333740234375 + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 129, + 130 + 
] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 56, + "type": "UNETLoader", + "pos": [ + 30, + 60 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 112 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 55, + "type": "ModelSamplingSD3", + "pos": [ + 484.0019836425781, + 54.46213912963867 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 112 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 123 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 30, + -70 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 57, + "type": "KSamplerAdvanced", + "pos": [ + 893.0060424804688, + -29.923471450805664 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 125 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 127 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 129 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 131 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 113 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "enable", + 738226772790037, + "randomize", + 20, + 3.5, + "euler", + "simple", + 0, + 10, + "enable" + ] + }, + { + "id": 61, + "type": "EmptyHunyuanLatentVideo", + "pos": [ + 560, + 620 + ], + "size": [ + 270.0943298339844, + 130 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 131 + ] + } + ], + "properties": { + "Node name for S&R": "EmptyHunyuanLatentVideo" + }, + "widgets_values": [ + 1280, + 704, + 57, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 127, + 128 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a robot is running through a futuristic cyberpunk city with neon signs and darkness with bright HDR lights" + ], + "color": "#232", + "bgcolor": "#353" + 
} + ], + "links": [ + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 110, + 37, + 0, + 54, + 0, + "MODEL" + ], + [ + 112, + 56, + 0, + 55, + 0, + "MODEL" + ], + [ + 113, + 57, + 0, + 58, + 3, + "LATENT" + ], + [ + 123, + 55, + 0, + 58, + 0, + "MODEL" + ], + [ + 124, + 58, + 0, + 8, + 0, + "LATENT" + ], + [ + 125, + 54, + 0, + 57, + 0, + "MODEL" + ], + [ + 127, + 6, + 0, + 57, + 1, + "CONDITIONING" + ], + [ + 128, + 6, + 0, + 58, + 1, + "CONDITIONING" + ], + [ + 129, + 7, + 0, + 57, + 2, + "CONDITIONING" + ], + [ + 130, + 7, + 0, + 58, + 2, + "CONDITIONING" + ], + [ + 131, + 61, + 0, + 57, + 3, + "LATENT" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.1167815779425305, + "offset": [ + 242.9977455078102, + 122.98065462666187 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/text_to_video_wan22_14B.webp b/wan22/text_to_video_wan22_14B.webp new file mode 100644 index 0000000..4d3f6d6 Binary files /dev/null and b/wan22/text_to_video_wan22_14B.webp differ diff --git a/wan22/text_to_video_wan22_5B.json b/wan22/text_to_video_wan22_5B.json new file mode 100644 index 0000000..25dc251 --- /dev/null +++ b/wan22/text_to_video_wan22_5B.json @@ -0,0 +1,579 @@ +{ + "id": "91f6bbe2-ed41-4fd6-bac7-71d5b5864ecb", + "revision": 0, + "last_node_id": 57, + "last_link_id": 106, + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 95 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 46 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 52 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 104 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 285741127119524, + "randomize", + 30, + 5, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 20, + 340 + ], + "size": [ + 330, + 60 + ], + "flags": {}, + "order": 0, + 
"mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 105 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan2.2_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 20, + 190 + ], + "size": [ + 380, + 106 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 48, + "type": "ModelSamplingSD3", + "pos": [ + 440, + 60 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 94 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 95 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 60 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 94 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_ti2v_5B_fp16.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2367.213134765625, + 193.6114959716797 + ], + "size": [ + 670, + 650 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 16.111083984375 + ] + }, + { + "id": 56, + "type": "Note", + "pos": [ + 710.781005859375, + 608.9545288085938 + ], + "size": [ + 320.9936218261719, + 182.6057586669922 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Optimal resolution is: 1280x704 length 121\n\nThe reason it's lower in this workflow is just because I didn't want you to wait too long to get an initial video.\n\nTo get image to video just plug in a start image. For text to video just don't give it a start image." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 55, + "type": "Wan22ImageToVideoLatent", + "pos": [ + 420, + 610 + ], + "size": [ + 271.9126892089844, + 150 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 105 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 104 + ] + } + ], + "properties": { + "Node name for S&R": "Wan22ImageToVideoLatent" + }, + "widgets_values": [ + 1280, + 704, + 41, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 46 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "drone shot of a volcano erupting with a fox walking on it" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1460, + 190 + ], + "size": [ + 870.8511352539062, + 648.4141235351562 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24.000000000000004, + false, + 80, + "default" + ] + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 46, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 52, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 37, + 0, + 48, + 0, + "MODEL" + ], + [ + 95, + 48, + 0, + 3, + 0, + "MODEL" + ], + [ + 104, + 55, + 0, + 3, + 3, + "LATENT" + ], + [ + 105, + 39, + 0, + 55, + 0, + "VAE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.11678157794253, + "offset": [ + 7.041966347099882, + -19.733042401058505 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/text_to_video_wan22_5B.webp b/wan22/text_to_video_wan22_5B.webp new file mode 100644 index 0000000..eafce22 Binary files /dev/null and b/wan22/text_to_video_wan22_5B.webp differ