diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..93a9024
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,10 @@
+Permission to use, copy, modify, and/or distribute these workflows for
+any purpose with or without fee is hereby granted.
+
+THE WORKFLOWS ARE PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL
+WARRANTIES WITH REGARD TO THESE WORKFLOWS INCLUDING ALL IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE
+FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY
+DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE WORKFLOWS.
diff --git a/README.md b/README.md
index 3e329e9..b0e721f 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,8 @@ Here are some more advanced examples:
 
 [SDXL](sdxl)
 
+[SD3](sd3)
+
 [Model Merging](model_merging)
 
 [3d](3d)
@@ -42,9 +44,55 @@ Here are some more advanced examples:
 
 [SDXL Turbo](sdturbo)
 
-[Video Models](video)
+[Stable Cascade](stable_cascade)
+
+[AuraFlow](aura_flow)
+
+[HunyuanDiT](hunyuan_dit)
+
+[Hunyuan Image 2.1](hunyuan_image)
+
+[Chroma](chroma)
+
+[Lumina Image 2.0](lumina2)
+
+[HiDream](hidream)
+
+[Qwen Image](qwen_image)
+
+[Flux](flux)
+
+[Edit/InstructPix2Pix Models](edit_models)
+
+[Omnigen2](omnigen)
+
+[Stable Video Diffusion](video)
+
+[Mochi](mochi)
+
+[Lightricks LTX-Video](ltxv)
+
+[Hunyuan Video](hunyuan_video)
+
+[Nvidia Cosmos](cosmos)
+
+[Nvidia Cosmos Predict2](cosmos_predict2)
+
+[Wan 2.1](wan)
+
+[Wan 2.2](wan22)
+
+[Audio Models](audio)
+
+[Hunyuan3D 2.0](https://docs.comfy.org/tutorials/3d/hunyuan3D-2)
+
+## Documentation
+
+#### [WIP Documentation](https://docs.comfy.org/)
+
+#### [WIP LLM Assisted Documentation](https://docs.getsalt.ai/md/) of every node
 
-#### The [Node Guide (WIP)](https://blenderneko.github.io/ComfyUI-docs/) documents what each node does.
+The old [Node Guide (WIP)](https://blenderneko.github.io/ComfyUI-docs/) documents what most nodes do.
 
 You might also want to check out the: [Frequently Asked Questions](faq)
diff --git a/audio/README.md b/audio/README.md
new file mode 100644
index 0000000..97caf63
--- /dev/null
+++ b/audio/README.md
@@ -0,0 +1,24 @@
+# Audio Examples
+
+## ACE Step Model
+
+Download [ace_step_v1_3.5b.safetensors](https://huggingface.co/Comfy-Org/ACE-Step_ComfyUI_repackaged/blob/main/all_in_one/ace_step_v1_3.5b.safetensors) and save it to your ComfyUI/models/checkpoints/ directory.
+
+The following flac audio file contains a workflow; you can download it and load it or drag it onto the ComfyUI interface.
+
+
+
+## Stable Audio Open 1.0
+
+Download the [model.safetensors from this page](https://huggingface.co/google-t5/t5-base/blob/main/model.safetensors) and save it as `t5_base.safetensors` to your ComfyUI/models/text_encoders/ directory.
+
+Download the [model.safetensors from this page](https://huggingface.co/stabilityai/stable-audio-open-1.0/tree/main) and save it as `stable_audio_open_1.0.safetensors` to your ComfyUI/models/checkpoints/ directory.
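If you'd rather script the downloads above, here is a minimal sketch using the huggingface_hub Python library. It assumes ComfyUI is checked out at ./ComfyUI and that you are logged in to Hugging Face (the stable-audio-open repo is gated):

```python
# Sketch: fetch the two "model.safetensors" files mentioned above and save
# them under the names ComfyUI expects. Paths assume ComfyUI is at ./ComfyUI.
import shutil
from huggingface_hub import hf_hub_download

downloads = [
    ("google-t5/t5-base", "model.safetensors",
     "ComfyUI/models/text_encoders/t5_base.safetensors"),
    ("stabilityai/stable-audio-open-1.0", "model.safetensors",
     "ComfyUI/models/checkpoints/stable_audio_open_1.0.safetensors"),
]

for repo_id, filename, dest in downloads:
    cached = hf_hub_download(repo_id=repo_id, filename=filename)
    shutil.copyfile(cached, dest)  # copy out of the HF cache under the new name
```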
+
+The following flac audio file contains a workflow; you can download it and load it or drag it onto the ComfyUI interface.
+
+
+
+[download](stable_audio_example.flac)
+
diff --git a/audio/ace_step_example.flac b/audio/ace_step_example.flac
new file mode 100644
index 0000000..57b2293
Binary files /dev/null and b/audio/ace_step_example.flac differ
diff --git a/audio/stable_audio_example.flac b/audio/stable_audio_example.flac
new file mode 100644
index 0000000..6aa6948
Binary files /dev/null and b/audio/stable_audio_example.flac differ
diff --git a/aura_flow/README.md b/aura_flow/README.md
new file mode 100644
index 0000000..89274dc
--- /dev/null
+++ b/aura_flow/README.md
@@ -0,0 +1,18 @@
+# AuraFlow Examples
+
+AuraFlow is one of the few truly open source models, with both the code and the weights released under a FOSS license.
+
+## AuraFlow 0.2
+
+Download [aura_flow_0.2.safetensors](https://huggingface.co/fal/AuraFlow-v0.2/blob/main/aura_flow_0.2.safetensors) and put it in your ComfyUI/models/checkpoints directory.
+
+You can then load up the following image in ComfyUI to get the workflow:
+
+![Example](aura_flow_0.2_example.png)
+
+## AuraFlow 0.1
+
+The following is an older example for [aura_flow_0.1.safetensors](https://huggingface.co/fal/AuraFlow/blob/main/aura_flow_0.1.safetensors). The workflow is the same as the one above but with a different prompt.
+
+![Example](aura_flow_0.1_example.png)
+
diff --git a/aura_flow/aura_flow_0.1_example.png b/aura_flow/aura_flow_0.1_example.png
new file mode 100644
index 0000000..78a50df
Binary files /dev/null and b/aura_flow/aura_flow_0.1_example.png differ
diff --git a/aura_flow/aura_flow_0.2_example.png b/aura_flow/aura_flow_0.2_example.png
new file mode 100644
index 0000000..bb556c1
Binary files /dev/null and b/aura_flow/aura_flow_0.2_example.png differ
diff --git a/chroma/README.md b/chroma/README.md
new file mode 100644
index 0000000..49568f9
--- /dev/null
+++ b/chroma/README.md
@@ -0,0 +1,11 @@
+# Chroma
+
+Chroma is a model based on [flux](../flux/) with some changes to the architecture.
+
+To use it you will need one of the t5xxl text encoder model files from [this repo](https://huggingface.co/comfyanonymous/flux_text_encoders/tree/main): fp16 is recommended; if you don't have that much memory, fp8_scaled is recommended. Put it in the ComfyUI/models/text_encoders/ folder.
+
+You can then download the latest chroma checkpoint from the [official huggingface page](https://huggingface.co/lodestones/Chroma1-HD). It goes in the ComfyUI/models/diffusion_models/ folder.
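As with the other examples in this repo, the PNG below embeds its workflow as JSON in the image metadata, which is what lets ComfyUI rebuild the graph when you drag the image in. A small sketch of how to peek at that data with Pillow, if you are curious:

```python
# Sketch: read the workflow JSON that ComfyUI embeds in an example PNG.
import json
from PIL import Image

img = Image.open("chroma_example.png")
raw = img.info.get("workflow")  # PNG text chunk written by ComfyUI on save
if raw is None:
    print("no embedded workflow found")
else:
    workflow = json.loads(raw)
    for node in workflow["nodes"]:
        print(node["id"], node["type"])  # e.g. 38 CLIPLoader
```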
+
+Load or drag this image onto ComfyUI to get the example workflow:
+
+![Example](chroma_example.png)
diff --git a/chroma/chroma_example.png b/chroma/chroma_example.png
new file mode 100644
index 0000000..77ff321
Binary files /dev/null and b/chroma/chroma_example.png differ
diff --git a/chroma/fennec_girl_flowers.png b/chroma/fennec_girl_flowers.png
new file mode 100644
index 0000000..5f95987
Binary files /dev/null and b/chroma/fennec_girl_flowers.png differ
diff --git a/chroma/fennec_girl_hug.png b/chroma/fennec_girl_hug.png
new file mode 100644
index 0000000..76c091c
Binary files /dev/null and b/chroma/fennec_girl_hug.png differ
diff --git a/chroma/fennec_girl_sing.png b/chroma/fennec_girl_sing.png
new file mode 100644
index 0000000..a308c4a
Binary files /dev/null and b/chroma/fennec_girl_sing.png differ
diff --git a/cosmos/README.md b/cosmos/README.md
new file mode 100644
index 0000000..11e343b
--- /dev/null
+++ b/cosmos/README.md
@@ -0,0 +1,52 @@
+# Original Nvidia Cosmos Models
+
+For the newer Cosmos models see [Cosmos Predict2](../cosmos_predict2)
+
+[Nvidia Cosmos](https://www.nvidia.com/en-us/ai/cosmos/) is a family of "World Models". ComfyUI currently supports the 7B and 14B text to video diffusion models and the 7B and 14B image to video diffusion models.
+
+## Files to Download
+
+You will first need:
+
+#### Text encoder and VAE:
+
+[oldt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/comfyanonymous/cosmos_1.0_text_encoder_and_VAE_ComfyUI/tree/main/text_encoders) goes in: ComfyUI/models/text_encoders/
+
+[cosmos_cv8x8x8_1.0.safetensors](https://huggingface.co/comfyanonymous/cosmos_1.0_text_encoder_and_VAE_ComfyUI/blob/main/vae/cosmos_cv8x8x8_1.0.safetensors) goes in: ComfyUI/models/vae/
+
+Note: oldt5_xxl is not the same as the t5xxl used in flux and other models.
+oldt5_xxl is t5xxl 1.0 while the one used in flux and others is t5xxl 1.1.
+
+#### Video Models
+
+The video models can be found [in safetensors format here.](https://huggingface.co/mcmonkey/cosmos-1.0/tree/main)
+
+The workflows on this page use [Cosmos-1_0-Diffusion-7B-Text2World.safetensors](https://huggingface.co/mcmonkey/cosmos-1.0/blob/main/Cosmos-1_0-Diffusion-7B-Text2World.safetensors) and [Cosmos-1_0-Diffusion-7B-Video2World.safetensors](https://huggingface.co/mcmonkey/cosmos-1.0/blob/main/Cosmos-1_0-Diffusion-7B-Video2World.safetensors).
+
+These files go in: ComfyUI/models/diffusion_models
+
+Note: "Text to World" means text to video, and "Video to World" means image/video to video.
+
+If you want the original diffusion models in .pt format instead of the repackaged safetensors the official links are: [7B-Text2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-7B-Text2World) [7B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-7B-Video2World) [14B-Text2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-14B-Text2World) [14B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-14B-Video2World)
+
+## Workflows
+
+### Text to Video
+
+This workflow requires the 7B text to video model that you can download above.
+
+![Example](text_to_video_cosmos_7B.webp)
+
+[Workflow in Json format](text_to_video_cosmos_7B.json)
+
+### Image to Video
+
+This model supports generating a video from one or more images. If more than one image is fed in, it will use them all as guidance and continue the motion. You can also do basic interpolation by setting both a start_image and an end_image, which works best if those images are similar to each other.
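Both workflows on this page also come as JSON files, linked next to each example. If you want to queue them from a script instead of the UI, ComfyUI exposes a small HTTP API; this is a rough sketch that assumes a local server on the default port and a workflow re-exported with "Save (API Format)" (the file name here is hypothetical):

```python
# Sketch: queue a workflow through ComfyUI's HTTP API on the default port.
# Note: the .json files on this page are in the UI format; the /prompt
# endpoint expects the API format ("Save (API Format)" in the ComfyUI menu).
import json
import urllib.request

with open("image_to_video_cosmos_7B_api.json", "rb") as f:  # hypothetical export
    prompt = json.load(f)

req = urllib.request.Request(
    "http://127.0.0.1:8188/prompt",
    data=json.dumps({"prompt": prompt}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode())  # the response includes the queued prompt_id
```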
+ +This workflow requires the 7B image to video model that you can download above. + +This model is trained primarily on realistic videos but in this example you can see that it also works decently on anime. + +![Example](image_to_video_cosmos_7B.webp) + +[Workflow in Json format](image_to_video_cosmos_7B.json) diff --git a/cosmos/image_to_video_cosmos_7B.json b/cosmos/image_to_video_cosmos_7B.json new file mode 100644 index 0000000..f04c2d0 --- /dev/null +++ b/cosmos/image_to_video_cosmos_7B.json @@ -0,0 +1,729 @@ +{ + "last_node_id": 84, + "last_link_id": 198, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -332.08502197265625, + 231.04571533203125 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 75, + 99 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "oldt5_xxl_fp8_e4m3fn_scaled.safetensors", + "cosmos", + "default" + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1270.6927490234375, + 120.51702117919922 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 87 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 172, + 181 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 61, + "type": "ModelSamplingContinuousEDM", + "pos": [ + 410, + -20 + ], + "size": [ + 327.5999755859375, + 106 + ], + "flags": {}, + "order": 9, + "mode": 4, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 157 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 194 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingContinuousEDM" + }, + "widgets_values": [ + "edm", + 80, + 0.002 + ] + }, + { + "id": 74, + "type": "LTXVConditioning", + "pos": [ + 540, + 150 + ], + "size": [ + 210, + 78 + ], + "flags": {}, + "order": 11, + "mode": 4, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 185 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 186 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 187 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 188 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 80, + "type": "Note", + "pos": [ + 475.15997314453125, + -163.2658233642578 + ], + "size": [ + 266.2419128417969, + 99.78375244140625 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "These pink nodes are \"bypassed\" meaning they don't do anything. To unbypass them: right click -> bypass\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 82, + "type": "Note", + "pos": [ + -1.2908354997634888, + 565.2498168945312 + ], + "size": [ + 312.01824951171875, + 126.14599609375 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "The positive and negative prompts should be long. Short prompts will still generate a coherent video however it might not follow the prompt very well." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 0, + 330 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 186 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality." + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 69, + "type": "SaveAnimatedWEBP", + "pos": [ + 1520, + 120 + ], + "size": [ + 763.5289916992188, + 578.3422241210938 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 172 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 80, + "default" + ] + }, + { + "id": 73, + "type": "SaveAnimatedPNG", + "pos": [ + 2299.842041015625, + 121.30572509765625 + ], + "size": [ + 720.1341552734375, + 829.0499877929688 + ], + "flags": {}, + "order": 15, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 181 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveAnimatedPNG" + }, + "widgets_values": [ + "ComfyUI", + 24, + 4 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 843.46337890625, + 122.69183349609375 + ], + "size": [ + 385.8114318847656, + 262 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 194 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 187 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 188 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 196 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 87 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 959521281192720, + "randomize", + 20, + 6.5, + "res_multistep", + "karras", + 1 + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 950.17138671875, + 453.1830749511719 + ], + "size": [ + 278.68310546875, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 76, + 195 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "cosmos_cv8x8x8_1.0.safetensors" + ] + }, + { + "id": 81, + "type": "Note", + "pos": [ + 480, + 730 + ], + "size": [ + 332.6131591796875, + 168.23121643066406 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model loves a length of 121 and anything too far away from this will result in a bad video.\n\nThe width and height should be equal or bigger to 704\n\nYou can set a start_image, end_image or both 
at the same time." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 7.319890975952148, + -20.895429611206055 + ], + "size": [ + 380, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 157 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "Cosmos-1_0-Diffusion-7B-Video2World.safetensors", + "default" + ] + }, + { + "id": 84, + "type": "LoadImage", + "pos": [ + -6.688927173614502, + 743.3736572265625 + ], + "size": [ + 416.1836242675781, + 366.83038330078125 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 198 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "ComfyUI_256804_.png", + "image" + ] + }, + { + "id": 42, + "type": "CLIPTextEncode", + "pos": [ + 0, + 120 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 99 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 185 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "cute anime girl with massive fennec fox ears and a big fluffy tail long blonde wavy hair blue eyes wearing a pink plaid sweater and a red scarf with a oversized black open coat with a golden circuit board pattern and a long blue maxi skirt and large black boots standing in the beautiful outdoors snow with amazing view mountains forest sky clouds beautiful sunset evening colorful horizon, she is smiling as the evening turns into night\n\n" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 83, + "type": "CosmosImageToVideoLatent", + "pos": [ + 480, + 490 + ], + "size": [ + 315, + 170 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 195 + }, + { + "name": "start_image", + "type": "IMAGE", + "link": 198, + "shape": 7 + }, + { + "name": "end_image", + "type": "IMAGE", + "link": null, + "shape": 7 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 196 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CosmosImageToVideoLatent" + }, + "widgets_values": [ + 1024, + 1024, + 121, + 1 + ] + } + ], + "links": [ + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 87, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 99, + 38, + 0, + 42, + 0, + "CLIP" + ], + [ + 157, + 37, + 0, + 61, + 0, + "MODEL" + ], + [ + 172, + 8, + 0, + 69, + 0, + "IMAGE" + ], + [ + 181, + 8, + 0, + 73, + 0, + "IMAGE" + ], + [ + 185, + 42, + 0, + 74, + 0, + "CONDITIONING" + ], + [ + 186, + 7, + 0, + 74, + 1, + "CONDITIONING" + ], + [ + 187, + 74, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 188, + 74, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 194, + 61, + 0, + 3, + 0, + "MODEL" + ], + [ + 195, + 39, + 0, + 83, + 0, + "VAE" + ], + [ + 196, + 83, + 0, + 3, + 3, + "LATENT" + ], + [ + 198, + 84, + 0, + 83, + 1, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} \ No newline at end of file diff --git a/cosmos/image_to_video_cosmos_7B.webp 
b/cosmos/image_to_video_cosmos_7B.webp new file mode 100644 index 0000000..875ba1b Binary files /dev/null and b/cosmos/image_to_video_cosmos_7B.webp differ diff --git a/cosmos/text_to_video_cosmos_7B.json b/cosmos/text_to_video_cosmos_7B.json new file mode 100644 index 0000000..48e435e --- /dev/null +++ b/cosmos/text_to_video_cosmos_7B.json @@ -0,0 +1,663 @@ +{ + "last_node_id": 82, + "last_link_id": 194, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -332.08502197265625, + 231.04571533203125 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 75, + 99 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "oldt5_xxl_fp8_e4m3fn_scaled.safetensors", + "cosmos", + "default" + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1270.6927490234375, + 120.51702117919922 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 87 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 172, + 181 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 61, + "type": "ModelSamplingContinuousEDM", + "pos": [ + 410, + -20 + ], + "size": [ + 327.5999755859375, + 106 + ], + "flags": {}, + "order": 9, + "mode": 4, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 157 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 194 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingContinuousEDM" + }, + "widgets_values": [ + "edm", + 80, + 0.002 + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 950.17138671875, + 453.1830749511719 + ], + "size": [ + 278.68310546875, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 76 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "cosmos_cv8x8x8_1.0.safetensors" + ] + }, + { + "id": 78, + "type": "EmptyCosmosLatentVideo", + "pos": [ + 473.05047607421875, + 380.00341796875 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 193 + ] + } + ], + "properties": { + "Node name for S&R": "EmptyCosmosLatentVideo" + }, + "widgets_values": [ + 1280, + 704, + 121, + 1 + ] + }, + { + "id": 74, + "type": "LTXVConditioning", + "pos": [ + 540, + 150 + ], + "size": [ + 210, + 78 + ], + "flags": {}, + "order": 10, + "mode": 4, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 185 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 186 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 187 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 188 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 80, + "type": "Note", + "pos": [ + 475.15997314453125, + -163.2658233642578 + ], + "size": [ + 266.2419128417969, + 99.78375244140625 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": 
[], + "properties": {}, + "widgets_values": [ + "These pink nodes are \"bypassed\" meaning they don't do anything. To unbypass them: right click -> bypass\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 70, + -20 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 157 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "Cosmos-1_0-Diffusion-7B-Text2World.safetensors", + "default" + ] + }, + { + "id": 81, + "type": "Note", + "pos": [ + 475.4506530761719, + 570.9951782226562 + ], + "size": [ + 312.01824951171875, + 126.14599609375 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model loves a length of 121 and anything too far away from this will result in a bad video.\n\nThe width and height should be equal or bigger to 704" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 82, + "type": "Note", + "pos": [ + -1.2908354997634888, + 565.2498168945312 + ], + "size": [ + 312.01824951171875, + 126.14599609375 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "The positive and negative prompts should be long. Short prompts will still generate a coherent video however it might not follow the prompt very well." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 42, + "type": "CLIPTextEncode", + "pos": [ + 0, + 120 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 99 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 185 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "A crystalline waterfall stands partially frozen, its edges draped with translucent ice that catches the sunlight in prisms of blue and silver. Below, a half-frozen pool spreads out, bordered by delicate ice formations. Through the fresh snow, a red fox moves gracefully, its russet coat vibrant against the white landscape, leaving perfect star-shaped prints behind as steam rises from its breath in the crisp winter air. 
The scene is wrapped in snow-muffled silence, broken only by the gentle murmur of water still flowing beneath the ice.\n\n" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 0, + 330 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 186 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality." + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 69, + "type": "SaveAnimatedWEBP", + "pos": [ + 1520, + 120 + ], + "size": [ + 763.5289916992188, + 578.3422241210938 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 172 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 80, + "default" + ] + }, + { + "id": 73, + "type": "SaveAnimatedPNG", + "pos": [ + 2299.842041015625, + 121.30572509765625 + ], + "size": [ + 720.1341552734375, + 829.0499877929688 + ], + "flags": {}, + "order": 14, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 181 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveAnimatedPNG" + }, + "widgets_values": [ + "ComfyUI", + 24, + 4 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 843.46337890625, + 122.69183349609375 + ], + "size": [ + 385.8114318847656, + 262 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 194 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 187 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 188 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 193 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 87 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 959521281192720, + "randomize", + 20, + 6.5, + "res_multistep", + "karras", + 1 + ] + } + ], + "links": [ + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 87, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 99, + 38, + 0, + 42, + 0, + "CLIP" + ], + [ + 157, + 37, + 0, + 61, + 0, + "MODEL" + ], + [ + 172, + 8, + 0, + 69, + 0, + "IMAGE" + ], + [ + 181, + 8, + 0, + 73, + 0, + "IMAGE" + ], + [ + 185, + 42, + 0, + 74, + 0, + "CONDITIONING" + ], + [ + 186, + 7, + 0, + 74, + 1, + "CONDITIONING" + ], + [ + 187, + 74, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 188, + 74, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 193, + 78, + 0, + 3, + 3, + "LATENT" + ], + [ + 194, + 61, + 0, + 3, + 0, + "MODEL" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.3513057093105383, + 
"offset": [ + 347.8132028514172, + 200.3286418889474 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/cosmos/text_to_video_cosmos_7B.webp b/cosmos/text_to_video_cosmos_7B.webp new file mode 100644 index 0000000..18530d1 Binary files /dev/null and b/cosmos/text_to_video_cosmos_7B.webp differ diff --git a/cosmos_predict2/README.md b/cosmos_predict2/README.md new file mode 100644 index 0000000..bf77457 --- /dev/null +++ b/cosmos_predict2/README.md @@ -0,0 +1,46 @@ +# Nvidia Cosmos Predict2 + +These are a family of text to image and image to video models from Nvidia. + +## Files to Download + +You will first need: + +#### Text encoder and VAE: + +[oldt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/comfyanonymous/cosmos_1.0_text_encoder_and_VAE_ComfyUI/tree/main/text_encoders) goes in: ComfyUI/models/text_encoders/ + +[wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors) goes in: ComfyUI/models/vae/ + + +Note: oldt5_xxl is not the same as the t5xxl used in flux and other models. +oldt5_xxl is t5xxl 1.0 while the one used in flux and others is t5xxl 1.1 + + +You can find all the diffusion models (go in ComfyUI/models/diffusion_models/) here: [Repackaged safetensors files](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/tree/main) or [Official Nvidia Model Files](https://huggingface.co/collections/nvidia/cosmos-predict2-68028efc052239369a0f2959) + + +## Workflows + +### Text to Image + +This workflow uses the 2B text to image cosmos predict2 model. The file used in the workflow is [cosmos_predict2_2B_t2i.safetensors](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/blob/main/cosmos_predict2_2B_t2i.safetensors) this file goes in: ComfyUI/models/diffusion_models/ + +![Example](cosmos_predict2_2b_t2i_example.png) + +You can load this image in [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the full workflow. + +I think the 2B model is the most interesting one but you can find the bigger 14B model here: [cosmos_predict2_14B_t2i.safetensors](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/blob/main/cosmos_predict2_14B_t2i.safetensors) and use it in the workflow above. + + +### Image to Video + +These models are pretty picky about the resolution/length of the videos. This workflow is for the 480p models, for the 720p models you will have to set the resolution to 720p or your results might be bad. + +This workflow uses the 2B image to video cosmos predict2 model. 
The file used in the workflow is [cosmos_predict2_2B_video2world_480p_16fps.safetensors](https://huggingface.co/Comfy-Org/Cosmos_Predict2_repackaged/blob/main/cosmos_predict2_2B_video2world_480p_16fps.safetensors) this file goes in: ComfyUI/models/diffusion_models/ + +![Example](cosmos_predict2_2b_i2v_example.webp) + +[Workflow in Json format](cosmos_predict2_2b_i2v_example.json) + + diff --git a/cosmos_predict2/cosmos_predict2_2b_i2v_example.json b/cosmos_predict2/cosmos_predict2_2b_i2v_example.json new file mode 100644 index 0000000..f7d8bdd --- /dev/null +++ b/cosmos_predict2/cosmos_predict2_2b_i2v_example.json @@ -0,0 +1,548 @@ +{ + "id": "242a6140-7341-49ca-876b-c01366b39b84", + "revision": 0, + "last_node_id": 31, + "last_link_id": 46, + "nodes": [ + { + "id": 10, + "type": "CLIPLoader", + "pos": [ + 0, + 250 + ], + "size": [ + 380, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 34, + 35 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "oldt5_xxl_fp8_e4m3fn_scaled.safetensors", + "cosmos", + "default" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 870, + 180 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 33 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 4 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 6 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 42 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 7 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 788533681999051, + "randomize", + 30, + 4, + "euler", + "simple", + 1 + ] + }, + { + "id": 15, + "type": "VAELoader", + "pos": [ + 80, + 400 + ], + "size": [ + 300, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 17, + 43 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 28, + "type": "CosmosPredict2ImageToVideoLatent", + "pos": [ + 499.99761962890625, + 616.21435546875 + ], + "size": [ + 330.7769470214844, + 170 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 43 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 44 + }, + { + "name": "end_image", + "shape": 7, + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 42 + ] + } + ], + "properties": { + "Node name for S&R": "CosmosPredict2ImageToVideoLatent" + }, + "widgets_values": [ + 848, + 480, + 93, + 1 + ] + }, + { + "id": 29, + "type": "LoadImage", + "pos": [ + 85.3239517211914, + 633.9439697265625 + ], + "size": [ + 274.080078125, + 314 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 44 + ] + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "mountains.png", + "image" + ] + }, + { + "id": 13, + "type": "UNETLoader", + "pos": [ + 0, + 120 + ], + "size": [ + 410, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": 
"MODEL", + "links": [ + 33 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "cosmos_predict2_2B_video2world_480p_16fps.safetensors", + "default" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 420, + 390 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 34 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 6 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1200, + 180 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 7 + }, + { + "name": "vae", + "type": "VAE", + "link": 17 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 45, + 46 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 31, + "type": "SaveWEBM", + "pos": [ + 1890, + 190 + ], + "size": [ + 270, + 274.8302001953125 + ], + "flags": {}, + "order": 10, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 46 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 24 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 420, + 180 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 35 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 4 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "camera moving quickly through the scene timelapse wind" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 30, + "type": "SaveAnimatedWEBP", + "pos": [ + 1440, + 180 + ], + "size": [ + 270, + 366 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 45 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16.000000000000004, + false, + 85, + "default" + ] + } + ], + "links": [ + [ + 4, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 6, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 7, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 17, + 15, + 0, + 8, + 1, + "VAE" + ], + [ + 33, + 13, + 0, + 3, + 0, + "MODEL" + ], + [ + 34, + 10, + 0, + 7, + 0, + "CLIP" + ], + [ + 35, + 10, + 0, + 6, + 0, + "CLIP" + ], + [ + 42, + 28, + 0, + 3, + 3, + "LATENT" + ], + [ + 43, + 15, + 0, + 28, + 0, + "VAE" + ], + [ + 44, + 29, + 0, + 28, + 1, + "IMAGE" + ], + [ + 45, + 8, + 0, + 30, + 0, + "IMAGE" + ], + [ + 46, + 8, + 0, + 31, + 0, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.8390545288824265, + "offset": [ + 61.29088261146557, + 145.33443300197447 + ] + }, + "frontendVersion": "1.21.7" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/cosmos_predict2/cosmos_predict2_2b_i2v_example.webp b/cosmos_predict2/cosmos_predict2_2b_i2v_example.webp new file mode 100644 index 0000000..29d98f8 Binary files /dev/null and b/cosmos_predict2/cosmos_predict2_2b_i2v_example.webp differ diff --git 
a/cosmos_predict2/cosmos_predict2_2b_t2i_example.png b/cosmos_predict2/cosmos_predict2_2b_t2i_example.png
new file mode 100644
index 0000000..50cd0a5
Binary files /dev/null and b/cosmos_predict2/cosmos_predict2_2b_t2i_example.png differ
diff --git a/edit_models/README.md b/edit_models/README.md
new file mode 100644
index 0000000..65740b6
--- /dev/null
+++ b/edit_models/README.md
@@ -0,0 +1,11 @@
+# Image Edit Model Examples
+
+Edit models, also called InstructPix2Pix models, are models that edit images using a text prompt.
+
+Here is the workflow for the Stability SDXL edit model; the checkpoint can be downloaded [here](https://huggingface.co/stabilityai/cosxl). To use it, download the cosxl_edit.safetensors file and put it in the ComfyUI/models/checkpoints folder.
+
+![Example](sdxl_edit_model.png)
+
+You can download the above image and then drag or load it in ComfyUI to get the workflow embedded in the image.
+
+The input image used in the above example can be found [here](../unclip/mountains.png).
diff --git a/edit_models/sdxl_edit_model.png b/edit_models/sdxl_edit_model.png
new file mode 100644
index 0000000..24ea553
Binary files /dev/null and b/edit_models/sdxl_edit_model.png differ
diff --git a/faq/README.md b/faq/README.md
index d3b1b2d..6b1a182 100644
--- a/faq/README.md
+++ b/faq/README.md
@@ -19,7 +19,7 @@ There are also many other differences but these two are the ones that have most
 
 ## Why do I get incoherent images with some checkpoints that are less than 1.9GB?
 
-Some rare checkpoints like ProtoGen_X3.4 don't come with CLIP weights. The CLIPLoader node in ComfyUI can be used to load CLIP model weights like [these SD1.5 ones](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/text_encoder/model.safetensors).
+Some rare checkpoints like ProtoGen_X3.4 don't come with CLIP weights. The CLIPLoader node in ComfyUI can be used to load CLIP model weights like [these CLIP L ones that can be used on SD1.5](https://huggingface.co/comfyanonymous/flux_text_encoders/blob/main/clip_l.safetensors).
 
 ## What is the difference between strength_model and strength_clip in the "Load LoRA" node?
diff --git a/flux/README.md b/flux/README.md
new file mode 100644
index 0000000..621e5cd
--- /dev/null
+++ b/flux/README.md
@@ -0,0 +1,132 @@
+# Flux Examples
+
+Flux is a family of diffusion models by [black forest labs](https://blackforestlabs.ai/announcing-black-forest-labs/).
+
+For the easy to use single file versions that you can load directly in [ComfyUI](https://github.com/comfyanonymous/ComfyUI), see below: [FP8 Checkpoint Version](#simple-to-use-fp8-checkpoint-version)
+
+## Regular Full Version
+
+### Files to download for the regular version
+
+If you don't have t5xxl_fp16.safetensors or clip_l.safetensors already in your ComfyUI/models/text_encoders/ directory you can find them at [this link](https://huggingface.co/comfyanonymous/flux_text_encoders/tree/main). You can use t5xxl_fp8_e4m3fn_scaled.safetensors instead for lower memory usage but the fp16 one is recommended if you have more than 32GB of RAM.
+
+The VAE can be found [here](https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/blob/main/split_files/vae/ae.safetensors) and should go in your ComfyUI/models/vae/ folder.
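The memory advice above is mostly simple weight-size arithmetic: bytes per parameter times parameter count. A quick back-of-the-envelope check (parameter counts are approximate):

```python
# Rough arithmetic behind the fp16 vs fp8 advice: the t5xxl encoder is roughly
# 4.7B parameters and flux itself is about 12B; fp16 = 2 bytes per weight, fp8 = 1.
def weight_gb(params_billion: float, bytes_per_weight: int) -> float:
    return params_billion * bytes_per_weight

print(f"t5xxl fp16: ~{weight_gb(4.7, 2):.1f} GB")  # ~9.4 GB
print(f"t5xxl fp8:  ~{weight_gb(4.7, 1):.1f} GB")  # ~4.7 GB
print(f"flux fp16:  ~{weight_gb(12.0, 2):.1f} GB")  # ~24 GB
```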
+
+### Tips if you are running out of memory:
+
+Use the single file fp8 version that you can find [below](#simple-to-use-fp8-checkpoint-version).
+
+You can set the weight_dtype in the "Load Diffusion Model" node to fp8, which will lower the memory usage by half but might reduce quality a tiny bit.
+
+### Flux Dev
+
+You can find the Flux Dev diffusion model weights [here](https://huggingface.co/black-forest-labs/FLUX.1-dev). Put the flux1-dev.safetensors file in your: ComfyUI/models/diffusion_models/ folder.
+
+You can then load or drag the following image in ComfyUI to get the workflow:
+
+![Example](flux_dev_example.png)
+
+### Flux Schnell
+
+Flux Schnell is a distilled 4 step model.
+
+You can find the Flux Schnell diffusion model weights [here](https://huggingface.co/black-forest-labs/FLUX.1-schnell). The flux1-schnell.safetensors file should go in your: ComfyUI/models/unet/ folder.
+
+You can then load or drag the following image in ComfyUI to get the workflow:
+
+![Example](flux_schnell_example.png)
+
+## Simple to use FP8 Checkpoint version
+
+### Flux Dev
+
+You can find an easy to use checkpoint for Flux dev [here](https://huggingface.co/Comfy-Org/flux1-dev/blob/main/flux1-dev-fp8.safetensors) that you can put in your: ComfyUI/models/checkpoints/ directory.
+
+This file can be loaded with the regular "Load Checkpoint" node. Make sure you set CFG to 1.0 when using it.
+
+Note that fp8 degrades the quality a bit, so if you have the resources, the official full 16 bit version is recommended.
+
+You can then load or drag the following image in ComfyUI to get the workflow:
+
+![Example](flux_dev_checkpoint_example.png)
+
+### Flux Schnell
+
+For Flux schnell you can get the checkpoint [here](https://huggingface.co/Comfy-Org/flux1-schnell/blob/main/flux1-schnell-fp8.safetensors) that you can put in your: ComfyUI/models/checkpoints/ directory.
+
+You can then load or drag the following image in ComfyUI to get the workflow:
+
+![Example](flux_schnell_checkpoint_example.png)
+
+## Flux Extras
+
+The following examples might require some of the regular flux files linked at the top of this page.
+
+### Flux Kontext (image editing) model
+
+Download the [flux1-kontext-dev.safetensors](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev) model file and put it in your ComfyUI/models/diffusion_models/ folder. There is an alternative fp8 model here: [flux1-dev-kontext_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/flux1-kontext-dev_ComfyUI/blob/main/split_files/diffusion_models/flux1-dev-kontext_fp8_scaled.safetensors) if the other one is too large for you.
+
+Here's a simple example. You can load or drag the following image in ComfyUI to get the workflow:
+
+![Example](flux_kontext_example.png)
+
+You can find the input image for the above workflow [here](../chroma/fennec_girl_sing.png).
+
+Here's another more complex example that generates a comic from the above input image:
+
+![Example](flux_kontext_example_comic.webp)
+
+### Fill (Inpainting) model
+
+Download the [flux1-fill-dev.safetensors](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev) model file and put it in your ComfyUI/models/diffusion_models/ folder.
+
+Here is an example you can drag in ComfyUI for inpainting. As a reminder, you can right click images in the "Load Image" node and "Open in MaskEditor".
+
+![Example](flux_fill_inpaint_example.png)
+
+Here is an example for outpainting:
+
+![Example](flux_fill_outpaint_example.png)
+
+### Redux
+
+The Redux model can be used to prompt flux dev or flux schnell with one or more images.
+
+Download the [sigclip_vision_patch14_384.safetensors](https://huggingface.co/Comfy-Org/sigclip_vision_384/blob/main/sigclip_vision_patch14_384.safetensors) model and put it in your ComfyUI/models/clip_vision folder, then download [flux1-redux-dev.safetensors](https://huggingface.co/black-forest-labs/FLUX.1-Redux-dev) and put it in your ComfyUI/models/style_models folder.
+
+You can then load or drag the following image in ComfyUI to get the workflow:
+
+![Example](flux_redux_model_example.png)
+
+### Canny and Depth
+
+These models are published in two versions. Full model format: [flux1-canny-dev.safetensors](https://huggingface.co/black-forest-labs/FLUX.1-Canny-dev) and [flux1-depth-dev.safetensors](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev); put them in your ComfyUI/models/diffusion_models/ folder.
+
+Here is an example for the full canny model:
+
+![Example](flux_canny_model_example.png)
+
+They are also published in lora format that can be applied to the flux dev model: [flux1-canny-dev-lora.safetensors](https://huggingface.co/black-forest-labs/FLUX.1-Canny-dev-lora) and [flux1-depth-dev-lora.safetensors](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev-lora); put them in your ComfyUI/models/loras/ folder.
+
+Here is an example for the depth lora:
+
+![Example](flux_depth_lora_example.png)
+
+### Community Flux Controlnets
+
+XLab and InstantX + Shakker Labs have released Controlnets for Flux. You can find the InstantX Canny model file [here](https://huggingface.co/InstantX/FLUX.1-dev-Controlnet-Canny/blob/main/diffusion_pytorch_model.safetensors) (rename it to instantx_flux_canny.safetensors for the example below), the Depth controlnet [here](https://huggingface.co/Shakker-Labs/FLUX.1-dev-ControlNet-Depth/blob/main/diffusion_pytorch_model.safetensors) and the Union Controlnet [here](https://huggingface.co/Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro/blob/main/diffusion_pytorch_model.safetensors).
+
+The XLab controlnets can be found [here](https://huggingface.co/XLabs-AI/flux-controlnet-collections).
+
+Put these files under the `ComfyUI/models/controlnet` directory.
+
+Try an example Canny Controlnet workflow by dragging this image into ComfyUI:
+
+![Example](flux_controlnet_example.png)
+
+If you need an example input image for the canny, use [this](girl_in_field.png). Put it under `ComfyUI/input`.
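The Canny examples feed an edge map to the controlnet. ComfyUI has a built-in Canny node for this, but if you want to precompute the edge image outside the graph, here is a minimal OpenCV sketch (the thresholds are just a starting point to tune):

```python
# Sketch: precompute a canny edge map for the controlnet input with OpenCV.
import cv2

img = cv2.imread("girl_in_field.png")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 100, 200)  # low/high hysteresis thresholds
cv2.imwrite("girl_in_field_canny.png", edges)
```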
diff --git a/flux/flux_canny_model_example.png b/flux/flux_canny_model_example.png
new file mode 100644
index 0000000..1b3da7b
Binary files /dev/null and b/flux/flux_canny_model_example.png differ
diff --git a/flux/flux_controlnet_example.png b/flux/flux_controlnet_example.png
new file mode 100644
index 0000000..2872ecf
Binary files /dev/null and b/flux/flux_controlnet_example.png differ
diff --git a/flux/flux_depth_lora_example.png b/flux/flux_depth_lora_example.png
new file mode 100644
index 0000000..241f2a1
Binary files /dev/null and b/flux/flux_depth_lora_example.png differ
diff --git a/flux/flux_dev_checkpoint_example.png b/flux/flux_dev_checkpoint_example.png
new file mode 100644
index 0000000..8c2131f
Binary files /dev/null and b/flux/flux_dev_checkpoint_example.png differ
diff --git a/flux/flux_dev_example.png b/flux/flux_dev_example.png
new file mode 100644
index 0000000..0369432
Binary files /dev/null and b/flux/flux_dev_example.png differ
diff --git a/flux/flux_fill_inpaint_example.png b/flux/flux_fill_inpaint_example.png
new file mode 100644
index 0000000..1fc8d9b
Binary files /dev/null and b/flux/flux_fill_inpaint_example.png differ
diff --git a/flux/flux_fill_outpaint_example.png b/flux/flux_fill_outpaint_example.png
new file mode 100644
index 0000000..fb5d79b
Binary files /dev/null and b/flux/flux_fill_outpaint_example.png differ
diff --git a/flux/flux_kontext_example.png b/flux/flux_kontext_example.png
new file mode 100644
index 0000000..3dea15b
Binary files /dev/null and b/flux/flux_kontext_example.png differ
diff --git a/flux/flux_kontext_example_comic.webp b/flux/flux_kontext_example_comic.webp
new file mode 100644
index 0000000..6586fdd
Binary files /dev/null and b/flux/flux_kontext_example_comic.webp differ
diff --git a/flux/flux_redux_model_example.png b/flux/flux_redux_model_example.png
new file mode 100644
index 0000000..88985fd
Binary files /dev/null and b/flux/flux_redux_model_example.png differ
diff --git a/flux/flux_schnell_checkpoint_example.png b/flux/flux_schnell_checkpoint_example.png
new file mode 100644
index 0000000..cecb1bf
Binary files /dev/null and b/flux/flux_schnell_checkpoint_example.png differ
diff --git a/flux/flux_schnell_example.png b/flux/flux_schnell_example.png
new file mode 100644
index 0000000..d397f4b
Binary files /dev/null and b/flux/flux_schnell_example.png differ
diff --git a/flux/girl_in_field.png b/flux/girl_in_field.png
new file mode 100644
index 0000000..eb16186
Binary files /dev/null and b/flux/girl_in_field.png differ
diff --git a/hidream/README.md b/hidream/README.md
new file mode 100644
index 0000000..cb1ffc6
--- /dev/null
+++ b/hidream/README.md
@@ -0,0 +1,56 @@
+# HiDream
+
+[HiDream I1](https://github.com/HiDream-ai/HiDream-I1) is a state-of-the-art image diffusion model.
+
+## Files to Download
+
+Download the text encoder files:
+
+* [clip_l_hidream.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/text_encoders/clip_l_hidream.safetensors)
+* [clip_g_hidream.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/text_encoders/clip_g_hidream.safetensors)
+* [t5xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/text_encoders/t5xxl_fp8_e4m3fn_scaled.safetensors)
+* [llama_3.1_8b_instruct_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/text_encoders/llama_3.1_8b_instruct_fp8_scaled.safetensors)
+
+Put these 4 files in your ComfyUI/models/text_encoders directory.
+
+You can find them all [here](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/tree/main/split_files/text_encoders). You might already have t5xxl downloaded.
+
+The VAE can be found [here](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/vae/ae.safetensors) and should go in your ComfyUI/models/vae/ folder. This is the Flux VAE so you might already have it.
+
+The diffusion models can be found in this [folder](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/tree/main/split_files/diffusion_models).
+
+## HiDream dev Workflow
+
+Download [hidream_i1_dev_bf16.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/diffusion_models/hidream_i1_dev_bf16.safetensors) and put it in your ComfyUI/models/diffusion_models/ directory.
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](hidream_dev_example.png)
+
+## HiDream full Workflow
+
+Download [hidream_i1_full_fp16.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/diffusion_models/hidream_i1_full_fp16.safetensors) and put it in your ComfyUI/models/diffusion_models/ directory.
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](hidream_full_example.png)
+
+## HiDream e1.1
+
+This is an edit model. Download [hidream_e1_1_bf16.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/diffusion_models/hidream_e1_1_bf16.safetensors) and put it in your ComfyUI/models/diffusion_models/ directory.
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](hidream_e1.1_example.png)
+
+## HiDream e1 (old)
+
+This is the older, experimental HiDream 1.0 edit model, superseded by e1.1 above. Download [hidream_e1_full_bf16.safetensors](https://huggingface.co/Comfy-Org/HiDream-I1_ComfyUI/blob/main/split_files/diffusion_models/hidream_e1_full_bf16.safetensors) and put it in your ComfyUI/models/diffusion_models/ directory.
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](hidream_e1_example.png)
+
diff --git a/hidream/hidream_dev_example.png b/hidream/hidream_dev_example.png
new file mode 100644
index 0000000..db79354
Binary files /dev/null and b/hidream/hidream_dev_example.png differ
diff --git a/hidream/hidream_e1.1_example.png b/hidream/hidream_e1.1_example.png
new file mode 100644
index 0000000..ebc62c6
Binary files /dev/null and b/hidream/hidream_e1.1_example.png differ
diff --git a/hidream/hidream_e1_example.png b/hidream/hidream_e1_example.png
new file mode 100644
index 0000000..e595e22
Binary files /dev/null and b/hidream/hidream_e1_example.png differ
diff --git a/hidream/hidream_full_example.png b/hidream/hidream_full_example.png
new file mode 100644
index 0000000..224f4d3
Binary files /dev/null and b/hidream/hidream_full_example.png differ
diff --git a/hunyuan_dit/README.md b/hunyuan_dit/README.md
new file mode 100644
index 0000000..17589fe
--- /dev/null
+++ b/hunyuan_dit/README.md
@@ -0,0 +1,12 @@
+# Hunyuan DiT Examples
+
+Hunyuan DiT is a diffusion model that understands both English and Chinese.
+
+## Hunyuan DiT 1.2
+
+Download [hunyuan_dit_1.2.safetensors](https://huggingface.co/comfyanonymous/hunyuan_dit_comfyui/blob/main/hunyuan_dit_1.2.safetensors) and put it in your ComfyUI/models/checkpoints directory.
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](hunyuan_dit_1.2_example.png)
+
diff --git a/hunyuan_dit/hunyuan_dit_1.2_example.png b/hunyuan_dit/hunyuan_dit_1.2_example.png
new file mode 100644
index 0000000..5033622
Binary files /dev/null and b/hunyuan_dit/hunyuan_dit_1.2_example.png differ
diff --git a/hunyuan_image/README.md b/hunyuan_image/README.md
new file mode 100644
index 0000000..3b51cfe
--- /dev/null
+++ b/hunyuan_image/README.md
@@ -0,0 +1,26 @@
+# Hunyuan Image 2.1
+
+[Hunyuan Image 2.1](https://huggingface.co/tencent/HunyuanImage-2.1) is a powerful diffusion model for image generation.
+
+## Basic Workflow
+
+Download the following models and place them in the appropriate ComfyUI directories:
+
+### Text Encoders
+Download and put in your ComfyUI/models/text_encoders directory:
+- [byt5_small_glyphxl_fp16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/text_encoders/byt5_small_glyphxl_fp16.safetensors)
+- [qwen_2.5_vl_7b.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/text_encoders/qwen_2.5_vl_7b.safetensors)
+
+### VAE Models
+Download and put in your ComfyUI/models/vae directory:
+- [hunyuan_image_2.1_vae_fp16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/vae/hunyuan_image_2.1_vae_fp16.safetensors)
+- **Optional (for refiner):** [hunyuan_image_refiner_vae_fp16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/vae/hunyuan_image_refiner_vae_fp16.safetensors)
+
+### Diffusion Models
+Download and put in your ComfyUI/models/diffusion_models directory:
+- [hunyuanimage2.1_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/diffusion_models/hunyuanimage2.1_bf16.safetensors)
+- **Optional (for refiner):** [hunyuanimage2.1_refiner_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanImage_2.1_ComfyUI/blob/main/split_files/diffusion_models/hunyuanimage2.1_refiner_bf16.safetensors)
+
+You can then load up or drag the following image in ComfyUI to get the workflow:
+
+![Example](hunyuan_image_example.png)
diff --git a/hunyuan_image/hunyuan_image_example.png b/hunyuan_image/hunyuan_image_example.png
new file mode 100644
index 0000000..3dcfbbc
Binary files /dev/null and b/hunyuan_image/hunyuan_image_example.png differ
diff --git a/hunyuan_video/README.md b/hunyuan_video/README.md
new file mode 100644
index 0000000..0f936c1
--- /dev/null
+++ b/hunyuan_video/README.md
@@ -0,0 +1,50 @@
+# Hunyuan Video Model
+
+[Hunyuan Video](https://huggingface.co/tencent/HunyuanVideo) is a text to video model.
+
+Download the clip_l.safetensors and llava_llama3_fp8_scaled.safetensors files from [here](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/text_encoders) and put them in your ComfyUI/models/text_encoders directory.
+
+Download the [hunyuan_video_vae_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/vae) file and put it in your ComfyUI/models/vae folder.
+
+### Text to Video
+
+Download the [hunyuan_video_t2v_720p_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/diffusion_models) file and put it in your ComfyUI/models/diffusion_models folder.
+
+This model can also generate still images by setting the video length to 1.
+
+![Example](hunyuan_video_text_to_video.webp)
+
+[Workflow in Json format](hunyuan_video_text_to_video.json)
+
+You can download this webp animated image and load it or drag it onto [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow.
+
+### Image to Video
+
+Download the [llava_llama3_vision.safetensors](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/blob/main/split_files/clip_vision/llava_llama3_vision.safetensors) file and put it in your ComfyUI/models/clip_vision/ folder.
+
+There are two different models you can choose from, which give different results.
+
+#### v1 "concat"
+
+This first model follows the guiding image less than the other one but might give better movement.
+ +Download the [hunyuan_video_image_to_video_720p_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/diffusion_models) file and put it in your ComfyUI/models/diffusion_models/ folder. + +![Example](hunyuan_video_image_to_video.webp) + +[Workflow in Json format](hunyuan_video_image_to_video.json) + +You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. The input image can be found on the [flux](../flux) page. + +#### v2 "replace" + +This second model follows the guiding image very closely but seems to be a bit less dynamic than the first one. + +Download the [hunyuan_video_v2_replace_image_to_video_720p_bf16.safetensors](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/diffusion_models) file and put it in your ComfyUI/models/diffusion_models/ folder. + +![Example](hunyuan_video_image_to_video_v2.webp) + +[Workflow in Json format](hunyuan_video_image_to_video_v2.json) + +You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. The input image can be found on the [flux](../flux) page. diff --git a/hunyuan_video/hunyuan_video_image_to_video.json b/hunyuan_video/hunyuan_video_image_to_video.json new file mode 100644 index 0000000..0a8508c --- /dev/null +++ b/hunyuan_video/hunyuan_video_image_to_video.json @@ -0,0 +1,1142 @@ +{ + "last_node_id": 89, + "last_link_id": 230, + "nodes": [ + { + "id": 22, + "type": "BasicGuider", + "pos": [ + 600, + 0 + ], + "size": [ + 222.3482666015625, + 46 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 195, + "slot_index": 0 + }, + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 129, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "GUIDER", + "type": "GUIDER", + "shape": 3, + "links": [ + 30 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BasicGuider" + }, + "widgets_values": [] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1150, + 90 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 19, + "mode": 2, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 181 + }, + { + "name": "vae", + "type": "VAE", + "link": 206 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 13, + "type": "SamplerCustomAdvanced", + "pos": [ + 860, + 200 + ], + "size": [ + 272.3617858886719, + 124.53733825683594 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "name": "noise", + "type": "NOISE", + "link": 37, + "slot_index": 0 + }, + { + "name": "guider", + "type": "GUIDER", + "link": 30, + "slot_index": 1 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 19, + "slot_index": 2 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 20, + "slot_index": 3 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 216, + "slot_index": 4 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "shape": 3, + "links": [ + 181, + 210 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "shape": 3, + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustomAdvanced" + }, + "widgets_values": [] + }, + { + "id": 74, + "type": "Note", + "pos": [ + 
1147.7459716796875, + 405.0789489746094 + ], + "size": [ + 210, + 170 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Use the tiled decode node by default because most people will need it.\n\nLower the tile_size and overlap if you run out of memory." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 26, + "type": "FluxGuidance", + "pos": [ + 514.2149047851562, + 86.77685546875 + ], + "size": [ + 317.4000244140625, + 58 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 225 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "shape": 3, + "links": [ + 129 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "FluxGuidance" + }, + "widgets_values": [ + 6 + ], + "color": "#233", + "bgcolor": "#355" + }, + { + "id": 67, + "type": "ModelSamplingSD3", + "pos": [ + 360, + 0 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 209 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 195 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 7 + ] + }, + { + "id": 17, + "type": "BasicScheduler", + "pos": [ + 510, + 660 + ], + "size": [ + 315, + 106 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 190, + "slot_index": 0 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "shape": 3, + "links": [ + 20 + ] + } + ], + "properties": { + "Node name for S&R": "BasicScheduler" + }, + "widgets_values": [ + "simple", + 20, + 1 + ] + }, + { + "id": 16, + "type": "KSamplerSelect", + "pos": [ + 520, + 550 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "shape": 3, + "links": [ + 19 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 78, + "type": "HunyuanImageToVideo", + "pos": [ + 510, + 820 + ], + "size": [ + 315, + 170 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 218 + }, + { + "name": "vae", + "type": "VAE", + "link": 223 + }, + { + "name": "start_image", + "type": "IMAGE", + "shape": 7, + "link": 222 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 225 + ], + "slot_index": 0 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 216 + ] + } + ], + "properties": { + "Node name for S&R": "HunyuanImageToVideo" + }, + "widgets_values": [ + 720, + 720, + 57, + 1 + ] + }, + { + "id": 73, + "type": "VAEDecodeTiled", + "pos": [ + 1150, + 200 + ], + "size": [ + 210, + 150 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 210 + }, + { + "name": "vae", + "type": "VAE", + "link": 211 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 230 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecodeTiled" + }, + "widgets_values": [ + 256, + 64, + 64, + 8 + ] + }, + { + "id": 75, + "type": "SaveAnimatedWEBP", + "pos": [ + 1640, + 200 + ], + "size": [ + 621.495361328125, + 587.12451171875 + ], + "flags": {}, + "order": 22, + 
"mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 228 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 90, + "default" + ] + }, + { + "id": 84, + "type": "SaveWEBM", + "pos": [ + 2280, + 200 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 23, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 229 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 12 + ] + }, + { + "id": 87, + "type": "Note", + "pos": [ + 1410, + 340 + ], + "size": [ + 210, + 170 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "The official code removes the first 4 frames because they are sometimes bad. You can bypass (CTRL-B) this node if you don't want this." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 80, + "type": "TextEncodeHunyuanVideo_ImageToVideo", + "pos": [ + 390, + 180 + ], + "size": [ + 441, + 200 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 224 + }, + { + "name": "clip_vision_output", + "type": "CLIP_VISION_OUTPUT", + "link": 219 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 218 + ] + } + ], + "title": "Text Encode Hunyuan Video (ImageToVideo)", + "properties": { + "Node name for S&R": "TextEncodeHunyuanVideo_ImageToVideo" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and a big fluffy tail wearing a maid outfit running" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 81, + "type": "CLIPVisionEncode", + "pos": [ + 200, + 530 + ], + "size": [ + 253.60000610351562, + 78 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 220 + }, + { + "name": "image", + "type": "IMAGE", + "link": 221 + } + ], + "outputs": [ + { + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "links": [ + 219 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionEncode" + }, + "widgets_values": [ + "none" + ] + }, + { + "id": 89, + "type": "Note", + "pos": [ + 190, + 660 + ], + "size": [ + 260, + 210 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Select your image here. The image is fed to both the text encoder and directly to the model.\n\nYou can set the resolution and length of the video using the HunyuanImageToVideo node." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 83, + "type": "LoadImage", + "pos": [ + -190, + 700 + ], + "size": [ + 365.4132080078125, + 471.8512268066406 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 221, + 222 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "flux_dev_example.png", + "image" + ] + }, + { + "id": 82, + "type": "CLIPVisionLoader", + "pos": [ + -190, + 580 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 220 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionLoader" + }, + "widgets_values": [ + "llava_llama3_vision.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 10, + "type": "VAELoader", + "pos": [ + -190, + 470 + ], + "size": [ + 350, + 60 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "shape": 3, + "links": [ + 206, + 211, + 223 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "hunyuan_video_vae_bf16.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 11, + "type": "DualCLIPLoader", + "pos": [ + -190, + 290 + ], + "size": [ + 350, + 122 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "shape": 3, + "links": [ + 224 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "DualCLIPLoader" + }, + "widgets_values": [ + "clip_l.safetensors", + "llava_llama3_fp8_scaled.safetensors", + "hunyuan_video", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 12, + "type": "UNETLoader", + "pos": [ + -190, + 160 + ], + "size": [ + 404.6181640625, + 82 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "shape": 3, + "links": [ + 190, + 209 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "hunyuan_video_image_to_video_720p_bf16.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 77, + "type": "Note", + "pos": [ + -140, + 0 + ], + "size": [ + 350, + 110 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Select a fp8 weight_dtype if you are running out of memory." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 86, + "type": "ImageFromBatch", + "pos": [ + 1410, + 200 + ], + "size": [ + 210, + 82 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 230 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 228, + 229 + ] + } + ], + "properties": { + "Node name for S&R": "ImageFromBatch" + }, + "widgets_values": [ + 4, + 4096 + ] + }, + { + "id": 25, + "type": "RandomNoise", + "pos": [ + 520, + 420 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "NOISE", + "type": "NOISE", + "shape": 3, + "links": [ + 37 + ] + } + ], + "properties": { + "Node name for S&R": "RandomNoise" + }, + "widgets_values": [ + 187330947843879, + "randomize" + ], + "color": "#2a363b", + "bgcolor": "#3f5159" + } + ], + "links": [ + [ + 19, + 16, + 0, + 13, + 2, + "SAMPLER" + ], + [ + 20, + 17, + 0, + 13, + 3, + "SIGMAS" + ], + [ + 30, + 22, + 0, + 13, + 1, + "GUIDER" + ], + [ + 37, + 25, + 0, + 13, + 0, + "NOISE" + ], + [ + 129, + 26, + 0, + 22, + 1, + "CONDITIONING" + ], + [ + 181, + 13, + 0, + 8, + 0, + "LATENT" + ], + [ + 190, + 12, + 0, + 17, + 0, + "MODEL" + ], + [ + 195, + 67, + 0, + 22, + 0, + "MODEL" + ], + [ + 206, + 10, + 0, + 8, + 1, + "VAE" + ], + [ + 209, + 12, + 0, + 67, + 0, + "MODEL" + ], + [ + 210, + 13, + 0, + 73, + 0, + "LATENT" + ], + [ + 211, + 10, + 0, + 73, + 1, + "VAE" + ], + [ + 216, + 78, + 1, + 13, + 4, + "LATENT" + ], + [ + 218, + 80, + 0, + 78, + 0, + "CONDITIONING" + ], + [ + 219, + 81, + 0, + 80, + 1, + "CLIP_VISION_OUTPUT" + ], + [ + 220, + 82, + 0, + 81, + 0, + "CLIP_VISION" + ], + [ + 221, + 83, + 0, + 81, + 1, + "IMAGE" + ], + [ + 222, + 83, + 0, + 78, + 2, + "IMAGE" + ], + [ + 223, + 10, + 0, + 78, + 1, + "VAE" + ], + [ + 224, + 11, + 0, + 80, + 0, + "CLIP" + ], + [ + 225, + 78, + 0, + 26, + 0, + "CONDITIONING" + ], + [ + 228, + 86, + 0, + 75, + 0, + "IMAGE" + ], + [ + 229, + 86, + 0, + 84, + 0, + "IMAGE" + ], + [ + 230, + 73, + 0, + 86, + 0, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "groupNodes": {} + }, + "models": [ + { + "name": "llava_llama3_vision.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/resolve/main/split_files/clip_vision/llava_llama3_vision.safetensors?download=true", + "directory": "clip_vision" + }, + { + "name": "clip_l.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/resolve/main/split_files/text_encoders/clip_l.safetensors?download=true", + "directory": "text_encoders" + }, + { + "name": "llava_llama3_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/resolve/main/split_files/text_encoders/llava_llama3_fp8_scaled.safetensors?download=true", + "directory": "text_encoders" + }, + { + "name": "hunyuan_video_vae_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/resolve/main/split_files/vae/hunyuan_video_vae_bf16.safetensors?download=true", + "directory": "vae" + }, + { + "name": "hunyuan_video_image_to_video_720p_bf16.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/resolve/main/split_files/diffusion_models/hunyuan_video_image_to_video_720p_bf16.safetensors?download=true", + "directory": "diffusion_models" + } + ], + "version": 0.4 +} diff --git a/hunyuan_video/hunyuan_video_image_to_video.webp 
b/hunyuan_video/hunyuan_video_image_to_video.webp new file mode 100644 index 0000000..772ddfd Binary files /dev/null and b/hunyuan_video/hunyuan_video_image_to_video.webp differ diff --git a/hunyuan_video/hunyuan_video_image_to_video_v2.json b/hunyuan_video/hunyuan_video_image_to_video_v2.json new file mode 100644 index 0000000..2900399 --- /dev/null +++ b/hunyuan_video/hunyuan_video_image_to_video_v2.json @@ -0,0 +1,1048 @@ +{ + "last_node_id": 90, + "last_link_id": 233, + "nodes": [ + { + "id": 22, + "type": "BasicGuider", + "pos": [ + 600, + 0 + ], + "size": [ + 222.3482666015625, + 46 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 195, + "slot_index": 0 + }, + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 129, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "GUIDER", + "type": "GUIDER", + "shape": 3, + "links": [ + 30 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BasicGuider" + }, + "widgets_values": [] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1150, + 90 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 18, + "mode": 2, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 181 + }, + { + "name": "vae", + "type": "VAE", + "link": 206 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 13, + "type": "SamplerCustomAdvanced", + "pos": [ + 860, + 200 + ], + "size": [ + 272.3617858886719, + 124.53733825683594 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "name": "noise", + "type": "NOISE", + "link": 37, + "slot_index": 0 + }, + { + "name": "guider", + "type": "GUIDER", + "link": 30, + "slot_index": 1 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 19, + "slot_index": 2 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 20, + "slot_index": 3 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 216, + "slot_index": 4 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "shape": 3, + "links": [ + 181, + 210 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "shape": 3, + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustomAdvanced" + }, + "widgets_values": [] + }, + { + "id": 74, + "type": "Note", + "pos": [ + 1147.7459716796875, + 405.0789489746094 + ], + "size": [ + 210, + 170 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Use the tiled decode node by default because most people will need it.\n\nLower the tile_size and overlap if you run out of memory." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 26, + "type": "FluxGuidance", + "pos": [ + 514.2149047851562, + 86.77685546875 + ], + "size": [ + 317.4000244140625, + 58 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 225 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "shape": 3, + "links": [ + 129 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "FluxGuidance" + }, + "widgets_values": [ + 6 + ], + "color": "#233", + "bgcolor": "#355" + }, + { + "id": 67, + "type": "ModelSamplingSD3", + "pos": [ + 360, + 0 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 209 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 195 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 7 + ] + }, + { + "id": 17, + "type": "BasicScheduler", + "pos": [ + 510, + 660 + ], + "size": [ + 315, + 106 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 190, + "slot_index": 0 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "shape": 3, + "links": [ + 20 + ] + } + ], + "properties": { + "Node name for S&R": "BasicScheduler" + }, + "widgets_values": [ + "simple", + 20, + 1 + ] + }, + { + "id": 16, + "type": "KSamplerSelect", + "pos": [ + 520, + 550 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "shape": 3, + "links": [ + 19 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 81, + "type": "CLIPVisionEncode", + "pos": [ + 200, + 530 + ], + "size": [ + 253.60000610351562, + 78 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 220 + }, + { + "name": "image", + "type": "IMAGE", + "link": 221 + } + ], + "outputs": [ + { + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "links": [ + 219 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionEncode" + }, + "widgets_values": [ + "none" + ] + }, + { + "id": 89, + "type": "Note", + "pos": [ + 190, + 660 + ], + "size": [ + 260, + 210 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Select your image here. The image is fed to both the text encoder and directly to the model.\n\nYou can set the resolution and length of the video using the HunyuanImageToVideo node." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 83, + "type": "LoadImage", + "pos": [ + -190, + 700 + ], + "size": [ + 365.4132080078125, + 471.8512268066406 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 221, + 222 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "flux_dev_example.png", + "image" + ] + }, + { + "id": 82, + "type": "CLIPVisionLoader", + "pos": [ + -190, + 580 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 220 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionLoader" + }, + "widgets_values": [ + "llava_llama3_vision.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 10, + "type": "VAELoader", + "pos": [ + -190, + 470 + ], + "size": [ + 350, + 60 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "shape": 3, + "links": [ + 206, + 211, + 223 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "hunyuan_video_vae_bf16.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 11, + "type": "DualCLIPLoader", + "pos": [ + -190, + 290 + ], + "size": [ + 350, + 122 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "shape": 3, + "links": [ + 224 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "DualCLIPLoader" + }, + "widgets_values": [ + "clip_l.safetensors", + "llava_llama3_fp8_scaled.safetensors", + "hunyuan_video", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 77, + "type": "Note", + "pos": [ + -140, + 0 + ], + "size": [ + 350, + 110 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Select a fp8 weight_dtype if you are running out of memory." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 78, + "type": "HunyuanImageToVideo", + "pos": [ + 510, + 820 + ], + "size": [ + 315, + 194 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 218 + }, + { + "name": "vae", + "type": "VAE", + "link": 223 + }, + { + "name": "start_image", + "type": "IMAGE", + "shape": 7, + "link": 222 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 225 + ], + "slot_index": 0 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 216 + ] + } + ], + "properties": { + "Node name for S&R": "HunyuanImageToVideo" + }, + "widgets_values": [ + 720, + 720, + 57, + 1, + "v2 (replace)" + ] + }, + { + "id": 73, + "type": "VAEDecodeTiled", + "pos": [ + 1150, + 200 + ], + "size": [ + 210, + 150 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 210 + }, + { + "name": "vae", + "type": "VAE", + "link": 211 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 232, + 233 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecodeTiled" + }, + "widgets_values": [ + 256, + 64, + 64, + 8 + ] + }, + { + "id": 75, + "type": "SaveAnimatedWEBP", + "pos": [ + 1390, + 200 + ], + "size": [ + 621.495361328125, + 587.12451171875 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 232 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 90, + "default" + ] + }, + { + "id": 84, + "type": "SaveWEBM", + "pos": [ + 2030, + 200 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 21, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 233 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 12 + ] + }, + { + "id": 80, + "type": "TextEncodeHunyuanVideo_ImageToVideo", + "pos": [ + 390, + 180 + ], + "size": [ + 441, + 200 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 224 + }, + { + "name": "clip_vision_output", + "type": "CLIP_VISION_OUTPUT", + "link": 219 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 218 + ] + } + ], + "title": "Text Encode Hunyuan Video (ImageToVideo)", + "properties": { + "Node name for S&R": "TextEncodeHunyuanVideo_ImageToVideo" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and a big fluffy tail wearing a maid outfit walking forward", + 4 + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 25, + "type": "RandomNoise", + "pos": [ + 520, + 420 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "NOISE", + "type": "NOISE", + "shape": 3, + "links": [ + 37 + ] + } + ], + "properties": { + "Node name for S&R": "RandomNoise" + }, + "widgets_values": [ + 72275780843998, + "randomize" + ], + "color": "#2a363b", + "bgcolor": "#3f5159" + }, + { + "id": 12, + "type": "UNETLoader", + "pos": [ + -190, + 160 + ], + "size": [ + 404.6181640625, + 82 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "shape": 3, + "links": [ + 190, + 209 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + 
"widgets_values": [ + "hunyuan_video_v2_replace_image_to_video_720p_bf16.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + } + ], + "links": [ + [ + 19, + 16, + 0, + 13, + 2, + "SAMPLER" + ], + [ + 20, + 17, + 0, + 13, + 3, + "SIGMAS" + ], + [ + 30, + 22, + 0, + 13, + 1, + "GUIDER" + ], + [ + 37, + 25, + 0, + 13, + 0, + "NOISE" + ], + [ + 129, + 26, + 0, + 22, + 1, + "CONDITIONING" + ], + [ + 181, + 13, + 0, + 8, + 0, + "LATENT" + ], + [ + 190, + 12, + 0, + 17, + 0, + "MODEL" + ], + [ + 195, + 67, + 0, + 22, + 0, + "MODEL" + ], + [ + 206, + 10, + 0, + 8, + 1, + "VAE" + ], + [ + 209, + 12, + 0, + 67, + 0, + "MODEL" + ], + [ + 210, + 13, + 0, + 73, + 0, + "LATENT" + ], + [ + 211, + 10, + 0, + 73, + 1, + "VAE" + ], + [ + 216, + 78, + 1, + 13, + 4, + "LATENT" + ], + [ + 218, + 80, + 0, + 78, + 0, + "CONDITIONING" + ], + [ + 219, + 81, + 0, + 80, + 1, + "CLIP_VISION_OUTPUT" + ], + [ + 220, + 82, + 0, + 81, + 0, + "CLIP_VISION" + ], + [ + 221, + 83, + 0, + 81, + 1, + "IMAGE" + ], + [ + 222, + 83, + 0, + 78, + 2, + "IMAGE" + ], + [ + 223, + 10, + 0, + 78, + 1, + "VAE" + ], + [ + 224, + 11, + 0, + 80, + 0, + "CLIP" + ], + [ + 225, + 78, + 0, + 26, + 0, + "CONDITIONING" + ], + [ + 232, + 73, + 0, + 75, + 0, + "IMAGE" + ], + [ + 233, + 73, + 0, + 84, + 0, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "groupNodes": {} + }, + "version": 0.4 +} \ No newline at end of file diff --git a/hunyuan_video/hunyuan_video_image_to_video_v2.webp b/hunyuan_video/hunyuan_video_image_to_video_v2.webp new file mode 100644 index 0000000..fc3dda7 Binary files /dev/null and b/hunyuan_video/hunyuan_video_image_to_video_v2.webp differ diff --git a/hunyuan_video/hunyuan_video_text_to_video.json b/hunyuan_video/hunyuan_video_text_to_video.json new file mode 100644 index 0000000..fbc8841 --- /dev/null +++ b/hunyuan_video/hunyuan_video_text_to_video.json @@ -0,0 +1,784 @@ +{ + "last_node_id": 77, + "last_link_id": 215, + "nodes": [ + { + "id": 16, + "type": "KSamplerSelect", + "pos": [ + 484, + 751 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 19 + ], + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 17, + "type": "BasicScheduler", + "pos": [ + 478, + 860 + ], + "size": [ + 315, + 106 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 190, + "slot_index": 0 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 20 + ], + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "BasicScheduler" + }, + "widgets_values": [ + "simple", + 20, + 1 + ] + }, + { + "id": 26, + "type": "FluxGuidance", + "pos": [ + 520, + 100 + ], + "size": [ + 317.4000244140625, + 58 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 175 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 129 + ], + "slot_index": 0, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "FluxGuidance" + }, + "widgets_values": [ + 6 + ], + "color": "#233", + "bgcolor": "#355" + }, + { + "id": 45, + "type": "EmptyHunyuanLatentVideo", + "pos": [ + 475.540771484375, + 432.673583984375 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": 
"LATENT", + "type": "LATENT", + "links": [ + 180 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyHunyuanLatentVideo" + }, + "widgets_values": [ + 848, + 480, + 73, + 1 + ] + }, + { + "id": 22, + "type": "BasicGuider", + "pos": [ + 600, + 0 + ], + "size": [ + 222.3482666015625, + 46 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 195, + "slot_index": 0 + }, + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 129, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 30 + ], + "slot_index": 0, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "BasicGuider" + }, + "widgets_values": [] + }, + { + "id": 67, + "type": "ModelSamplingSD3", + "pos": [ + 360, + 0 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 209 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 195 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 7 + ] + }, + { + "id": 10, + "type": "VAELoader", + "pos": [ + 0, + 420 + ], + "size": [ + 350, + 60 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 206, + 211 + ], + "slot_index": 0, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "hunyuan_video_vae_bf16.safetensors" + ] + }, + { + "id": 11, + "type": "DualCLIPLoader", + "pos": [ + 0, + 270 + ], + "size": [ + 350, + 106 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 205 + ], + "slot_index": 0, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "DualCLIPLoader" + }, + "widgets_values": [ + "clip_l.safetensors", + "llava_llama3_fp8_scaled.safetensors", + "hunyuan_video" + ] + }, + { + "id": 73, + "type": "VAEDecodeTiled", + "pos": [ + 1150, + 200 + ], + "size": [ + 210, + 102 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 210 + }, + { + "name": "vae", + "type": "VAE", + "link": 211 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 215 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecodeTiled" + }, + "widgets_values": [ + 256, + 64 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1150, + 90 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 14, + "mode": 2, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 181 + }, + { + "name": "vae", + "type": "VAE", + "link": 206 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 74, + "type": "Note", + "pos": [ + 1150, + 360 + ], + "size": [ + 210, + 170 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Use the tiled decode node by default because most people will need it.\n\nLower the tile_size and overlap if you run out of memory." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 12, + "type": "UNETLoader", + "pos": [ + 0, + 150 + ], + "size": [ + 350, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 190, + 209 + ], + "slot_index": 0, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "hunyuan_video_t2v_720p_bf16.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 77, + "type": "Note", + "pos": [ + 0, + 0 + ], + "size": [ + 350, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Select a fp8 weight_dtype if you are running out of memory." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 13, + "type": "SamplerCustomAdvanced", + "pos": [ + 860, + 200 + ], + "size": [ + 272.3617858886719, + 124.53733825683594 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "noise", + "type": "NOISE", + "link": 37, + "slot_index": 0 + }, + { + "name": "guider", + "type": "GUIDER", + "link": 30, + "slot_index": 1 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 19, + "slot_index": 2 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 20, + "slot_index": 3 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 180, + "slot_index": 4 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "links": [ + 181, + 210 + ], + "slot_index": 0, + "shape": 3 + }, + { + "name": "denoised_output", + "type": "LATENT", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "SamplerCustomAdvanced" + }, + "widgets_values": [] + }, + { + "id": 44, + "type": "CLIPTextEncode", + "pos": [ + 420, + 200 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 205 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 175 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "anime style anime girl with massive fennec ears and one big fluffy tail, she has blonde hair long hair blue eyes wearing a pink sweater and a long blue skirt walking in a beautiful outdoor scenery with snow mountains in the background" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 75, + "type": "SaveAnimatedWEBP", + "pos": [ + 1410, + 200 + ], + "size": [ + 315, + 366 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 215 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 80, + "default" + ] + }, + { + "id": 25, + "type": "RandomNoise", + "pos": [ + 479, + 618 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "NOISE", + "type": "NOISE", + "links": [ + 37 + ], + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "RandomNoise" + }, + "widgets_values": [ + 1, + "randomize" + ], + "color": "#2a363b", + "bgcolor": "#3f5159" + } + ], + "links": [ + [ + 19, + 16, + 0, + 13, + 2, + "SAMPLER" + ], + [ + 20, + 17, + 0, + 13, + 3, + "SIGMAS" + ], + [ + 30, + 22, + 0, + 13, + 1, + "GUIDER" + ], + [ + 37, + 25, + 0, + 13, + 0, + "NOISE" + ], + [ + 129, + 26, + 0, + 
22,
+ 1,
+ "CONDITIONING"
+ ],
+ [
+ 175,
+ 44,
+ 0,
+ 26,
+ 0,
+ "CONDITIONING"
+ ],
+ [
+ 180,
+ 45,
+ 0,
+ 13,
+ 4,
+ "LATENT"
+ ],
+ [
+ 181,
+ 13,
+ 0,
+ 8,
+ 0,
+ "LATENT"
+ ],
+ [
+ 190,
+ 12,
+ 0,
+ 17,
+ 0,
+ "MODEL"
+ ],
+ [
+ 195,
+ 67,
+ 0,
+ 22,
+ 0,
+ "MODEL"
+ ],
+ [
+ 205,
+ 11,
+ 0,
+ 44,
+ 0,
+ "CLIP"
+ ],
+ [
+ 206,
+ 10,
+ 0,
+ 8,
+ 1,
+ "VAE"
+ ],
+ [
+ 209,
+ 12,
+ 0,
+ 67,
+ 0,
+ "MODEL"
+ ],
+ [
+ 210,
+ 13,
+ 0,
+ 73,
+ 0,
+ "LATENT"
+ ],
+ [
+ 211,
+ 10,
+ 0,
+ 73,
+ 1,
+ "VAE"
+ ],
+ [
+ 215,
+ 73,
+ 0,
+ 75,
+ 0,
+ "IMAGE"
+ ]
+ ],
+ "groups": [],
+ "config": {},
+ "extra": {
+ "groupNodes": {}
+ },
+ "version": 0.4
+}
\ No newline at end of file
diff --git a/hunyuan_video/hunyuan_video_text_to_video.webp b/hunyuan_video/hunyuan_video_text_to_video.webp
new file mode 100644
index 0000000..a30eea5
Binary files /dev/null and b/hunyuan_video/hunyuan_video_text_to_video.webp differ
diff --git a/ltxv/README.md b/ltxv/README.md
new file mode 100644
index 0000000..e97ab27
--- /dev/null
+++ b/ltxv/README.md
@@ -0,0 +1,42 @@
+# Lightricks LTX-Video Model
+
+[LTX-Video](https://huggingface.co/Lightricks/LTX-Video) is a very efficient video model by Lightricks.
+
+The important thing with this model is to give it long descriptive prompts.
+
+Download the [ltx-video-2b-v0.9.5.safetensors](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.5.safetensors) file and put it in your ComfyUI/models/checkpoints folder.
+
+If you don't have it already downloaded, you can download the [t5xxl_fp16.safetensors](https://huggingface.co/Comfy-Org/mochi_preview_repackaged/blob/main/split_files/text_encoders/t5xxl_fp16.safetensors) file and put it in your ComfyUI/models/text_encoders/ folder.
+
+### Image to Video
+
+Input image:
+
+
+#### Simple img2vid workflow with start image only:
+
+![Example](ltxv_image_to_video_simple.0.9.5.webp)
+
+[Workflow in Json format](ltxv_image_to_video_simple.0.9.5.json)
+
+
+#### More complex img2vid workflow with multiple guiding images:
+
+![Example](ltxv_image_to_video.0.9.5.webp)
+
+[Workflow in Json format](ltxv_image_to_video.0.9.5.json)
+
+You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow.
+
+### Text to Video
+
+![Example](ltxv_text_to_video_0.9.5.webp)
+
+[Workflow in Json format](ltxv_text_to_video_0.9.5.json)
+
+You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow.
+
+
+[Old ltxv examples](README_old.md)
+
+
diff --git a/ltxv/README_old.md b/ltxv/README_old.md
new file mode 100644
index 0000000..ea1b077
--- /dev/null
+++ b/ltxv/README_old.md
@@ -0,0 +1,31 @@
+# Lightricks LTX-Video Model
+
+[LTX-Video](https://huggingface.co/Lightricks/LTX-Video) is a very efficient video model by Lightricks.
+
+The important thing with this model is to give it long descriptive prompts.
+
+Download the [ltx-video-2b-v0.9.1.safetensors](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.1.safetensors) or the older [ltx-video-2b-v0.9.safetensors](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.safetensors) file and put it in your ComfyUI/models/checkpoints folder.
+
+If you don't have it already downloaded, you can download the [t5xxl_fp16.safetensors](https://huggingface.co/Comfy-Org/mochi_preview_repackaged/blob/main/split_files/text_encoders/t5xxl_fp16.safetensors) file and put it in your ComfyUI/models/text_encoders/ folder.
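+
+These JSON workflow files can also be queued without the browser by POSTing them to a running ComfyUI server. A minimal sketch, assuming ComfyUI is listening on the default 127.0.0.1:8188 and that you first re-export the workflow in API format (the files on this page are UI-format graphs; use the "Save (API Format)" option, available once dev mode is enabled in the ComfyUI settings):
+
+```python
+import json
+import urllib.request
+
+def queue_workflow(path: str, server: str = "http://127.0.0.1:8188/") -> dict:
+    """Queue an API-format workflow JSON file on a local ComfyUI server."""
+    with open(path, "r", encoding="utf-8") as f:
+        workflow = json.load(f)
+    payload = json.dumps({"prompt": workflow}).encode("utf-8")
+    req = urllib.request.Request(server + "/prompt", data=payload,
+                                 headers={"Content-Type": "application/json"})
+    with urllib.request.urlopen(req) as resp:
+        return json.load(resp)  # includes the prompt_id of the queued job
+
+# hypothetical name: an API-format export of one of the workflows on this page
+print(queue_workflow("ltxv_text_to_video_api.json"))
+```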
+ +### Text to Video + +![Example](ltxv_text_to_video.webp) + +[Workflow in Json format](ltxv_text_to_video.json) + +You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. + +### Image to Video + +[Input image](https://commons.wikimedia.org/wiki/File:Havelock_Island,_Mangrove_tree_on_the_beach,_Andaman_Islands.jpg): + + +Workflow: + +![Example](ltxv_image_to_video.webp) + +[Workflow in Json format](ltxv_image_to_video.json) + +You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. + diff --git a/ltxv/fox.jpg b/ltxv/fox.jpg new file mode 100644 index 0000000..bb91607 Binary files /dev/null and b/ltxv/fox.jpg differ diff --git a/ltxv/island.jpg b/ltxv/island.jpg new file mode 100644 index 0000000..44e7d70 Binary files /dev/null and b/ltxv/island.jpg differ diff --git a/ltxv/ltxv_image_to_video.0.9.5.json b/ltxv/ltxv_image_to_video.0.9.5.json new file mode 100644 index 0000000..91fb7f5 --- /dev/null +++ b/ltxv/ltxv_image_to_video.0.9.5.json @@ -0,0 +1,1158 @@ +{ + "last_node_id": 94, + "last_link_id": 234, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 60, + 190 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "t5xxl_fp16.safetensors", + "ltxv", + "default" + ] + }, + { + "id": 76, + "type": "Note", + "pos": [ + 40, + 350 + ], + "size": [ + 360, + 200 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model needs long descriptive prompts, if the prompt is too short the quality will suffer greatly." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 420, + 390 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 195 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 44, + "type": "CheckpointLoaderSimple", + "pos": [ + 520, + 30 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 181 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": null + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 87, + 196, + 207 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "ltx-video-2b-v0.9.5.safetensors" + ] + }, + { + "id": 71, + "type": "LTXVScheduler", + "pos": [ + 880, + 290 + ], + "size": [ + 315, + 154 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "latent", + "type": "LATENT", + "shape": 7, + "link": 228 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 182 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVScheduler" + }, + "widgets_values": [ + 30, + 2.05, + 0.95, + true, + 0.1 + ] + }, + { + "id": 79, + "type": "LTXVAddGuide", + "pos": [ + 900, + 670 + ], + "size": [ + 315, + 162 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 194 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 195 + }, + { + "name": "vae", + "type": "VAE", + "link": 196 + }, + { + "name": "latent", + "type": "LATENT", + "link": 193 + }, + { + "name": "image", + "type": "IMAGE", + "link": 203 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 210 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 211 + ], + "slot_index": 1 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 212, + 228 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LTXVAddGuide" + }, + "widgets_values": [ + 0, + 1 + ] + }, + { + "id": 81, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 560, + 790 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 193 + ] + } + ], + "properties": { + "Node name for S&R": "EmptyLTXVLatentVideo" + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 92, + "type": "LTXVPreprocess", + "pos": [ + 560, + 1070 + ], + "size": [ + 275.9266662597656, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 229 + } + ], + "outputs": [ + { + "name": "output_image", + "type": "IMAGE", + "links": [ + 230 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVPreprocess" + }, + "widgets_values": [ + 35 + ] + }, + { + "id": 73, + 
"type": "KSamplerSelect", + "pos": [ + 880, + 190 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 172 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 85, + "type": "LoadImage", + "pos": [ + 150, + 1070 + ], + "size": [ + 385.15606689453125, + 333.3305358886719 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 229 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "sunset.png", + "image" + ] + }, + { + "id": 84, + "type": "LTXVAddGuide", + "pos": [ + 1240, + 670 + ], + "size": [ + 315, + 162 + ], + "flags": {}, + "order": 13, + "mode": 4, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 210 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 211 + }, + { + "name": "vae", + "type": "VAE", + "link": 207 + }, + { + "name": "latent", + "type": "LATENT", + "link": 212 + }, + { + "name": "image", + "type": "IMAGE", + "link": 230 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 213 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 214 + ], + "slot_index": 1 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 215 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LTXVAddGuide" + }, + "widgets_values": [ + -1, + 1 + ] + }, + { + "id": 78, + "type": "LoadImage", + "pos": [ + 150, + 670 + ], + "size": [ + 385.15606689453125, + 333.3305358886719 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 226 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fox.jpg", + "image" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 420, + 180 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 194 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "A red fox moving gracefully, its russet coat vibrant against the white landscape, leaving perfect star-shaped prints behind as steam rises from its breath in the crisp winter air. The scene is wrapped in snow-muffled silence, broken only by the gentle murmur of water still flowing beneath the ice." 
+ ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 82, + "type": "LTXVPreprocess", + "pos": [ + 570, + 670 + ], + "size": [ + 275.9266662597656, + 58 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 226 + } + ], + "outputs": [ + { + "name": "output_image", + "type": "IMAGE", + "links": [ + 203 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVPreprocess" + }, + "widgets_values": [ + 40 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1740, + 30 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 232 + }, + { + "name": "vae", + "type": "VAE", + "link": 87 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106, + 217 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 86, + "type": "SaveWEBM", + "pos": [ + 2480, + 30 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 20, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 217 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 12 + ] + }, + { + "id": 93, + "type": "Note", + "pos": [ + 1239.5482177734375, + 880.7611083984375 + ], + "size": [ + 310, + 150 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "These nodes can be chained together to set multiple guiding images.\n\nIn this case -1 means the last frame so you can unbypass this node (CTRL-B) if you want to predict from the last frame instead of the first." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 41, + "type": "SaveAnimatedWEBP", + "pos": [ + 1970, + 30 + ], + "size": [ + 493.98468017578125, + 481.28692626953125 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 106 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 90, + "default" + ] + }, + { + "id": 72, + "type": "SamplerCustom", + "pos": [ + 1201, + 32 + ], + "size": [ + 355.20001220703125, + 230 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 181 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 199 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 167 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 172 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 182 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 215 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "links": [ + 231 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustom" + }, + "widgets_values": [ + true, + 397166166231987, + "randomize", + 3 + ] + }, + { + "id": 94, + "type": "LTXVCropGuides", + "pos": [ + 1552.4571533203125, + -99.19783020019531 + ], + "size": [ + 216.59999084472656, + 66 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 233 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 234 + }, + { + "name": "latent", + "type": "LATENT", + "link": 231 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": null + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": null + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 232 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LTXVCropGuides" + }, + "widgets_values": [] + }, + { + "id": 69, + "type": "LTXVConditioning", + "pos": [ + 920, + 60 + ], + "size": [ + 223.8660125732422, + 78 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 213 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 214 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 199, + 233 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 167, + 234 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 25 + ] + } + ], + "links": [ + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 87, + 44, + 2, + 8, + 1, + "VAE" + ], + [ + 106, + 8, + 0, + 41, + 0, + "IMAGE" + ], + [ + 167, + 69, + 1, + 72, + 2, + "CONDITIONING" + ], + [ + 172, + 73, + 0, + 72, + 3, + "SAMPLER" + ], + [ + 181, + 44, + 0, + 72, + 0, + "MODEL" + ], + [ + 182, + 71, + 0, + 72, + 4, + "SIGMAS" + ], + [ + 193, + 81, + 0, + 79, + 3, + "LATENT" + ], + [ + 194, + 6, + 0, + 79, + 0, + "CONDITIONING" + ], + [ + 195, + 7, + 0, + 79, + 1, + "CONDITIONING" + ], + [ + 196, + 44, + 2, + 79, + 2, + "VAE" + ], + [ + 199, + 69, + 0, + 72, + 1, + "CONDITIONING" + ], + [ + 203, + 82, + 0, + 79, + 4, + "IMAGE" + ], + [ + 207, + 44, + 2, + 84, + 2, + "VAE" + ], + [ + 210, + 79, + 0, + 84, + 0, + "CONDITIONING" + ], + [ + 211, + 
79, + 1, + 84, + 1, + "CONDITIONING" + ], + [ + 212, + 79, + 2, + 84, + 3, + "LATENT" + ], + [ + 213, + 84, + 0, + 69, + 0, + "CONDITIONING" + ], + [ + 214, + 84, + 1, + 69, + 1, + "CONDITIONING" + ], + [ + 215, + 84, + 2, + 72, + 5, + "LATENT" + ], + [ + 217, + 8, + 0, + 86, + 0, + "IMAGE" + ], + [ + 226, + 78, + 0, + 82, + 0, + "IMAGE" + ], + [ + 228, + 79, + 2, + 71, + 0, + "LATENT" + ], + [ + 229, + 85, + 0, + 92, + 0, + "IMAGE" + ], + [ + 230, + 92, + 0, + 84, + 4, + "IMAGE" + ], + [ + 231, + 72, + 0, + 94, + 2, + "LATENT" + ], + [ + 232, + 94, + 2, + 8, + 0, + "LATENT" + ], + [ + 233, + 69, + 0, + 94, + 0, + "CONDITIONING" + ], + [ + 234, + 69, + 1, + 94, + 1, + "CONDITIONING" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} \ No newline at end of file diff --git a/ltxv/ltxv_image_to_video.0.9.5.webp b/ltxv/ltxv_image_to_video.0.9.5.webp new file mode 100644 index 0000000..8ed52b0 Binary files /dev/null and b/ltxv/ltxv_image_to_video.0.9.5.webp differ diff --git a/ltxv/ltxv_image_to_video.json b/ltxv/ltxv_image_to_video.json new file mode 100644 index 0000000..b706b61 --- /dev/null +++ b/ltxv/ltxv_image_to_video.json @@ -0,0 +1,719 @@ +{ + "last_node_id": 78, + "last_link_id": 190, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 60, + 190 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "t5xxl_fp16.safetensors", + "ltxv" + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1600, + 30 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 171 + }, + { + "name": "vae", + "type": "VAE", + "link": 87 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 69, + "type": "LTXVConditioning", + "pos": [ + 920, + 60 + ], + "size": [ + 223.8660125732422, + 78 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 183 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 184 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 166 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 167 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 72, + "type": "SamplerCustom", + "pos": [ + 1201, + 32 + ], + "size": [ + 355.20001220703125, + 230 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 181 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 166 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 167 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 172 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 182 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 186 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "links": [ + 171 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "links": null + } + ], + "properties": { + "Node name for S&R": 
"SamplerCustom" + }, + "widgets_values": [ + true, + 501744655390087, + "randomize", + 3 + ] + }, + { + "id": 44, + "type": "CheckpointLoaderSimple", + "pos": [ + 520, + 30 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 181 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": null + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 87, + 189 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "ltx-video-2b-v0.9.safetensors" + ] + }, + { + "id": 71, + "type": "LTXVScheduler", + "pos": [ + 856, + 531 + ], + "size": [ + 315, + 154 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "latent", + "type": "LATENT", + "link": 185, + "shape": 7 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 182 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVScheduler" + }, + "widgets_values": [ + 30, + 2.05, + 0.95, + true, + 0.1 + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 420, + 390 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 188 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 73, + "type": "KSamplerSelect", + "pos": [ + 860, + 420 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 172 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 76, + "type": "Note", + "pos": [ + 40, + 350 + ], + "size": [ + 360, + 200 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model needs long descriptive prompts, if the prompt is too short the quality will suffer greatly." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 77, + "type": "LTXVImgToVideo", + "pos": [ + 863, + 181 + ], + "size": [ + 315, + 190 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 187 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 188 + }, + { + "name": "vae", + "type": "VAE", + "link": 189 + }, + { + "name": "image", + "type": "IMAGE", + "link": 190 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 183 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 184 + ], + "slot_index": 1 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 185, + 186 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LTXVImgToVideo" + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 78, + "type": "LoadImage", + "pos": [ + 420, + 620 + ], + "size": [ + 385.15606689453125, + 333.3305358886719 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 190 + ] + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "island.jpg", + "image" + ] + }, + { + "id": 41, + "type": "SaveAnimatedWEBP", + "pos": [ + 1830, + 30 + ], + "size": [ + 680, + 610 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 106 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 90, + "default" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 420, + 190 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 187 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "best quality, 4k, HDR, a tracking shot of a beautiful scene of the sea waves on the beach with a massive explosion in the water" + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 87, + 44, + 2, + 8, + 1, + "VAE" + ], + [ + 106, + 8, + 0, + 41, + 0, + "IMAGE" + ], + [ + 166, + 69, + 0, + 72, + 1, + "CONDITIONING" + ], + [ + 167, + 69, + 1, + 72, + 2, + "CONDITIONING" + ], + [ + 171, + 72, + 0, + 8, + 0, + "LATENT" + ], + [ + 172, + 73, + 0, + 72, + 3, + "SAMPLER" + ], + [ + 181, + 44, + 0, + 72, + 0, + "MODEL" + ], + [ + 182, + 71, + 0, + 72, + 4, + "SIGMAS" + ], + [ + 183, + 77, + 0, + 69, + 0, + "CONDITIONING" + ], + [ + 184, + 77, + 1, + 69, + 1, + "CONDITIONING" + ], + [ + 185, + 77, + 2, + 71, + 0, + "LATENT" + ], + [ + 186, + 77, + 2, + 72, + 5, + "LATENT" + ], + [ + 187, + 6, + 0, + 77, + 0, + "CONDITIONING" + ], + [ + 188, + 7, + 0, + 77, + 1, + "CONDITIONING" + ], + [ + 189, + 44, + 2, + 77, + 2, + "VAE" + ], + [ + 190, + 78, + 0, + 77, + 3, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} \ No newline at end of file diff --git a/ltxv/ltxv_image_to_video.webp b/ltxv/ltxv_image_to_video.webp new file mode 100644 index 0000000..21efded Binary files /dev/null and 
b/ltxv/ltxv_image_to_video.webp differ diff --git a/ltxv/ltxv_image_to_video_simple.0.9.5.json b/ltxv/ltxv_image_to_video_simple.0.9.5.json new file mode 100644 index 0000000..547d5cb --- /dev/null +++ b/ltxv/ltxv_image_to_video_simple.0.9.5.json @@ -0,0 +1,808 @@ +{ + "last_node_id": 95, + "last_link_id": 250, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 60, + 190 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "t5xxl_fp16.safetensors", + "ltxv", + "default" + ] + }, + { + "id": 76, + "type": "Note", + "pos": [ + 40, + 350 + ], + "size": [ + 360, + 200 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model needs long descriptive prompts, if the prompt is too short the quality will suffer greatly." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 44, + "type": "CheckpointLoaderSimple", + "pos": [ + 520, + 30 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 181 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": null + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 87, + 250 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "ltx-video-2b-v0.9.5.safetensors" + ] + }, + { + "id": 71, + "type": "LTXVScheduler", + "pos": [ + 880, + 290 + ], + "size": [ + 315, + 154 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "latent", + "type": "LATENT", + "shape": 7, + "link": 249 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 182 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVScheduler" + }, + "widgets_values": [ + 30, + 2.05, + 0.95, + true, + 0.1 + ] + }, + { + "id": 73, + "type": "KSamplerSelect", + "pos": [ + 880, + 190 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 172 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 78, + "type": "LoadImage", + "pos": [ + 150, + 670 + ], + "size": [ + 385.15606689453125, + 333.3305358886719 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 226 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fox.jpg", + "image" + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1740, + 30 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 235 + }, + { + "name": "vae", + "type": "VAE", + "link": 87 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106, + 217 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 86, + "type": "SaveWEBM", + "pos": [ + 2480, + 30 + ], + "size": [ + 315, + 130 + ], + "flags": {}, 
+ "order": 14, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 217 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 12 + ] + }, + { + "id": 41, + "type": "SaveAnimatedWEBP", + "pos": [ + 1970, + 30 + ], + "size": [ + 493.98468017578125, + 481.28692626953125 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 106 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 90, + "default" + ] + }, + { + "id": 69, + "type": "LTXVConditioning", + "pos": [ + 920, + 60 + ], + "size": [ + 223.8660125732422, + 78 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 245 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 246 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 199 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 167 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 420, + 180 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 239 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "A red fox moving gracefully, its russet coat vibrant against the white landscape, leaving perfect star-shaped prints behind as steam rises from its breath in the crisp winter air. The scene is wrapped in snow-muffled silence, broken only by the gentle murmur of water still flowing beneath the ice." 
+ ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 420, + 390 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 240 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 82, + "type": "LTXVPreprocess", + "pos": [ + 570, + 670 + ], + "size": [ + 275.9266662597656, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 226 + } + ], + "outputs": [ + { + "name": "output_image", + "type": "IMAGE", + "links": [ + 248 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVPreprocess" + }, + "widgets_values": [ + 40 + ] + }, + { + "id": 95, + "type": "LTXVImgToVideo", + "pos": [ + 888.8251342773438, + 608.7010498046875 + ], + "size": [ + 315, + 190 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 239 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 240 + }, + { + "name": "vae", + "type": "VAE", + "link": 250 + }, + { + "name": "image", + "type": "IMAGE", + "link": 248 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 245 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 246 + ], + "slot_index": 1 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 247, + 249 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LTXVImgToVideo" + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 72, + "type": "SamplerCustom", + "pos": [ + 1201, + 32 + ], + "size": [ + 355.20001220703125, + 230 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 181 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 199 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 167 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 172 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 182 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 247 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "links": [ + 235 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustom" + }, + "widgets_values": [ + true, + 1092847494041144, + "randomize", + 3 + ] + } + ], + "links": [ + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 87, + 44, + 2, + 8, + 1, + "VAE" + ], + [ + 106, + 8, + 0, + 41, + 0, + "IMAGE" + ], + [ + 167, + 69, + 1, + 72, + 2, + "CONDITIONING" + ], + [ + 172, + 73, + 0, + 72, + 3, + "SAMPLER" + ], + [ + 181, + 44, + 0, + 72, + 0, + "MODEL" + ], + [ + 182, + 71, + 0, + 72, + 4, + "SIGMAS" + ], + [ + 199, + 69, + 0, + 72, + 1, + "CONDITIONING" + ], + [ + 217, + 8, + 0, + 86, + 0, + "IMAGE" + ], + [ + 226, + 78, + 0, + 82, + 0, + "IMAGE" + ], + [ + 235, + 72, + 0, 
+ 8, + 0, + "LATENT" + ], + [ + 239, + 6, + 0, + 95, + 0, + "CONDITIONING" + ], + [ + 240, + 7, + 0, + 95, + 1, + "CONDITIONING" + ], + [ + 245, + 95, + 0, + 69, + 0, + "CONDITIONING" + ], + [ + 246, + 95, + 1, + 69, + 1, + "CONDITIONING" + ], + [ + 247, + 95, + 2, + 72, + 5, + "LATENT" + ], + [ + 248, + 82, + 0, + 95, + 3, + "IMAGE" + ], + [ + 249, + 95, + 2, + 71, + 0, + "LATENT" + ], + [ + 250, + 44, + 2, + 95, + 2, + "VAE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} \ No newline at end of file diff --git a/ltxv/ltxv_image_to_video_simple.0.9.5.webp b/ltxv/ltxv_image_to_video_simple.0.9.5.webp new file mode 100644 index 0000000..d623f7b Binary files /dev/null and b/ltxv/ltxv_image_to_video_simple.0.9.5.webp differ diff --git a/ltxv/ltxv_text_to_video.json b/ltxv/ltxv_text_to_video.json new file mode 100644 index 0000000..04c26d6 --- /dev/null +++ b/ltxv/ltxv_text_to_video.json @@ -0,0 +1,612 @@ +{ + "last_node_id": 76, + "last_link_id": 182, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 60, + 190 + ], + "size": [ + 315, + 82 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "t5xxl_fp16.safetensors", + "ltxv" + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1600, + 30 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 171 + }, + { + "name": "vae", + "type": "VAE", + "link": 87 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 69, + "type": "LTXVConditioning", + "pos": [ + 920, + 60 + ], + "size": [ + 223.8660125732422, + 78 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 169 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 170 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 166 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 167 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 72, + "type": "SamplerCustom", + "pos": [ + 1201, + 32 + ], + "size": [ + 355.20001220703125, + 230 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 181 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 166 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 167 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 172 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 182 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 175 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "links": [ + 171 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustom" + }, + "widgets_values": [ + true, + 497797676867141, + "randomize", + 3 + ] + }, + { + "id": 44, + "type": "CheckpointLoaderSimple", + "pos": [ + 520, + 30 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 1, + "mode": 
0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 181 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": null + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 87 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "ltx-video-2b-v0.9.safetensors" + ] + }, + { + "id": 70, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 860, + 240 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 168, + 175 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLTXVLatentVideo" + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 71, + "type": "LTXVScheduler", + "pos": [ + 856, + 531 + ], + "size": [ + 315, + 154 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "latent", + "type": "LATENT", + "link": 168, + "shape": 7 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 182 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVScheduler" + }, + "widgets_values": [ + 30, + 2.05, + 0.95, + true, + 0.1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 420, + 190 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 169 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage." 
+ ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 420, + 390 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 170 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 73, + "type": "KSamplerSelect", + "pos": [ + 860, + 420 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 172 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 76, + "type": "Note", + "pos": [ + 40, + 350 + ], + "size": [ + 360, + 200 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model needs long descriptive prompts, if the prompt is too short the quality will suffer greatly." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 41, + "type": "SaveAnimatedWEBP", + "pos": [ + 1830, + 30 + ], + "size": [ + 680, + 610 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 106 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 90, + "default" + ] + } + ], + "links": [ + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 87, + 44, + 2, + 8, + 1, + "VAE" + ], + [ + 106, + 8, + 0, + 41, + 0, + "IMAGE" + ], + [ + 166, + 69, + 0, + 72, + 1, + "CONDITIONING" + ], + [ + 167, + 69, + 1, + 72, + 2, + "CONDITIONING" + ], + [ + 168, + 70, + 0, + 71, + 0, + "LATENT" + ], + [ + 169, + 6, + 0, + 69, + 0, + "CONDITIONING" + ], + [ + 170, + 7, + 0, + 69, + 1, + "CONDITIONING" + ], + [ + 171, + 72, + 0, + 8, + 0, + "LATENT" + ], + [ + 172, + 73, + 0, + 72, + 3, + "SAMPLER" + ], + [ + 175, + 70, + 0, + 72, + 5, + "LATENT" + ], + [ + 181, + 44, + 0, + 72, + 0, + "MODEL" + ], + [ + 182, + 71, + 0, + 72, + 4, + "SIGMAS" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} \ No newline at end of file diff --git a/ltxv/ltxv_text_to_video.webp b/ltxv/ltxv_text_to_video.webp new file mode 100644 index 0000000..f0db2e2 Binary files /dev/null and b/ltxv/ltxv_text_to_video.webp differ diff --git a/ltxv/ltxv_text_to_video_0.9.5.json b/ltxv/ltxv_text_to_video_0.9.5.json new file mode 100644 index 0000000..3243ade --- /dev/null +++ b/ltxv/ltxv_text_to_video_0.9.5.json @@ -0,0 +1,654 @@ +{ + "last_node_id": 77, + "last_link_id": 183, + "nodes": [ + { + "id": 69, + "type": "LTXVConditioning", + "pos": [ + 920, + 60 + ], + "size": [ + 223.8660125732422, + 78 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 169 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 170 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 166 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + 
"links": [ + 167 + ], + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 71, + "type": "LTXVScheduler", + "pos": [ + 856, + 531 + ], + "size": [ + 315, + 154 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "latent", + "type": "LATENT", + "shape": 7, + "link": 168 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 182 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LTXVScheduler" + }, + "widgets_values": [ + 30, + 2.05, + 0.95, + true, + 0.1 + ] + }, + { + "id": 76, + "type": "Note", + "pos": [ + 40, + 350 + ], + "size": [ + 360, + 200 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model needs long descriptive prompts, if the prompt is too short the quality will suffer greatly." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 44, + "type": "CheckpointLoaderSimple", + "pos": [ + 520, + 30 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 181 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": null + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 87 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "ltx-video-2b-v0.9.5.safetensors" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 60, + 190 + ], + "size": [ + 315, + 98 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "t5xxl_fp16.safetensors", + "ltxv", + "default" + ] + }, + { + "id": 72, + "type": "SamplerCustom", + "pos": [ + 1201, + 32 + ], + "size": [ + 355.20001220703125, + 230 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 181 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 166 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 167 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 172 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 182 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 175 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "links": [ + 171 + ], + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "links": null + } + ], + "properties": { + "Node name for S&R": "SamplerCustom" + }, + "widgets_values": [ + true, + 552872474466407, + "randomize", + 3 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1600, + 30 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 171 + }, + { + "name": "vae", + "type": "VAE", + "link": 87 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106, + 183 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 77, + "type": "SaveWEBM", + "pos": [ + 2530, + 30 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 12, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 183 + } + ], 
+ "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 18 + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 420, + 500 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 170 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 41, + "type": "SaveAnimatedWEBP", + "pos": [ + 1830, + 30 + ], + "size": [ + 680, + 610 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 106 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24, + false, + 95, + "default" + ] + }, + { + "id": 73, + "type": "KSamplerSelect", + "pos": [ + 860, + 420 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 172 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "res_multistep" + ] + }, + { + "id": 70, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 860, + 240 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 168, + 175 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLTXVLatentVideo" + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 422.44207763671875, + 184.3018035888672 + ], + "size": [ + 430, + 270 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 169 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "A drone quickly rises through a bank of morning fog, revealing a pristine alpine lake surrounded by snow-capped mountains. The camera glides forward over the glassy water, capturing perfect reflections of the peaks. As it continues, the perspective shifts to reveal a lone wooden cabin with a curl of smoke from its chimney, nestled among tall pines at the lake's edge. The final shot tracks upward rapidly, transitioning from intimate to epic as the full mountain range comes into view, bathed in the golden light of sunrise breaking through scattered clouds." 
+ ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 87, + 44, + 2, + 8, + 1, + "VAE" + ], + [ + 106, + 8, + 0, + 41, + 0, + "IMAGE" + ], + [ + 166, + 69, + 0, + 72, + 1, + "CONDITIONING" + ], + [ + 167, + 69, + 1, + 72, + 2, + "CONDITIONING" + ], + [ + 168, + 70, + 0, + 71, + 0, + "LATENT" + ], + [ + 169, + 6, + 0, + 69, + 0, + "CONDITIONING" + ], + [ + 170, + 7, + 0, + 69, + 1, + "CONDITIONING" + ], + [ + 171, + 72, + 0, + 8, + 0, + "LATENT" + ], + [ + 172, + 73, + 0, + 72, + 3, + "SAMPLER" + ], + [ + 175, + 70, + 0, + 72, + 5, + "LATENT" + ], + [ + 181, + 44, + 0, + 72, + 0, + "MODEL" + ], + [ + 182, + 71, + 0, + 72, + 4, + "SIGMAS" + ], + [ + 183, + 8, + 0, + 77, + 0, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} \ No newline at end of file diff --git a/ltxv/ltxv_text_to_video_0.9.5.webp b/ltxv/ltxv_text_to_video_0.9.5.webp new file mode 100644 index 0000000..604e813 Binary files /dev/null and b/ltxv/ltxv_text_to_video_0.9.5.webp differ diff --git a/lumina2/README.md b/lumina2/README.md new file mode 100644 index 0000000..f942ecd --- /dev/null +++ b/lumina2/README.md @@ -0,0 +1,12 @@ +# Lumina Image 2.0 + +[Lumina Image 2.0](https://github.com/Alpha-VLLM/Lumina-Image-2.0) is a interesting diffusion model that uses gemma 2 2B for its text encoder. + +## Basic Workflow + +Download [lumina_2.safetensors](https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/blob/main/all_in_one/lumina_2.safetensors) and put it in your ComfyUI/models/checkpoints directory. + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](lumina2_basic_example.png) + diff --git a/lumina2/lumina2_basic_example.png b/lumina2/lumina2_basic_example.png new file mode 100644 index 0000000..a2e85cd Binary files /dev/null and b/lumina2/lumina2_basic_example.png differ diff --git a/mochi/README.md b/mochi/README.md new file mode 100644 index 0000000..10de9af --- /dev/null +++ b/mochi/README.md @@ -0,0 +1,27 @@ +# Mochi Video Model + +[Mochi](https://huggingface.co/genmo/mochi-1-preview) is a state of the art video model. + +You can find all the model files for the following workflow [here](https://huggingface.co/Comfy-Org/mochi_preview_repackaged/tree/main/split_files) + +``` +diffusion_models/mochi_preview_bf16.safetensors goes in: ComfyUI/models/diffusion_models/ +text_encoders/t5xxl_fp16.safetensors goes in: ComfyUI/models/text_encoders/ +vae/mochi_vae.safetensors goes in: ComfyUI/models/vae/ +``` + +If you have memory issues you can pick the fp8 files instead of the bf16/fp16 ones. + +![Example](mochi_text_to_video_example.webp) + +You can download this webp animated image and load it or drag it on [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. + + +There is also an all in one fp8 checkpoint [here](https://huggingface.co/Comfy-Org/mochi_preview_repackaged/blob/main/all_in_one/mochi_preview_fp8_scaled.safetensors) that contains the fp8 versions of the files in the above workflow packaged in a single checkpoint. + +Note that using the fp8 files will give lower quality than using the 16 bit ones but might be faster especially if you have a GPU that supports fp8 ops. 
+ +Here is the workflow to use it: + +![Example](mochi_simple_checkpoint.webp) + diff --git a/mochi/mochi_simple_checkpoint.webp b/mochi/mochi_simple_checkpoint.webp new file mode 100644 index 0000000..90ebb5a Binary files /dev/null and b/mochi/mochi_simple_checkpoint.webp differ diff --git a/mochi/mochi_text_to_video_example.webp b/mochi/mochi_text_to_video_example.webp new file mode 100644 index 0000000..819756c Binary files /dev/null and b/mochi/mochi_text_to_video_example.webp differ diff --git a/model_merging/README.md b/model_merging/README.md index 26f535e..966081c 100644 --- a/model_merging/README.md +++ b/model_merging/README.md @@ -26,3 +26,14 @@ If you are familiar with the "Add Difference" option in other UIs this is how to ![Example](model_merging_inpaint.png) One important thing you should note is that models are merged and saved in the precision that is used for inference on your hardware so usually it will be 16 bit float. If you want to do merges in 32 bit float launch ComfyUI with: --force-fp32 + + +### Advanced Merging + +#### CosXL + +Here is an example of how to create a CosXL model from a regular SDXL model with merging. The requirements are the [CosXL base model](https://huggingface.co/stabilityai/cosxl), the [SDXL base model](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0_0.9vae.safetensors) and the SDXL model you want to convert. In this example I used [albedobase-xl](https://civitai.com/models/140737/albedobase-xl). Conceptually it is the same add-difference idea as above: the result is roughly cosxl_base + (albedobase_xl - sdxl_base), which transplants the custom model's learned changes onto the CosXL base. + +![Example](model_merging_cosxl.png) + + diff --git a/model_merging/model_merging_cosxl.png b/model_merging/model_merging_cosxl.png new file mode 100644 index 0000000..63f39ab Binary files /dev/null and b/model_merging/model_merging_cosxl.png differ diff --git a/omnigen/README.md b/omnigen/README.md new file mode 100644 index 0000000..da334b2 --- /dev/null +++ b/omnigen/README.md @@ -0,0 +1,24 @@ +# Omnigen 2 + +[Omnigen 2](https://github.com/VectorSpaceLab/OmniGen2) is a model that can be used to edit images with text prompts. + +## Files to Download + +You will first need: + +[omnigen2_fp16.safetensors](https://huggingface.co/Comfy-Org/Omnigen2_ComfyUI_repackaged/blob/main/split_files/diffusion_models/omnigen2_fp16.safetensors) goes in: ComfyUI/models/diffusion_models/ + +[qwen_2.5_vl_fp16.safetensors](https://huggingface.co/Comfy-Org/Omnigen2_ComfyUI_repackaged/blob/main/split_files/text_encoders/qwen_2.5_vl_fp16.safetensors) goes in: ComfyUI/models/text_encoders/ + +[ae.safetensors](https://huggingface.co/Comfy-Org/Omnigen2_ComfyUI_repackaged/blob/main/split_files/vae/ae.safetensors), this is the Flux VAE that you might already have; it goes in: ComfyUI/models/vae/ + +## Workflows + +This is a basic workflow using an image as a character reference. For multiple image inputs, chain ReferenceLatent nodes together. + +![Example](omnigen2_example.png) + +You can load this image in [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the full workflow. + +You can find the input image [here](../chroma/fennec_girl_sing.png) + diff --git a/omnigen/omnigen2_example.png b/omnigen/omnigen2_example.png new file mode 100644 index 0000000..bf3329d Binary files /dev/null and b/omnigen/omnigen2_example.png differ diff --git a/qwen_image/README.md b/qwen_image/README.md new file mode 100644 index 0000000..460b833 --- /dev/null +++ b/qwen_image/README.md @@ -0,0 +1,42 @@ +# Qwen Image + +[Qwen Image](https://github.com/QwenLM/Qwen-Image) is a 20B diffusion model.
+ +## Basic Workflow + +Download [qwen_image_fp8_e4m3fn.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/blob/main/split_files/diffusion_models/qwen_image_fp8_e4m3fn.safetensors) and put it in your ComfyUI/models/diffusion_models directory. + +Download [qwen_2.5_vl_7b_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/blob/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors) and put it in your ComfyUI/models/text_encoders directory. + +Download [qwen_image_vae.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/blob/main/split_files/vae/qwen_image_vae.safetensors) and put it in your ComfyUI/models/vae/ directory. + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](qwen_image_basic_example.png) + +## Edit Model v2509 + +Make sure you have downloaded the text encoder and VAE files for the basic workflow above. This model supports up to 3 different image inputs. + +Download [qwen_image_edit_2509_fp8_e4m3fn.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/blob/main/split_files/diffusion_models/qwen_image_edit_2509_fp8_e4m3fn.safetensors) and put it in your ComfyUI/models/diffusion_models directory. + + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](qwen_image_edit_2509_basic_example.png) + +You can find the input image [here](../chroma/fennec_girl_sing.png) + + +## Edit Model (older first version) + +Make sure you have downloaded the text encoder and VAE files for the basic workflow above. + +Download [qwen_image_edit_fp8_e4m3fn.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/blob/main/split_files/diffusion_models/qwen_image_edit_fp8_e4m3fn.safetensors) and put it in your ComfyUI/models/diffusion_models directory. + + +You can then load up or drag the following image in ComfyUI to get the workflow: + +![Example](qwen_image_edit_basic_example.png) + +You can find the input image [here](../chroma/fennec_girl_sing.png) diff --git a/qwen_image/qwen_image_basic_example.png b/qwen_image/qwen_image_basic_example.png new file mode 100644 index 0000000..c6a0537 Binary files /dev/null and b/qwen_image/qwen_image_basic_example.png differ diff --git a/qwen_image/qwen_image_edit_2509_basic_example.png b/qwen_image/qwen_image_edit_2509_basic_example.png new file mode 100644 index 0000000..82895ff Binary files /dev/null and b/qwen_image/qwen_image_edit_2509_basic_example.png differ diff --git a/qwen_image/qwen_image_edit_basic_example.png b/qwen_image/qwen_image_edit_basic_example.png new file mode 100644 index 0000000..db318c0 Binary files /dev/null and b/qwen_image/qwen_image_edit_basic_example.png differ diff --git a/sd3/README.md b/sd3/README.md new file mode 100644 index 0000000..067aa3e --- /dev/null +++ b/sd3/README.md @@ -0,0 +1,34 @@ +# SD3 Examples + +## SD3.5 + +The first step is downloading the text encoder files ([clip_l.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_l.safetensors), [clip_g.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_g.safetensors) and t5xxl) to your ComfyUI/models/text_encoders/ folder, if you don't already have them from SD3, Flux or other models. 
For the t5xxl I recommend [t5xxl_fp16.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/t5xxl_fp16.safetensors) if you have more than 32GB of RAM or [t5xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/t5xxl_fp8_e4m3fn_scaled.safetensors) if you don't. + +The SD3.5 model family contains a large 8B model and a medium 2.5B model. The medium model will be faster and take less memory but might have a less complex understanding of some concepts. I recommend downloading both and experimenting with how each of them responds to your prompts. + +The [sd3.5_large.safetensors](https://huggingface.co/stabilityai/stable-diffusion-3.5-large/tree/main) and [sd3.5_medium.safetensors](https://huggingface.co/stabilityai/stable-diffusion-3.5-medium/tree/main) files (pick the one you want and put it in your ComfyUI/models/checkpoints/ directory) do not contain text encoder/CLIP weights, so you must load them separately to use those files, just like in the following example: + +![Example](sd3.5_text_encoders_example.png) + +To use the [sd3.5_large_turbo.safetensors](https://huggingface.co/stabilityai/stable-diffusion-3.5-large-turbo/tree/main) file (put it in your ComfyUI/models/checkpoints/ directory) you can use the above example and set steps to 4 and cfg to 1.2. + +For convenience there is an easy to use all in one checkpoint file [sd3.5_large_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/sd3.5_large_fp8_scaled.safetensors) (put it in your ComfyUI/models/checkpoints/ directory) that can be used in the default workflow like any other checkpoint file. There is also one for SD3.5 medium: [sd3.5_medium_incl_clips_t5xxlfp8scaled.safetensors](https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/sd3.5_medium_incl_clips_t5xxlfp8scaled.safetensors) + +See this workflow for an example. + +![Example](sd3.5_simple_example.png) + +As a reminder you can save these image files and drag or load them into ComfyUI to get the workflow. + +### SD3.5 Controlnets + +Stability has released some official SD3.5 controlnets that you can find [here](https://huggingface.co/stabilityai/stable-diffusion-3.5-controlnets). These files (sd3.5_large_controlnet_canny.safetensors, sd3.5_large_controlnet_depth.safetensors, sd3.5_large_controlnet_blur.safetensors) go in your ComfyUI/models/controlnet directory and are meant to be used with SD3.5 large. + +See this workflow for an example with the canny ([sd3.5_large_controlnet_canny.safetensors](https://huggingface.co/stabilityai/stable-diffusion-3.5-controlnets/tree/main)) controlnet: + +![Example](sd3.5_large_canny_controlnet_example.png) + + +[Old SD3 medium examples](README_old.md) + + diff --git a/sd3/README_old.md b/sd3/README_old.md new file mode 100644 index 0000000..2b02561 --- /dev/null +++ b/sd3/README_old.md @@ -0,0 +1,26 @@ +# Old SD3 Medium Examples + +The SD3 checkpoints that contain text encoders: [sd3_medium_incl_clips.safetensors (5.5GB)](https://huggingface.co/stabilityai/stable-diffusion-3-medium/tree/main) and [sd3_medium_incl_clips_t5xxlfp8.safetensors (10.1GB)](https://huggingface.co/stabilityai/stable-diffusion-3-medium/tree/main) can be used like any regular checkpoint in ComfyUI. The difference between these two checkpoints is that the first contains only 2 text encoders: CLIP-L and CLIP-G, while the other contains 3: CLIP-L, CLIP-G and T5XXL. 
Make sure to put either sd3_medium_incl_clips.safetensors or sd3_medium_incl_clips_t5xxlfp8.safetensors in your ComfyUI/models/checkpoints/ directory. + +Here is a very basic example of how to use it: + +![Example](sd3_simple_example.png) + +The [sd3_medium.safetensors](https://huggingface.co/stabilityai/stable-diffusion-3-medium/tree/main) file does not contain text encoder/CLIP weights, so you must load them separately to use that file. Download the text encoder weights from the [text_encoders directory](https://huggingface.co/stabilityai/stable-diffusion-3-medium/tree/main) and put them in your ComfyUI/models/text_encoders/ directory. sd3_medium.safetensors should be put in your ComfyUI/models/checkpoints/ directory. + +Here is a basic example of how to use it: + +![Example](sd3_text_encoders_example.png) + +As a reminder you can save these image files and drag or load them into ComfyUI to get the workflow. + +SD3 performs very well with the negative conditioning zeroed out, like in the following example: + +![Example](sd3_anime_example.png) + +### SD3 Controlnet + +SD3 Controlnets by [InstantX](https://huggingface.co/InstantX) are also supported. Download the canny controlnet model [here](https://huggingface.co/InstantX/SD3-Controlnet-Canny/blob/main/diffusion_pytorch_model.safetensors), and put it in your ComfyUI/models/controlnet directory. Be sure to rename it to something clear like sd3_controlnet_canny.safetensors. + +Here is an example of how to use it: +![Example](sd3_controlnet_example.png) diff --git a/sd3/sd3.5_large_canny_controlnet_example.png b/sd3/sd3.5_large_canny_controlnet_example.png new file mode 100644 index 0000000..ea6ab6c Binary files /dev/null and b/sd3/sd3.5_large_canny_controlnet_example.png differ diff --git a/sd3/sd3.5_simple_example.png b/sd3/sd3.5_simple_example.png new file mode 100644 index 0000000..e19d299 Binary files /dev/null and b/sd3/sd3.5_simple_example.png differ diff --git a/sd3/sd3.5_text_encoders_example.png b/sd3/sd3.5_text_encoders_example.png new file mode 100644 index 0000000..b66729b Binary files /dev/null and b/sd3/sd3.5_text_encoders_example.png differ diff --git a/sd3/sd3_anime_example.png b/sd3/sd3_anime_example.png new file mode 100644 index 0000000..fbae8bf Binary files /dev/null and b/sd3/sd3_anime_example.png differ diff --git a/sd3/sd3_controlnet_example.png b/sd3/sd3_controlnet_example.png new file mode 100644 index 0000000..00d440c Binary files /dev/null and b/sd3/sd3_controlnet_example.png differ diff --git a/sd3/sd3_simple_example.png b/sd3/sd3_simple_example.png new file mode 100644 index 0000000..b8c25cb Binary files /dev/null and b/sd3/sd3_simple_example.png differ diff --git a/sd3/sd3_text_encoders_example.png b/sd3/sd3_text_encoders_example.png new file mode 100644 index 0000000..e798d63 Binary files /dev/null and b/sd3/sd3_text_encoders_example.png differ diff --git a/stable_cascade/README.md b/stable_cascade/README.md new file mode 100644 index 0000000..adcbafc --- /dev/null +++ b/stable_cascade/README.md @@ -0,0 +1,47 @@ +# Stable Cascade Examples + +First download the [stable_cascade_stage_c.safetensors and stable_cascade_stage_b.safetensors checkpoints](https://huggingface.co/stabilityai/stable-cascade/tree/main/comfyui_checkpoints) and put them in the ComfyUI/models/checkpoints folder. + +Stable Cascade is a three-stage process: first, a low resolution latent image is generated with the Stage C diffusion model. This latent is then upscaled using the Stage B diffusion model. This upscaled latent is then upscaled again and converted to pixel space by the Stage A VAE.
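+
+In code terms the order of operations is roughly the following. This is a toy sketch only: the functions are placeholders made up to show the data flow, not real ComfyUI or Stable Cascade APIs, and the sizes in the comments are approximate:
+
+```
+def stage_c(prompt):    # text conditioned diffusion in a small, highly compressed latent space
+    return {"latent_c": "roughly 24x24 for a 1024x1024 image", "prompt": prompt}
+
+def stage_b(latent_c):  # a second diffusion model upscales the latent, guided by the Stage C result
+    return {"latent_b": "roughly 256x256", "guide": latent_c}
+
+def stage_a(latent_b):  # plain VAE decode: upscales again, out to pixel space
+    return {"image": "1024x1024 pixels", "source": latent_b}
+
+image = stage_a(stage_b(stage_c("a photograph of a forest")))
+```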
+ +Note that you can download all the images on this page and then drag or load them on ComfyUI to get the workflow embedded in the image. + +## Text to Image + +Here is a basic text to image workflow: + +![Example](stable_cascade__text_to_image.png) + + +## Image to Image + +Here's an example of how to do basic image to image by encoding the image and passing it to Stage C. + +![Example](stable_cascade__image_to_image.png) + +## Image Variations + +Stable Cascade supports creating variations of images using the output of CLIP vision. See the following workflow for an example: + +![Example](stable_cascade__image_remixing.png) + +See this next workflow for how to mix multiple images together: + +![Example](stable_cascade__image_remixing_multiple.png) + +You can find the input image for the above workflows on the [unCLIP example page](../unclip) + + +## ControlNet + +You can download the Stable Cascade controlnets from [here](https://huggingface.co/stabilityai/stable-cascade/tree/main/controlnet). For these examples I have renamed the files by adding stable_cascade_ in front of the filename, for example: [stable_cascade_canny.safetensors](https://huggingface.co/stabilityai/stable-cascade/blob/main/controlnet/canny.safetensors), [stable_cascade_inpainting.safetensors](https://huggingface.co/stabilityai/stable-cascade/blob/main/controlnet/inpainting.safetensors) + +Here is an example of how to use the Canny Controlnet: + +![Example](stable_cascade__canny_controlnet.png) + + +Here is an example of how to use the Inpaint Controlnet; the example input image can be found [here](../inpaint/yosemite_inpaint_example.png). As a reminder, you can right click images in the LoadImage node and edit them with the mask editor. 
+ +![Example](stable_cascade__inpaint_controlnet.png) + diff --git a/stable_cascade/stable_cascade__canny_controlnet.png b/stable_cascade/stable_cascade__canny_controlnet.png new file mode 100644 index 0000000..3feff13 Binary files /dev/null and b/stable_cascade/stable_cascade__canny_controlnet.png differ diff --git a/stable_cascade/stable_cascade__image_remixing.png b/stable_cascade/stable_cascade__image_remixing.png new file mode 100644 index 0000000..a8ab30b Binary files /dev/null and b/stable_cascade/stable_cascade__image_remixing.png differ diff --git a/stable_cascade/stable_cascade__image_remixing_multiple.png b/stable_cascade/stable_cascade__image_remixing_multiple.png new file mode 100644 index 0000000..ebff1e3 Binary files /dev/null and b/stable_cascade/stable_cascade__image_remixing_multiple.png differ diff --git a/stable_cascade/stable_cascade__image_to_image.png b/stable_cascade/stable_cascade__image_to_image.png new file mode 100644 index 0000000..89414bb Binary files /dev/null and b/stable_cascade/stable_cascade__image_to_image.png differ diff --git a/stable_cascade/stable_cascade__inpaint_controlnet.png b/stable_cascade/stable_cascade__inpaint_controlnet.png new file mode 100644 index 0000000..775b921 Binary files /dev/null and b/stable_cascade/stable_cascade__inpaint_controlnet.png differ diff --git a/stable_cascade/stable_cascade__text_to_image.png b/stable_cascade/stable_cascade__text_to_image.png new file mode 100644 index 0000000..67c3698 Binary files /dev/null and b/stable_cascade/stable_cascade__text_to_image.png differ diff --git a/upscale_models/README.md b/upscale_models/README.md index 6d61039..ab1da4a 100644 --- a/upscale_models/README.md +++ b/upscale_models/README.md @@ -8,4 +8,4 @@ Here is an example: You can load this image in [ComfyUI](https://github.com/comfyanonymous/ComfyUI) to get the workflow. -If you are looking for upscale models to use you can find some on [The Upscale Wiki](https://upscale.wiki/wiki/Model_Database) +If you are looking for upscale models to use you can find some on [OpenModelDB](https://openmodeldb.info/) diff --git a/wan/README.md b/wan/README.md new file mode 100644 index 0000000..677184f --- /dev/null +++ b/wan/README.md @@ -0,0 +1,80 @@ +# Wan 2.1 Models + +[Wan 2.1](https://github.com/Wan-Video/Wan2.1) is a family of video models. + +For Wan 2.2 see: [Wan 2.2](../wan22) + +## Files to Download + +You will first need: + +#### Text encoder and VAE: + +[umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/tree/main/split_files/text_encoders) goes in: ComfyUI/models/text_encoders/ + +[wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors) goes in: ComfyUI/models/vae/ + + +#### Video Models + +The diffusion models can be found [here](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/tree/main/split_files/diffusion_models) + +Note: The fp16 versions are recommended over the bf16 versions as they will give better results. + +Quality rank (highest to lowest): fp16 > bf16 > fp8_scaled > fp8_e4m3fn + +These files go in: ComfyUI/models/diffusion_models/ + +These examples use the 16 bit files but you can use the fp8 ones instead if you don't have enough memory. 
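+
+Before loading the workflows below you can sanity check that the support files landed in the right folders; this is just a throwaway convenience script, not part of ComfyUI, and it assumes it is run from the directory that contains your ComfyUI folder:
+
+```
+# Check that the Wan 2.1 text encoder and VAE are where ComfyUI expects them.
+import os
+
+expected = {
+    "text_encoders": "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
+    "vae": "wan_2.1_vae.safetensors",
+}
+for subdir, filename in expected.items():
+    path = os.path.join("ComfyUI", "models", subdir, filename)
+    print(path, "OK" if os.path.isfile(path) else "MISSING")
+```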
+ +## Workflows + +### Text to Video + +This workflow requires the [wan2.1_t2v_1.3B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_t2v_1.3B_fp16.safetensors) file (put it in: ComfyUI/models/diffusion_models/). You can also use it with the 14B model. + +![Example](text_to_video_wan.webp) + +[Workflow in Json format](text_to_video_wan.json) + +### Image to Video + +This workflow requires the [wan2.1_i2v_480p_14B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_i2v_480p_14B_fp16.safetensors) file (put it in: ComfyUI/models/diffusion_models/) and +[clip_vision_h.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/clip_vision/clip_vision_h.safetensors) which goes in: ComfyUI/models/clip_vision/ + +Note this example only generates 33 frames at 512x512 because I wanted it to be accessible; the model can do more than that. The 720p model is pretty good if you have the hardware/patience to run it. + + + +[Workflow in Json format](image_to_video_wan_example.json) + +The input image can be found on the [flux](../flux) page. + +Here's the same example with the [720p](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_i2v_720p_14B_fp16.safetensors) model: + + + + +### VACE Reference Image to Video + +This workflow requires the [wan2.1_vace_14B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_vace_14B_fp16.safetensors) file (put it in: ComfyUI/models/diffusion_models/) + +This example generates a video from a reference image; this is different from generating a video from a start image. You'll notice that the video does not actually contain the reference image but is clearly derived from it. + + + +[Workflow in Json format](vace_reference_to_video.json) + +You can find the input image [here](../chroma/fennec_girl_sing.png); that image contains a [Chroma](../chroma) workflow if you are interested in how it was generated. + +### Image Camera to Video + +This workflow requires the [wan2.1_fun_camera_v1.1_1.3B_bf16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/diffusion_models/wan2.1_fun_camera_v1.1_1.3B_bf16.safetensors) file (put it in: ComfyUI/models/diffusion_models/) and +[clip_vision_h.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/clip_vision/clip_vision_h.safetensors) which goes in: ComfyUI/models/clip_vision/ if you don't have it already. + + + + +[Workflow in Json format](camera_image_to_video_wan_example.json) + +The input image can be found on the [flux](../flux) page. 
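+
+As a final note, these workflows can also be queued without the browser, since ComfyUI exposes a small HTTP API. The sketch below assumes a default local install listening on 127.0.0.1:8188 and a workflow exported with "Save (API Format)" (enable the dev mode options in the settings to see that menu entry; the Json files linked above are in the UI format, so re-export them first):
+
+```
+# Queue an API-format workflow against a locally running ComfyUI.
+import json
+import urllib.request
+
+with open("workflow_api.json") as f:  # exported via "Save (API Format)"
+    prompt = json.load(f)
+
+req = urllib.request.Request(
+    "http://127.0.0.1:8188/prompt",
+    data=json.dumps({"prompt": prompt}).encode("utf-8"),
+    headers={"Content-Type": "application/json"},
+)
+print(urllib.request.urlopen(req).read().decode())  # response includes the prompt id
+```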
diff --git a/wan/camera_image_to_video_wan_example.json b/wan/camera_image_to_video_wan_example.json new file mode 100644 index 0000000..74068e8 --- /dev/null +++ b/wan/camera_image_to_video_wan_example.json @@ -0,0 +1,865 @@ +{ + "id": "fa117b0f-052b-46d1-af50-d1bc60704ed5", + "revision": 0, + "last_node_id": 60, + "last_link_id": 130, + "nodes": [ + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -540, + 170 + ], + "size": [ + 387.0450744628906, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 590, + 480 + ], + "size": [ + 290.6003723144531, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 117 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 900, + 180 + ], + "size": [ + 308.10516357421875, + 262 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 111 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 118 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 119 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 120 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 1034274237172778, + "randomize", + 20, + 6, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1230, + 180 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1480, + 180 + ], + "size": [ + 620.66796875, + 679.0053100585938 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 90, + "default" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + -140, + 370 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 116 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + -140, + 160 + ], + "size": [ + 422.84503173828125, + 
164.31304931640625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 115 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and a big fluffy tail wearing a maid outfit" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 51, + "type": "CLIPVisionEncode", + "pos": [ + 350, + 680 + ], + "size": [ + 255.5699462890625, + 78 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 94 + }, + { + "name": "image", + "type": "IMAGE", + "link": 109 + } + ], + "outputs": [ + { + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "slot_index": 0, + "links": [ + 113 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionEncode" + }, + "widgets_values": [ + "none" + ] + }, + { + "id": 52, + "type": "LoadImage", + "pos": [ + -10, + 780 + ], + "size": [ + 315, + 314 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 109, + 114 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "flux_dev_example.png", + "image" + ] + }, + { + "id": 49, + "type": "CLIPVisionLoader", + "pos": [ + 0, + 670 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "slot_index": 0, + "links": [ + 94 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPVisionLoader" + }, + "widgets_values": [ + "clip_vision_h.safetensors" + ] + }, + { + "id": 56, + "type": "WanCameraImageToVideo", + "pos": [ + 590, + 200 + ], + "size": [ + 290, + 230 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 115 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 116 + }, + { + "name": "vae", + "type": "VAE", + "link": 117 + }, + { + "name": "clip_vision_output", + "shape": 7, + "type": "CLIP_VISION_OUTPUT", + "link": 113 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 114 + }, + { + "name": "camera_conditions", + "shape": 7, + "type": "WAN_CAMERA_EMBEDDING", + "link": 124 + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 125 + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 126 + }, + { + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 127 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 118 + ] + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 119 + ] + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 120 + ] + } + ], + "properties": { + "Node name for S&R": "WanCameraImageToVideo" + }, + "widgets_values": [ + 832, + 480, + 81, + 1 + ] + }, + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 600, + 100 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 130 + } + ], + "outputs": [ + { + "name": "MODEL", 
+ "type": "MODEL", + "slot_index": 0, + "links": [ + 111 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2150, + 180 + ], + "size": [ + 315, + 210 + ], + "flags": {}, + "order": 14, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 32 + ] + }, + { + "id": 57, + "type": "WanCameraEmbedding", + "pos": [ + 310, + 300 + ], + "size": [ + 236.8000030517578, + 310 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "camera_embedding", + "type": "WAN_CAMERA_EMBEDDING", + "links": [ + 124 + ] + }, + { + "name": "width", + "type": "INT", + "links": [ + 125 + ] + }, + { + "name": "height", + "type": "INT", + "links": [ + 126 + ] + }, + { + "name": "length", + "type": "INT", + "links": [ + 127 + ] + } + ], + "properties": { + "Node name for S&R": "WanCameraEmbedding" + }, + "widgets_values": [ + "Zoom Out", + 512, + 512, + 81, + 1, + 0.5, + 0.5, + 0.5, + 0.5 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + -540, + 50 + ], + "size": [ + 390, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 130 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.1_fun_camera_v1.1_1.3B_bf16.safetensors", + "default" + ] + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 49, + 0, + 51, + 0, + "CLIP_VISION" + ], + [ + 109, + 52, + 0, + 51, + 1, + "IMAGE" + ], + [ + 111, + 54, + 0, + 3, + 0, + "MODEL" + ], + [ + 113, + 51, + 0, + 56, + 3, + "CLIP_VISION_OUTPUT" + ], + [ + 114, + 52, + 0, + 56, + 4, + "IMAGE" + ], + [ + 115, + 6, + 0, + 56, + 0, + "CONDITIONING" + ], + [ + 116, + 7, + 0, + 56, + 1, + "CONDITIONING" + ], + [ + 117, + 39, + 0, + 56, + 2, + "VAE" + ], + [ + 118, + 56, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 119, + 56, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 120, + 56, + 2, + 3, + 3, + "LATENT" + ], + [ + 124, + 57, + 0, + 56, + 5, + "WAN_CAMERA_EMBEDDING" + ], + [ + 125, + 57, + 1, + 56, + 6, + "INT" + ], + [ + 126, + 57, + 2, + 56, + 7, + "INT" + ], + [ + 127, + 57, + 3, + 56, + 8, + "INT" + ], + [ + 130, + 37, + 0, + 54, + 0, + "MODEL" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.6934334949441638, + "offset": [ + 570.9293716820114, + 14.391611998548521 + ] + }, + "frontendVersion": "1.20.7" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan/camera_image_to_video_wan_example.webp b/wan/camera_image_to_video_wan_example.webp new file mode 100644 index 0000000..042ac9f Binary files /dev/null and b/wan/camera_image_to_video_wan_example.webp differ diff --git a/wan/image_to_video_wan_720p_example.webp b/wan/image_to_video_wan_720p_example.webp new file mode 100644 index 0000000..567042f Binary files /dev/null and b/wan/image_to_video_wan_720p_example.webp differ diff --git a/wan/image_to_video_wan_example.json b/wan/image_to_video_wan_example.json new file mode 100644 index 0000000..1bccc0f --- /dev/null 
+++ b/wan/image_to_video_wan_example.json @@ -0,0 +1,743 @@ +{ + "last_node_id": 54, + "last_link_id": 111, + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 56, + 93 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 866.3932495117188, + 499.18597412109375 + ], + "size": [ + 306.36004638671875, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 76, + 99 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1460, + 190 + ], + "size": [ + 870.8511352539062, + 643.7430419921875 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 90, + "default" + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2367.213134765625, + 193.6114959716797 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 13, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 32 + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 98 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 50, + "type": "WanImageToVideo", + "pos": [ + 673.0507202148438, + 627.272705078125 + ], + "size": [ + 342.5999755859375, + 210 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 97 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 98 + }, + { + "name": "vae", + "type": "VAE", + "link": 99 + }, + { + "name": "clip_vision_output", + "type": "CLIP_VISION_OUTPUT", + "shape": 7, + "link": 107 + }, + { + "name": "start_image", + "type": "IMAGE", + "shape": 7, + "link": 106 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 101 + ], + "slot_index": 0 + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 102 + ], + "slot_index": 1 + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 103 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "WanImageToVideo" + }, + "widgets_values": [ + 512, + 512, + 33, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], 
+ "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 97 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and a big fluffy tail wearing a maid outfit turning around" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 111 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 101 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 102 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 103 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 35 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 987948718394761, + "randomize", + 20, + 6, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 49, + "type": "CLIPVisionLoader", + "pos": [ + 20, + 640 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 94 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPVisionLoader" + }, + "widgets_values": [ + "clip_vision_h.safetensors" + ] + }, + { + "id": 51, + "type": "CLIPVisionEncode", + "pos": [ + 360, + 640 + ], + "size": [ + 253.60000610351562, + 78 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 94 + }, + { + "name": "image", + "type": "IMAGE", + "link": 109 + } + ], + "outputs": [ + { + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", + "links": [ + 107 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPVisionEncode" + }, + "widgets_values": [ + "none" + ] + }, + { + "id": 52, + "type": "LoadImage", + "pos": [ + 20, + 760 + ], + "size": [ + 315, + 314 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106, + 109 + ], + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "flux_dev_example.png", + "image" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 20, + 190 + ], + "size": [ + 390, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 70 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 110 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.1_i2v_480p_14B_fp16.safetensors", + "default" + ] + 
}, + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 510, + 70 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 110 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 111 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 49, + 0, + 51, + 0, + "CLIP_VISION" + ], + [ + 97, + 6, + 0, + 50, + 0, + "CONDITIONING" + ], + [ + 98, + 7, + 0, + 50, + 1, + "CONDITIONING" + ], + [ + 99, + 39, + 0, + 50, + 2, + "VAE" + ], + [ + 101, + 50, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 102, + 50, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 103, + 50, + 2, + 3, + 3, + "LATENT" + ], + [ + 106, + 52, + 0, + 50, + 4, + "IMAGE" + ], + [ + 107, + 51, + 0, + 50, + 3, + "CLIP_VISION_OUTPUT" + ], + [ + 109, + 52, + 0, + 51, + 1, + "IMAGE" + ], + [ + 110, + 37, + 0, + 54, + 0, + "MODEL" + ], + [ + 111, + 54, + 0, + 3, + 0, + "MODEL" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.015255979947749, + "offset": [ + 4.576817595742521, + -17.69629597715313 + ] + } + }, + "version": 0.4 +} diff --git a/wan/image_to_video_wan_example.webp b/wan/image_to_video_wan_example.webp new file mode 100644 index 0000000..26433ad Binary files /dev/null and b/wan/image_to_video_wan_example.webp differ diff --git a/wan/text_to_video_wan.json b/wan/text_to_video_wan.json new file mode 100644 index 0000000..3427190 --- /dev/null +++ b/wan/text_to_video_wan.json @@ -0,0 +1,528 @@ +{ + "last_node_id": 48, + "last_link_id": 95, + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 56, + 93 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 866.3932495117188, + 499.18597412109375 + ], + "size": [ + 306.36004638671875, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 76 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1460, + 190 + ], + "size": [ + 870.8511352539062, + 643.7430419921875 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 90, + "default", + "" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": 
[ + 52 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 12.94982624053955, + 184.6981658935547 + ], + "size": [ + 390, + 82 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 74, + 75 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 40, + "type": "EmptyHunyuanLatentVideo", + "pos": [ + 520, + 620 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 91 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyHunyuanLatentVideo" + }, + "widgets_values": [ + 832, + 480, + 33, + 1 + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2367.213134765625, + 193.6114959716797 + ], + "size": [ + 315, + 130 + ], + "flags": {}, + "order": 10, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 32 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 95 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 46 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 52 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 91 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 35 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 82628696717253, + "randomize", + 30, + 6, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 48, + "type": "ModelSamplingSD3", + "pos": [ + 440, + 50 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 94 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 95 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 40 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 94 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.1_t2v_1.3B_fp16.safetensors", + "default" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 46 + ], + "slot_index": 0 + } + ], + "title": "CLIP Text Encode (Positive 
Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a fox moving quickly in a beautiful winter scenery nature trees mountains daytime tracking camera" + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 46, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 52, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 91, + 40, + 0, + 3, + 3, + "LATENT" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 37, + 0, + 48, + 0, + "MODEL" + ], + [ + 95, + 48, + 0, + 3, + 0, + "MODEL" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.1167815779425205, + "offset": [ + -5.675057867608515, + 8.013751263058214 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan/text_to_video_wan.webp b/wan/text_to_video_wan.webp new file mode 100644 index 0000000..f21a370 Binary files /dev/null and b/wan/text_to_video_wan.webp differ diff --git a/wan/vace_reference_to_video.json b/wan/vace_reference_to_video.json new file mode 100644 index 0000000..a2ea2ad --- /dev/null +++ b/wan/vace_reference_to_video.json @@ -0,0 +1,741 @@ +{ + "id": "0898f6a6-2814-4ccd-968a-a2405ee177e7", + "revision": 0, + "last_node_id": 58, + "last_link_id": 124, + "nodes": [ + { + "id": 39, + "type": "VAELoader", + "pos": [ + 866.3932495117188, + 499.18597412109375 + ], + "size": [ + 306.36004638671875, + 58 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 114 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 20, + 190 + ], + "size": [ + 390, + 106 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ] + }, + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 510, + 70 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 110 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 111 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 113 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + 
], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 120 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 56, + "type": "TrimVideoLatent", + "pos": [ + 1265.2001953125, + 613.80859375 + ], + "size": [ + 270, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 119 + }, + { + "name": "trim_amount", + "type": "INT", + "widget": { + "name": "trim_amount" + }, + "link": 121 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 120 + ] + } + ], + "properties": { + "Node name for S&R": "TrimVideoLatent" + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 70 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.1_vace_14B_fp16.safetensors", + "default" + ] + }, + { + "id": 55, + "type": "WanVaceToVideo", + "pos": [ + 698.0429077148438, + 632.2788696289062 + ], + "size": [ + 270, + 254 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 112 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 113 + }, + { + "name": "vae", + "type": "VAE", + "link": 114 + }, + { + "name": "control_video", + "shape": 7, + "type": "IMAGE", + "link": null + }, + { + "name": "control_masks", + "shape": 7, + "type": "MASK", + "link": null + }, + { + "name": "reference_image", + "shape": 7, + "type": "IMAGE", + "link": 118 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "links": [ + 115 + ] + }, + { + "name": "negative", + "type": "CONDITIONING", + "links": [ + 116 + ] + }, + { + "name": "latent", + "type": "LATENT", + "links": [ + 117 + ] + }, + { + "name": "trim_latent", + "type": "INT", + "links": [ + 121 + ] + } + ], + "properties": { + "Node name for S&R": "WanVaceToVideo" + }, + "widgets_values": [ + 768, + 768, + 81, + 1, + 1 + ] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1600, + 190 + ], + "size": [ + 364.4535217285156, + 510.4535217285156 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 90, + "default" + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2060, + 190 + ], + "size": [ + 429.0989685058594, + 523.8981323242188 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 0 + ] + }, + { + "id": 58, + "type": "Note", + "pos": [ + 2509.27587890625, + 189.5493621826172 + ], + "size": [ + 263.95501708984375, + 155.10342407226562 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "crf 0 means a lossless webm, if you want a lossy once with smaller filesize 
increase the crf." + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 52, + "type": "LoadImage", + "pos": [ + 221.9611358642578, + 734.3540649414062 + ], + "size": [ + 315, + 314 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 118 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": [] + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fennec_girl_sing.png", + "image" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 111 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 115 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 116 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 117 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 119 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 399224011392770, + "randomize", + 20, + 6, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 112 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl with massive fennec ears and a big fluffy tail turning around and dancing and singing on stage like an idol" + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 110, + 37, + 0, + 54, + 0, + "MODEL" + ], + [ + 111, + 54, + 0, + 3, + 0, + "MODEL" + ], + [ + 112, + 6, + 0, + 55, + 0, + "CONDITIONING" + ], + [ + 113, + 7, + 0, + 55, + 1, + "CONDITIONING" + ], + [ + 114, + 39, + 0, + 55, + 2, + "VAE" + ], + [ + 115, + 55, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 116, + 55, + 1, + 3, + 2, + "CONDITIONING" + ], + [ + 117, + 55, + 2, + 3, + 3, + "LATENT" + ], + [ + 118, + 52, + 0, + 55, + 5, + "IMAGE" + ], + [ + 119, + 3, + 0, + 56, + 0, + "LATENT" + ], + [ + 120, + 56, + 0, + 8, + 0, + "LATENT" + ], + [ + 121, + 55, + 3, + 56, + 1, + "INT" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.9358232486220777, + "offset": [ + -2.3933794268561357, + -27.125629672645054 + ] + }, + "frontendVersion": "1.19.9" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan/vace_reference_to_video.webp b/wan/vace_reference_to_video.webp new file mode 100644 index 0000000..731f7d1 Binary files /dev/null and b/wan/vace_reference_to_video.webp differ diff --git a/wan22/README.md b/wan22/README.md new file mode 100644 index 0000000..7f791eb --- /dev/null +++ b/wan22/README.md @@ -0,0 +1,70 @@ +# Wan 2.2 Models + +[Wan 2.2](https://github.com/Wan-Video/Wan2.2) is a family of video models and the version after [Wan 2.1](../wan) + +Wan2.2 is initially released with 3 different 
models: a 5B model that can do both text and image to video, and two 14B models, one for text to video and the other for image to video.
+
+See also the [Comfy Docs Wan 2.2 page](https://docs.comfy.org/tutorials/video/wan/wan2_2) for more workflow examples.
+
+## Files to Download
+
+You will first need:
+
+#### Text encoder and VAE:
+
+[umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/tree/main/split_files/text_encoders) goes in: ComfyUI/models/text_encoders/
+
+Needed for the 14B models: [wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors) goes in: ComfyUI/models/vae/
+
+Needed for the 5B model (NEW): [wan2.2_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan2.2_vae.safetensors) goes in: ComfyUI/models/vae/
+
+#### Video Models
+
+The diffusion models can be found [here](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/tree/main/split_files/diffusion_models).
+
+These files go in: ComfyUI/models/diffusion_models/
+
+## Workflows
+
+### 5B Model
+
+This workflow requires the [wan2.2_ti2v_5B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_ti2v_5B_fp16.safetensors) file (put it in: ComfyUI/models/diffusion_models/).
+
+Make sure you have the [wan2.2 VAE](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan2.2_vae.safetensors) (it goes in: ComfyUI/models/vae/).
+
+#### Text to Video
+
+![Example](text_to_video_wan22_5B.webp)
+
+[Workflow in Json format](text_to_video_wan22_5B.json)
+
+#### Image to Video
+
+![Example](image_to_video_wan22_5B.webp)
+
+[Workflow in Json format](image_to_video_wan22_5B.json)
+
+You can find the input image [here](../chroma/fennec_girl_hug.png)
+
+### 14B Model
+
+Make sure you have the [wan2.1 VAE](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors) (it goes in: ComfyUI/models/vae/).
+
+#### Text to Video
+
+This workflow requires both the [wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors) and the [wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors) files (put them in: ComfyUI/models/diffusion_models/). Two files are needed because the high noise model handles the first sampling steps and the low noise model handles the last ones.
+
+![Example](text_to_video_wan22_14B.webp)
+
+[Workflow in Json format](text_to_video_wan22_14B.json)
+
+#### Image to Video
+
+This workflow requires both the [wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors) and the [wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors) files (put them in: ComfyUI/models/diffusion_models/).
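+
+If you would rather script the downloads than click through the Hugging Face pages, here is a minimal sketch using the `huggingface_hub` Python package (an assumption; nothing here requires it) that fetches both files into the folder the workflow expects, assuming ComfyUI is checked out in the current directory:
+
+```python
+from pathlib import Path
+import shutil
+
+from huggingface_hub import hf_hub_download  # assumption: pip install huggingface_hub
+
+REPO = "Comfy-Org/Wan_2.2_ComfyUI_Repackaged"
+DEST = Path("ComfyUI/models/diffusion_models")  # adjust to your install
+DEST.mkdir(parents=True, exist_ok=True)
+
+for name in (
+    "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors",
+    "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors",
+):
+    # Download into the local Hugging Face cache, then copy into ComfyUI.
+    cached = hf_hub_download(REPO, f"split_files/diffusion_models/{name}")
+    shutil.copy(cached, DEST / name)
+```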
+ +![Example](image_to_video_wan22_14B.webp) + +[Workflow in Json format](image_to_video_wan22_14B.json) + +You can find the input image [here](../chroma/fennec_girl_flowers.png) diff --git a/wan22/image_to_video_wan22_14B.json b/wan22/image_to_video_wan22_14B.json new file mode 100644 index 0000000..cea25cd --- /dev/null +++ b/wan22/image_to_video_wan22_14B.json @@ -0,0 +1,876 @@ +{ + "id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", + "revision": 0, + "last_node_id": 60, + "last_link_id": 126, + "nodes": [ + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 98 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 486.4836120605469, + -69.28914642333984 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 110 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 125 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 55, + "type": "ModelSamplingSD3", + "pos": [ + 484.0019836425781, + 54.46213912963867 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 112 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 123 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + }, + { + "id": 58, + "type": "KSamplerAdvanced", + "pos": [ + 1262.509765625, + -26.73247528076172 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 123 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 121 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 122 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 113 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 124 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "disable", + 0, + "fixed", + 20, + 3.5, + "euler", + "simple", + 10, + 10000, + "disable" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 30, + 190 + ], + "size": [ + 360, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 30, + -70 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + 
"type": "MODEL", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 56, + "type": "UNETLoader", + "pos": [ + 30, + 60 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 112 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 30, + 340 + ], + "size": [ + 360, + 58 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 99 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 59, + "type": "Note", + "pos": [ + -202.05557250976562, + -57.859466552734375 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses a different diffusion model for the first steps (high noise) vs the last steps (low noise).\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 60, + "type": "Note", + "pos": [ + -200, + 340 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses the wan 2.1 VAE.\n\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1590, + -20 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 124 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2530, + -20 + ], + "size": [ + 763.67041015625, + 885.67041015625 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 13.3333740234375 + ] + }, + { + "id": 57, + "type": "KSamplerAdvanced", + "pos": [ + 893.0060424804688, + -29.923471450805664 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 125 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 118 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 119 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 120 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 113 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "enable", + 99822389587980, + "randomize", + 20, + 3.5, + "euler", + "simple", + 0, + 10, + "enable" + ] + }, + { + "id": 28, 
+ "type": "SaveAnimatedWEBP", + "pos": [ + 1820, + -20 + ], + "size": [ + 674.6224975585938, + 820.6224975585938 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 80, + "default" + ] + }, + { + "id": 50, + "type": "WanImageToVideo", + "pos": [ + 491.7362060546875, + 617.798095703125 + ], + "size": [ + 342.5999755859375, + 210 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 97 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 98 + }, + { + "name": "vae", + "type": "VAE", + "link": 99 + }, + { + "name": "clip_vision_output", + "shape": 7, + "type": "CLIP_VISION_OUTPUT", + "link": null + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 126 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 118, + 121 + ] + }, + { + "name": "negative", + "type": "CONDITIONING", + "slot_index": 1, + "links": [ + 119, + 122 + ] + }, + { + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [ + 120 + ] + } + ], + "properties": { + "Node name for S&R": "WanImageToVideo" + }, + "widgets_values": [ + 768, + 768, + 81, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 97 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl picking up an assault rifle and moving quickly" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 52, + "type": "LoadImage", + "pos": [ + -50, + 550 + ], + "size": [ + 450, + 540 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 126 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fennec_girl_flowers.png", + "image" + ] + } + ], + "links": [ + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 97, + 6, + 0, + 50, + 0, + "CONDITIONING" + ], + [ + 98, + 7, + 0, + 50, + 1, + "CONDITIONING" + ], + [ + 99, + 39, + 0, + 50, + 2, + "VAE" + ], + [ + 110, + 37, + 0, + 54, + 0, + "MODEL" + ], + [ + 112, + 56, + 0, + 55, + 0, + "MODEL" + ], + [ + 113, + 57, + 0, + 58, + 3, + "LATENT" + ], + [ + 118, + 50, + 0, + 57, + 1, + "CONDITIONING" + ], + [ + 119, + 50, + 1, + 57, + 2, + "CONDITIONING" + ], + [ + 120, + 50, + 2, + 57, + 3, + "LATENT" + ], + [ + 121, + 50, + 0, + 58, + 1, + "CONDITIONING" + ], + [ + 122, + 50, + 1, + 58, + 2, + "CONDITIONING" + ], + [ + 123, + 55, + 0, + 58, + 0, + "MODEL" + ], + [ + 124, + 58, + 0, + 8, + 0, + "LATENT" + ], + [ + 125, + 54, + 0, + 57, + 0, + "MODEL" + ], + [ + 126, + 52, + 0, + 50, + 4, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 
1.1167815779425299, + "offset": [ + 229.4669275491141, + 115.0852193902741 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/image_to_video_wan22_14B.webp b/wan22/image_to_video_wan22_14B.webp new file mode 100644 index 0000000..d74baa2 Binary files /dev/null and b/wan22/image_to_video_wan22_14B.webp differ diff --git a/wan22/image_to_video_wan22_5B.json b/wan22/image_to_video_wan22_5B.json new file mode 100644 index 0000000..6160b10 --- /dev/null +++ b/wan22/image_to_video_wan22_5B.json @@ -0,0 +1,624 @@ +{ + "id": "91f6bbe2-ed41-4fd6-bac7-71d5b5864ecb", + "revision": 0, + "last_node_id": 57, + "last_link_id": 106, + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 95 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 46 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 52 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 104 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 869177064731501, + "randomize", + 30, + 5, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1460, + 190 + ], + "size": [ + 870.8511352539062, + 648.4141235351562 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24.000000000000004, + false, + 90, + "default" + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 20, + 340 + ], + "size": [ + 330, + 60 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 105 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan2.2_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 20, + 190 + ], + "size": [ + 380, + 106 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": 
"CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 48, + "type": "ModelSamplingSD3", + "pos": [ + 440, + 60 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 94 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 95 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 60 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 94 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_ti2v_5B_fp16.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2367.213134765625, + 193.6114959716797 + ], + "size": [ + 670, + 650 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 16.111083984375 + ] + }, + { + "id": 57, + "type": "LoadImage", + "pos": [ + 87.407958984375, + 620.4816284179688 + ], + "size": [ + 274.080078125, + 314 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 106 + ] + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "fennec_girl_hug.png", + "image" + ] + }, + { + "id": 56, + "type": "Note", + "pos": [ + 710.781005859375, + 608.9545288085938 + ], + "size": [ + 320.9936218261719, + 182.6057586669922 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Optimal resolution is: 1280x704 length 121\n\nThe reason it's lower in this workflow is just because I didn't want you to wait too long to get an initial video.\n\nTo get image to video just plug in a start image. For text to video just don't give it a start image." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 55, + "type": "Wan22ImageToVideoLatent", + "pos": [ + 420, + 610 + ], + "size": [ + 271.9126892089844, + 150 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 105 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": 106 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 104 + ] + } + ], + "properties": { + "Node name for S&R": "Wan22ImageToVideoLatent" + }, + "widgets_values": [ + 1280, + 704, + 41, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 46 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cute anime girl with fennec ears and a fluffy tail walking in a beautiful field" + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 46, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 52, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 37, + 0, + 48, + 0, + "MODEL" + ], + [ + 95, + 48, + 0, + 3, + 0, + "MODEL" + ], + [ + 104, + 55, + 0, + 3, + 3, + "LATENT" + ], + [ + 105, + 39, + 0, + 55, + 0, + "VAE" + ], + [ + 106, + 57, + 0, + 55, + 1, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.1167815779425287, + "offset": [ + 3.5210927484772534, + -9.231468990407302 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/image_to_video_wan22_5B.webp b/wan22/image_to_video_wan22_5B.webp new file mode 100644 index 0000000..20281aa Binary files /dev/null and b/wan22/image_to_video_wan22_5B.webp differ diff --git a/wan22/text_to_video_wan22_14B.json b/wan22/text_to_video_wan22_14B.json new file mode 100644 index 0000000..2dc3ca0 --- /dev/null +++ b/wan22/text_to_video_wan22_14B.json @@ -0,0 +1,759 @@ +{ + "id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", + "revision": 0, + "last_node_id": 61, + "last_link_id": 131, + "nodes": [ + { + "id": 54, + "type": "ModelSamplingSD3", + "pos": [ + 486.4836120605469, + -69.28914642333984 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 110 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 125 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 58, + "type": "KSamplerAdvanced", + "pos": [ + 1262.509765625, + -26.73247528076172 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 123 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 128 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 130 + }, + { + "name": "latent_image", + "type": 
"LATENT", + "link": 113 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 124 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "disable", + 0, + "fixed", + 20, + 3.5, + "euler", + "simple", + 10, + 10000, + "disable" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 30, + 190 + ], + "size": [ + 360, + 106 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 30, + 340 + ], + "size": [ + 360, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan_2.1_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 59, + "type": "Note", + "pos": [ + -202.05557250976562, + -57.859466552734375 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses a different diffusion model for the first steps (high noise) vs the last steps (low noise).\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 60, + "type": "Note", + "pos": [ + -200, + 340 + ], + "size": [ + 210, + 159.49227905273438 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "This model uses the wan 2.1 VAE.\n\n\n" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1590, + -20 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 124 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1820, + -20 + ], + "size": [ + 674.6224975585938, + 820.6224975585938 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 16, + false, + 80, + "default" + ] + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2530, + -20 + ], + "size": [ + 763.67041015625, + 885.67041015625 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 16.000000000000004, + 13.3333740234375 + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 129, + 130 + 
] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 56, + "type": "UNETLoader", + "pos": [ + 30, + 60 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 112 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 55, + "type": "ModelSamplingSD3", + "pos": [ + 484.0019836425781, + 54.46213912963867 + ], + "size": [ + 315, + 58 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 112 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 123 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 30, + -70 + ], + "size": [ + 430, + 82 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 57, + "type": "KSamplerAdvanced", + "pos": [ + 893.0060424804688, + -29.923471450805664 + ], + "size": [ + 304.748046875, + 334 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 125 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 127 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 129 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 131 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 113 + ] + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "enable", + 738226772790037, + "randomize", + 20, + 3.5, + "euler", + "simple", + 0, + 10, + "enable" + ] + }, + { + "id": 61, + "type": "EmptyHunyuanLatentVideo", + "pos": [ + 560, + 620 + ], + "size": [ + 270.0943298339844, + 130 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 131 + ] + } + ], + "properties": { + "Node name for S&R": "EmptyHunyuanLatentVideo" + }, + "widgets_values": [ + 1280, + 704, + 57, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 127, + 128 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a robot is running through a futuristic cyberpunk city with neon signs and darkness with bright HDR lights" + ], + "color": "#232", + "bgcolor": "#353" + 
} + ], + "links": [ + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 110, + 37, + 0, + 54, + 0, + "MODEL" + ], + [ + 112, + 56, + 0, + 55, + 0, + "MODEL" + ], + [ + 113, + 57, + 0, + 58, + 3, + "LATENT" + ], + [ + 123, + 55, + 0, + 58, + 0, + "MODEL" + ], + [ + 124, + 58, + 0, + 8, + 0, + "LATENT" + ], + [ + 125, + 54, + 0, + 57, + 0, + "MODEL" + ], + [ + 127, + 6, + 0, + 57, + 1, + "CONDITIONING" + ], + [ + 128, + 6, + 0, + 58, + 1, + "CONDITIONING" + ], + [ + 129, + 7, + 0, + 57, + 2, + "CONDITIONING" + ], + [ + 130, + 7, + 0, + 58, + 2, + "CONDITIONING" + ], + [ + 131, + 61, + 0, + 57, + 3, + "LATENT" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.1167815779425305, + "offset": [ + 242.9977455078102, + 122.98065462666187 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/text_to_video_wan22_14B.webp b/wan22/text_to_video_wan22_14B.webp new file mode 100644 index 0000000..4d3f6d6 Binary files /dev/null and b/wan22/text_to_video_wan22_14B.webp differ diff --git a/wan22/text_to_video_wan22_5B.json b/wan22/text_to_video_wan22_5B.json new file mode 100644 index 0000000..25dc251 --- /dev/null +++ b/wan22/text_to_video_wan22_5B.json @@ -0,0 +1,579 @@ +{ + "id": "91f6bbe2-ed41-4fd6-bac7-71d5b5864ecb", + "revision": 0, + "last_node_id": 57, + "last_link_id": 106, + "nodes": [ + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 1210, + 190 + ], + "size": [ + 210, + 46 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 56, + 93 + ] + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + }, + "widgets_values": [] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": [ + 425.27801513671875, + 180.6060791015625 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 75 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 863, + 187 + ], + "size": [ + 315, + 262 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 95 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 46 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 52 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 104 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 285741127119524, + "randomize", + 30, + 5, + "uni_pc", + "simple", + 1 + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + 20, + 340 + ], + "size": [ + 330, + 60 + ], + "flags": {}, + "order": 0, + 
"mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76, + 105 + ] + } + ], + "properties": { + "Node name for S&R": "VAELoader" + }, + "widgets_values": [ + "wan2.2_vae.safetensors" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + 20, + 190 + ], + "size": [ + 380, + 106 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "Node name for S&R": "CLIPLoader" + }, + "widgets_values": [ + "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "wan", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 48, + "type": "ModelSamplingSD3", + "pos": [ + 440, + 60 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 94 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 95 + ] + } + ], + "properties": { + "Node name for S&R": "ModelSamplingSD3" + }, + "widgets_values": [ + 8.000000000000002 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + 20, + 60 + ], + "size": [ + 346.7470703125, + 82 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 94 + ] + } + ], + "properties": { + "Node name for S&R": "UNETLoader" + }, + "widgets_values": [ + "wan2.2_ti2v_5B_fp16.safetensors", + "default" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 47, + "type": "SaveWEBM", + "pos": [ + 2367.213134765625, + 193.6114959716797 + ], + "size": [ + 670, + 650 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 93 + } + ], + "outputs": [], + "properties": { + "Node name for S&R": "SaveWEBM" + }, + "widgets_values": [ + "ComfyUI", + "vp9", + 24, + 16.111083984375 + ] + }, + { + "id": 56, + "type": "Note", + "pos": [ + 710.781005859375, + 608.9545288085938 + ], + "size": [ + 320.9936218261719, + 182.6057586669922 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Optimal resolution is: 1280x704 length 121\n\nThe reason it's lower in this workflow is just because I didn't want you to wait too long to get an initial video.\n\nTo get image to video just plug in a start image. For text to video just don't give it a start image." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 55, + "type": "Wan22ImageToVideoLatent", + "pos": [ + 420, + 610 + ], + "size": [ + 271.9126892089844, + 150 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 105 + }, + { + "name": "start_image", + "shape": 7, + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 104 + ] + } + ], + "properties": { + "Node name for S&R": "Wan22ImageToVideoLatent" + }, + "widgets_values": [ + 1280, + 704, + 41, + 1 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": [ + 422.84503173828125, + 164.31304931640625 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 74 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 46 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "drone shot of a volcano erupting with a fox walking on it" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 28, + "type": "SaveAnimatedWEBP", + "pos": [ + 1460, + 190 + ], + "size": [ + 870.8511352539062, + 648.4141235351562 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 56 + } + ], + "outputs": [], + "properties": {}, + "widgets_values": [ + "ComfyUI", + 24.000000000000004, + false, + 80, + "default" + ] + } + ], + "links": [ + [ + 35, + 3, + 0, + 8, + 0, + "LATENT" + ], + [ + 46, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 52, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 56, + 8, + 0, + 28, + 0, + "IMAGE" + ], + [ + 74, + 38, + 0, + 6, + 0, + "CLIP" + ], + [ + 75, + 38, + 0, + 7, + 0, + "CLIP" + ], + [ + 76, + 39, + 0, + 8, + 1, + "VAE" + ], + [ + 93, + 8, + 0, + 47, + 0, + "IMAGE" + ], + [ + 94, + 37, + 0, + 48, + 0, + "MODEL" + ], + [ + 95, + 48, + 0, + 3, + 0, + "MODEL" + ], + [ + 104, + 55, + 0, + 3, + 3, + "LATENT" + ], + [ + 105, + 39, + 0, + 55, + 0, + "VAE" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 1.11678157794253, + "offset": [ + 7.041966347099882, + -19.733042401058505 + ] + }, + "frontendVersion": "1.23.4" + }, + "version": 0.4 +} \ No newline at end of file diff --git a/wan22/text_to_video_wan22_5B.webp b/wan22/text_to_video_wan22_5B.webp new file mode 100644 index 0000000..eafce22 Binary files /dev/null and b/wan22/text_to_video_wan22_5B.webp differ