Commit 2ba9257
Parent(s): 267b599

improve UI to support conditioning
Files changed:
- CLAUDE.md (+17 -9)
- degraded_requirements.txt (+8 -0)
- vms/config.py (+73 -1)
- vms/ui/models/tabs/drafts_tab.py (+2 -2)
- vms/ui/models/tabs/trained_tab.py (+9 -9)
- vms/ui/models/tabs/training_tab.py (+4 -5)
- vms/ui/project/services/training.py (+58 -5)
- vms/ui/project/tabs/caption_tab.py (+3 -3)
- vms/ui/project/tabs/manage_tab.py (+2 -2)
- vms/ui/project/tabs/train_tab.py (+265 -17)
CLAUDE.md
CHANGED
@@ -1,18 +1,26 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
 # Video Model Studio - Guidelines for Claude
 
 ## Build & Run Commands
-- Setup: `./setup.sh` (with flash attention) or `./
-- Run: `./run.sh` or `python3.10 app.py`
-- Test:
-
+- Setup: `./setup.sh` (with flash attention) or `./degraded_setup.sh` (without flash-attention)
+- Run: `./run.sh` or `python3.10 app.py`
+- Test:
+  - Full test: `python3 tests/test_dataset.py`
+  - Single model test: `bash tests/scripts/dummy_cogvideox_lora.sh` (or other model variants)
+  - Run test suite: `bash tests/test_model_runs_minimally_lora.sh`
 
 ## Code Style
 - Python version: 3.10 (required for flash-attention compatibility)
-- Type hints: Use typing module annotations for all functions
-- Docstrings: Google style with Args/Returns sections
-- Error handling: Use try/except with specific exceptions, log errors
+- Type hints: Use typing module annotations for all functions (from typing import Any, Optional, Dict, List, Union, Tuple)
+- Docstrings: Google style with Args/Returns sections for all functions
+- Error handling: Use try/except with specific exceptions, log errors appropriately
 - Imports: Group standard lib, third-party, and project imports
 - Naming: snake_case for functions/variables, PascalCase for classes
 - Use Path objects from pathlib instead of string paths
-
-- Environment variables: Use parse_bool_env for boolean env vars
+- Extract reusable logic to separate utility functions
+- Environment variables: Use parse_bool_env for boolean env vars
+- Logging: Use the logging module with appropriate log levels (DEBUG, INFO, WARNING, ERROR)
+- UI components: Organize in tabs and use consistent naming for components dict
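
The guidelines above reference a `parse_bool_env` helper for boolean environment variables. The repo's actual implementation is not shown in this commit, so the following is only a rough sketch of what such a helper conventionally looks like; treat the exact accepted values as assumptions:

```python
import os

def parse_bool_env(name: str, default: bool = False) -> bool:
    """Interpret common truthy strings from an environment variable.

    Hypothetical sketch: the real vms helper may accept a different
    set of values or signature.
    """
    value = os.environ.get(name)
    if value is None:
        return default
    return value.strip().lower() in ("1", "true", "yes", "on")
```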
degraded_requirements.txt
CHANGED
@@ -9,8 +9,16 @@ diffusers @ git+https://github.com/huggingface/diffusers.git@main
 imageio
 imageio-ffmpeg
 
+#--------------- MACOS HACKS ----------------
+
+# use eva-decord for better compatiblity on macOS
+eva-decord
+
+# don't install flash attention on macOS
 #flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
 
+#--------------- / MACOS HACKS --------------
+
 # for youtube video download
 pytube
 pytubefix
vms/config.py
CHANGED
@@ -205,7 +205,9 @@ MODEL_TYPES = {
 # Training types
 TRAINING_TYPES = {
     "LoRA Finetune": "lora",
-    "Full Finetune": "full-finetune"
+    "Full Finetune": "full-finetune",
+    "Control LoRA": "control-lora",
+    "Control Full Finetune": "control-full-finetune"
 }
 
 # Model versions for each model type
@@ -288,6 +290,13 @@ DEFAULT_NB_LR_WARMUP_STEPS = math.ceil(0.20 * DEFAULT_NB_TRAINING_STEPS) # 20%
 # Whether to automatically restart a training job after a server reboot or not
 DEFAULT_AUTO_RESUME = False
 
+# Control training defaults
+DEFAULT_CONTROL_TYPE = "canny"
+DEFAULT_TRAIN_QK_NORM = False
+DEFAULT_FRAME_CONDITIONING_TYPE = "full"
+DEFAULT_FRAME_CONDITIONING_INDEX = 0
+DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK = False
+
 # For validation
 DEFAULT_VALIDATION_NB_STEPS = 50
 DEFAULT_VALIDATION_HEIGHT = 512
@@ -468,6 +477,69 @@ TRAINING_PRESETS = {
         "num_gpus": DEFAULT_NUM_GPUS,
         "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
         "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
+    },
+    "Wan-2.1-I2V (Control LoRA)": {
+        "model_type": "wan",
+        "training_type": "control-lora",
+        "lora_rank": "32",
+        "lora_alpha": "32",
+        "train_steps": DEFAULT_NB_TRAINING_STEPS,
+        "batch_size": DEFAULT_BATCH_SIZE,
+        "learning_rate": 5e-5,
+        "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
+        "training_buckets": SMALL_TRAINING_BUCKETS,
+        "flow_weighting_scheme": "logit_normal",
+        "num_gpus": DEFAULT_NUM_GPUS,
+        "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
+        "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
+        "control_type": "custom",
+        "train_qk_norm": True,
+        "frame_conditioning_type": "index",
+        "frame_conditioning_index": 0,
+        "frame_conditioning_concatenate_mask": True,
+        "description": "Image-conditioned video generation with LoRA adapters"
+    },
+    "LTX-Video (Control LoRA)": {
+        "model_type": "ltx_video",
+        "training_type": "control-lora",
+        "lora_rank": "128",
+        "lora_alpha": "128",
+        "train_steps": DEFAULT_NB_TRAINING_STEPS,
+        "batch_size": DEFAULT_BATCH_SIZE,
+        "learning_rate": DEFAULT_LEARNING_RATE,
+        "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
+        "training_buckets": SMALL_TRAINING_BUCKETS,
+        "flow_weighting_scheme": "logit_normal",
+        "num_gpus": DEFAULT_NUM_GPUS,
+        "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
+        "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
+        "control_type": "custom",
+        "train_qk_norm": True,
+        "frame_conditioning_type": "index",
+        "frame_conditioning_index": 0,
+        "frame_conditioning_concatenate_mask": True,
+        "description": "Image-conditioned video generation with LoRA adapters"
+    },
+    "HunyuanVideo (Control LoRA)": {
+        "model_type": "hunyuan_video",
+        "training_type": "control-lora",
+        "lora_rank": "128",
+        "lora_alpha": "128",
+        "train_steps": DEFAULT_NB_TRAINING_STEPS,
+        "batch_size": DEFAULT_BATCH_SIZE,
+        "learning_rate": 2e-5,
+        "save_iterations": DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
+        "training_buckets": SMALL_TRAINING_BUCKETS,
+        "flow_weighting_scheme": "none",
+        "num_gpus": DEFAULT_NUM_GPUS,
+        "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
+        "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
+        "control_type": "custom",
+        "train_qk_norm": True,
+        "frame_conditioning_type": "index",
+        "frame_conditioning_index": 0,
+        "frame_conditioning_concatenate_mask": True,
+        "description": "Image-conditioned video generation with HunyuanVideo and LoRA adapters"
     }
 }
 
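
To see how the new entries fit together: a preset's `training_type` carries one of the internal identifiers from `TRAINING_TYPES`, and control-specific keys fall back to the new defaults when a preset omits them. A minimal, self-contained sketch (dict contents abridged from the diff above; not the repo's own lookup code):

```python
# Abridged copies of the config values added in this commit.
TRAINING_TYPES = {
    "LoRA Finetune": "lora",
    "Full Finetune": "full-finetune",
    "Control LoRA": "control-lora",
    "Control Full Finetune": "control-full-finetune",
}
DEFAULT_CONTROL_TYPE = "canny"

# One preset, reduced to the keys that matter for this sketch.
preset = {"training_type": "control-lora", "control_type": "custom"}

training_type = preset["training_type"]
is_control = training_type in ("control-lora", "control-full-finetune")
# Presets may override the control defaults; fall back otherwise.
control_type = preset.get("control_type", DEFAULT_CONTROL_TYPE)

print(training_type, is_control, control_type)  # control-lora True custom
```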
vms/ui/models/tabs/drafts_tab.py
CHANGED
@@ -88,7 +88,7 @@ class DraftsTab(BaseTab):
                     edit_btn.click(
                         fn=lambda model_id=model.id: self.edit_model(model_id),
                         inputs=[],
-                        outputs=[]
+                        outputs=[self.app.main_tabs]
                     )
                 with gr.Column(scale=1, min_width=10):
                     delete_btn = gr.Button("🗑️ Delete", size="sm", variant="stop")
@@ -107,7 +107,7 @@ class DraftsTab(BaseTab):
             # Switch to project view with this model
             self.app.switch_project(model_id)
             # Set main tab to Project (index 0)
-            self.app.
+            return self.app.main_tabs.update(selected=0)
 
     def delete_model(self, model_id: str) -> gr.Column:
         """Delete a model and refresh the list"""
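
The fix in this file wires the button's return value to the top-level `gr.Tabs` container so the click can actually switch the visible tab; with `outputs=[]` the update was silently dropped. The `main_tabs.update(selected=0)` form matches older Gradio releases; in Gradio 4+ the equivalent is to return a `gr.Tabs(selected=...)` update. A minimal sketch of the modern form (component names here are illustrative, not from the repo):

```python
import gradio as gr

with gr.Blocks() as demo:
    with gr.Tabs() as main_tabs:
        with gr.Tab("Project", id=0):
            gr.Markdown("Project view")
        with gr.Tab("Models", id=1):
            open_btn = gr.Button("Open in Project")

    # Routing the handler's return value to `main_tabs` is what makes the
    # tab switch take effect; the returned update selects the tab whose
    # id matches `selected`.
    open_btn.click(fn=lambda: gr.Tabs(selected=0), inputs=[], outputs=[main_tabs])

demo.launch()
```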
vms/ui/models/tabs/trained_tab.py
CHANGED
@@ -94,7 +94,7 @@ class TrainedTab(BaseTab):
                     preview_btn.click(
                         fn=lambda model_id=model.id: self.preview_model(model_id),
                         inputs=[],
-                        outputs=[]
+                        outputs=[self.app.main_tabs]
                     )
 
                     download_btn.click(
@@ -106,7 +106,7 @@ class TrainedTab(BaseTab):
                     publish_btn.click(
                         fn=lambda model_id=model.id: self.publish_model(model_id),
                         inputs=[],
-                        outputs=[]
+                        outputs=[self.app.main_tabs]
                     )
 
                     delete_btn.click(
@@ -117,28 +117,28 @@ class TrainedTab(BaseTab):
 
         return new_container
 
-    def preview_model(self, model_id: str) ->
+    def preview_model(self, model_id: str) -> gr.Tabs:
         """Open model preview"""
         if self.app:
             # Switch to project view with this model
             self.app.switch_project(model_id)
             # Set main tab to Project (index 0)
-            self.app.
-            # Navigate to preview tab
-            # TODO: Implement proper tab navigation
+            return self.app.main_tabs.update(selected=0)
+        # TODO: Navigate to preview tab
 
     def download_model(self, model_id: str) -> None:
        """Download model weights"""
        # TODO: Implement file download
        gr.Info(f"Download for model {model_id[:8]}... is not yet implemented")
 
-    def publish_model(self, model_id: str) ->
+    def publish_model(self, model_id: str) -> gr.Tabs:
         """Publish model to Hugging Face Hub"""
         if self.app:
             # Switch to the selected model project
             self.app.switch_project(model_id)
-            # Navigate to
-
+            # Navigate to the main project tab
+            return self.app.main_tabs.update(selected=0)
+        # TODO: Navigate to publish tab
 
     def delete_model(self, model_id: str) -> gr.Column:
         """Delete a model and refresh the list"""
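
A side note on the `fn=lambda model_id=model.id: ...` handlers that recur in these tabs: the default argument freezes the current `model.id` at lambda-creation time. Without it, every handler created in a loop over models would close over the same variable and see only the last model. A self-contained demonstration of the difference:

```python
# Python closures capture variables, not values. Lambdas created in a loop
# all share one binding unless the value is frozen via a default argument.
handlers_late = [lambda: m for m in ("a", "b", "c")]
handlers_bound = [lambda m=m: m for m in ("a", "b", "c")]

print([h() for h in handlers_late])   # ['c', 'c', 'c']  (late binding)
print([h() for h in handlers_bound])  # ['a', 'b', 'c']  (value frozen per lambda)
```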
vms/ui/models/tabs/training_tab.py
CHANGED
@@ -109,7 +109,7 @@ class TrainingTab(BaseTab):
                     preview_btn.click(
                         fn=lambda model_id=model.id: self.preview_model(model_id),
                         inputs=[],
-                        outputs=[]
+                        outputs=[self.app.main_tabs]
                     )
 
                     download_btn.click(
@@ -147,15 +147,14 @@ class TrainingTab(BaseTab):
         # Refresh the list
         return self.refresh_models()
 
-    def preview_model(self, model_id: str) ->
+    def preview_model(self, model_id: str) -> gr.Tabs:
         """Open model preview"""
         if self.app:
             # Switch to project view with this model
             self.app.switch_project(model_id)
             # Set main tab to Project (index 0)
-            self.app.
-            #
-            # TODO: Implement proper tab navigation
+            return self.app.main_tabs.update(selected=0)
+        # TODO: Navigate to preview tab
 
     def download_model(self, model_id: str) -> None:
         """Download model weights"""
vms/ui/project/services/training.py
CHANGED
@@ -40,6 +40,9 @@ from vms.config import (
     DEFAULT_NB_TRAINING_STEPS,
     DEFAULT_NB_LR_WARMUP_STEPS,
     DEFAULT_AUTO_RESUME,
+    DEFAULT_CONTROL_TYPE, DEFAULT_TRAIN_QK_NORM,
+    DEFAULT_FRAME_CONDITIONING_TYPE, DEFAULT_FRAME_CONDITIONING_INDEX,
+    DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK,
     generate_model_project_id
 )
 from vms.utils import (
@@ -229,7 +232,13 @@ class TrainingService:
             "num_gpus": DEFAULT_NUM_GPUS,
             "precomputation_items": DEFAULT_PRECOMPUTATION_ITEMS,
             "lr_warmup_steps": DEFAULT_NB_LR_WARMUP_STEPS,
-            "auto_resume": DEFAULT_AUTO_RESUME
+            "auto_resume": DEFAULT_AUTO_RESUME,
+            # Control parameters
+            "control_type": DEFAULT_CONTROL_TYPE,
+            "train_qk_norm": DEFAULT_TRAIN_QK_NORM,
+            "frame_conditioning_type": DEFAULT_FRAME_CONDITIONING_TYPE,
+            "frame_conditioning_index": DEFAULT_FRAME_CONDITIONING_INDEX,
+            "frame_conditioning_concatenate_mask": DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK
         }
 
         return default_state
@@ -756,9 +765,37 @@ class TrainingService:
         config.data_root = str(dataset_config_file)
 
         # Update LoRA parameters if using LoRA training type
-        if training_type == "lora":
+        if training_type == "lora" or training_type == "control-lora":
             config.lora_rank = int(lora_rank)
             config.lora_alpha = int(lora_alpha)
+
+        # Update Control parameters if using control training types
+        if training_type in ["control-lora", "control-full-finetune"]:
+            # Get control parameters from UI state
+            current_state = self.load_ui_state()
+
+            # Add control-specific parameters
+            control_type = current_state.get("control_type", DEFAULT_CONTROL_TYPE)
+            train_qk_norm = current_state.get("train_qk_norm", DEFAULT_TRAIN_QK_NORM)
+            frame_conditioning_type = current_state.get("frame_conditioning_type", DEFAULT_FRAME_CONDITIONING_TYPE)
+            frame_conditioning_index = current_state.get("frame_conditioning_index", DEFAULT_FRAME_CONDITIONING_INDEX)
+            frame_conditioning_concatenate_mask = current_state.get("frame_conditioning_concatenate_mask", DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK)
+
+            # Map boolean from UI state to command line args
+            config_args.extend([
+                "--control_type", control_type,
+            ])
+
+            if train_qk_norm:
+                config_args.append("--train_qk_norm")
+
+            config_args.extend([
+                "--frame_conditioning_type", frame_conditioning_type,
+                "--frame_conditioning_index", str(frame_conditioning_index)
+            ])
+
+            if frame_conditioning_concatenate_mask:
+                config_args.append("--frame_conditioning_concatenate_mask")
 
         # Update with resume_from_checkpoint if provided
         if resume_from_checkpoint:
@@ -882,8 +919,11 @@ class TrainingService:
         with open(self.app.output_pid_file, 'w') as f:
             f.write(str(process.pid))
 
-        #
-        self.
+        # Get current UI state for all parameters
+        current_state = self.load_ui_state()
+
+        # Build session data
+        session_data = {
             "model_type": model_type,
             "model_version": model_version,
             "training_type": training_type,
@@ -898,7 +938,20 @@ class TrainingService:
             "lr_warmup_steps": lr_warmup_steps,
             "repo_id": repo_id,
             "start_time": datetime.now().isoformat()
-        }
+        }
+
+        # Add control parameters if relevant
+        if training_type in ["control-lora", "control-full-finetune"]:
+            session_data.update({
+                "control_type": current_state.get("control_type", DEFAULT_CONTROL_TYPE),
+                "train_qk_norm": current_state.get("train_qk_norm", DEFAULT_TRAIN_QK_NORM),
+                "frame_conditioning_type": current_state.get("frame_conditioning_type", DEFAULT_FRAME_CONDITIONING_TYPE),
+                "frame_conditioning_index": current_state.get("frame_conditioning_index", DEFAULT_FRAME_CONDITIONING_INDEX),
+                "frame_conditioning_concatenate_mask": current_state.get("frame_conditioning_concatenate_mask", DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK)
+            })
+
+        # Save session
+        self.save_session(session_data)
 
         # Update initial training status
         total_steps = int(train_steps)
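
For clarity, here is what the control branch above produces on the command line. This is a hypothetical rerun of the same argument-assembly logic with example values; the surrounding launcher and the consumer of `config_args` are outside this diff:

```python
# Rebuilding the argument list the control branch appends, with sample values.
config_args = []

control_type = "custom"
train_qk_norm = True
frame_conditioning_type = "index"
frame_conditioning_index = 0
frame_conditioning_concatenate_mask = True

config_args.extend(["--control_type", control_type])
if train_qk_norm:
    # Booleans become presence/absence flags rather than "--flag true" pairs.
    config_args.append("--train_qk_norm")
config_args.extend([
    "--frame_conditioning_type", frame_conditioning_type,
    "--frame_conditioning_index", str(frame_conditioning_index),
])
if frame_conditioning_concatenate_mask:
    config_args.append("--frame_conditioning_concatenate_mask")

print(" ".join(config_args))
# --control_type custom --train_qk_norm --frame_conditioning_type index
# --frame_conditioning_index 0 --frame_conditioning_concatenate_mask
```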
vms/ui/project/tabs/caption_tab.py
CHANGED
@@ -11,7 +11,7 @@ from pathlib import Path
 import mimetypes
 
 from vms.utils import BaseTab, is_image_file, is_video_file, copy_files_to_training_dir
-from vms.config import DEFAULT_CAPTIONING_BOT_INSTRUCTIONS, DEFAULT_PROMPT_PREFIX, STAGING_PATH,
+from vms.config import DEFAULT_CAPTIONING_BOT_INSTRUCTIONS, DEFAULT_PROMPT_PREFIX, STAGING_PATH, USE_LARGE_DATASET
 
 logger = logging.getLogger(__name__)
 
@@ -52,11 +52,11 @@ class CaptionTab(BaseTab):
                 )
                 with gr.Row():
                     self.components["run_autocaption_btn"] = gr.Button(
-                        "Automatically
+                        "Automatically caption data",
                         variant="primary"
                     )
                     self.components["copy_files_to_training_dir_btn"] = gr.Button(
-                        "Copy assets to training
+                        "Copy assets to training folder",
                         variant="primary"
                     )
                     self.components["stop_autocaption_btn"] = gr.Button(
vms/ui/project/tabs/manage_tab.py
CHANGED
@@ -10,8 +10,8 @@ from typing import Dict, Any, List, Optional
 
 from vms.utils import BaseTab, validate_model_repo
 from vms.config import (
-    HF_API_TOKEN, VIDEOS_TO_SPLIT_PATH, STAGING_PATH,
-
+    HF_API_TOKEN, VIDEOS_TO_SPLIT_PATH, STAGING_PATH,
+    USE_LARGE_DATASET
 )
 
 logger = logging.getLogger(__name__)
vms/ui/project/tabs/train_tab.py
CHANGED
@@ -26,7 +26,10 @@ from vms.config import (
     DEFAULT_PRECOMPUTATION_ITEMS,
     DEFAULT_NB_TRAINING_STEPS,
     DEFAULT_NB_LR_WARMUP_STEPS,
-    DEFAULT_AUTO_RESUME
+    DEFAULT_AUTO_RESUME,
+    DEFAULT_CONTROL_TYPE, DEFAULT_TRAIN_QK_NORM,
+    DEFAULT_FRAME_CONDITIONING_TYPE, DEFAULT_FRAME_CONDITIONING_INDEX,
+    DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK
 )
 
 logger = logging.getLogger(__name__)
@@ -116,18 +119,165 @@ class TrainTab(BaseTab):
         # LoRA specific parameters (will show/hide based on training type)
         with gr.Row(visible=True) as lora_params_row:
             self.components["lora_params_row"] = lora_params_row
-            [12 removed lines: the previous inline LoRA rank/alpha controls; content truncated in this diff view]
+            with gr.Column():
+                gr.Markdown("""
+                ## 🔄 LoRA Training Parameters
+
+                LoRA (Low-Rank Adaptation) trains small adapter matrices instead of the full model, requiring much less memory while still achieving great results.
+                """)
+
+        # Second row for actual LoRA parameters
+        with gr.Row(visible=True) as lora_settings_row:
+            self.components["lora_settings_row"] = lora_settings_row
+            with gr.Column():
+                self.components["lora_rank"] = gr.Dropdown(
+                    label="LoRA Rank",
+                    choices=["16", "32", "64", "128", "256", "512", "1024"],
+                    value=DEFAULT_LORA_RANK_STR,
+                    type="value",
+                    info="Controls the size and expressiveness of LoRA adapters. Higher values = better quality but larger file size"
+                )
+
+                with gr.Accordion("What is LoRA Rank?", open=False):
+                    gr.Markdown("""
+                    **LoRA Rank** determines the complexity of the LoRA adapters:
+
+                    - **Lower rank (16-32)**: Smaller file size, faster training, but less expressive
+                    - **Medium rank (64-128)**: Good balance between quality and file size
+                    - **Higher rank (256-1024)**: More expressive adapters, better quality but larger file size
+
+                    Think of rank as the "capacity" of your adapter. Higher ranks can learn more complex modifications to the base model but require more VRAM during training and result in larger files.
+
+                    **Quick guide:**
+                    - For Wan models: Use 32-64 (Wan models work well with lower ranks)
+                    - For LTX-Video: Use 128-256
+                    - For Hunyuan Video: Use 128
+                    """)
+
+            with gr.Column():
+                self.components["lora_alpha"] = gr.Dropdown(
+                    label="LoRA Alpha",
+                    choices=["16", "32", "64", "128", "256", "512", "1024"],
+                    value=DEFAULT_LORA_ALPHA_STR,
+                    type="value",
+                    info="Controls the effective learning rate scaling of LoRA adapters. Usually set to same value as rank"
+                )
+
+                with gr.Accordion("What is LoRA Alpha?", open=False):
+                    gr.Markdown("""
+                    **LoRA Alpha** controls the effective scale of the LoRA updates:
+
+                    - The actual scaling factor is calculated as `alpha ÷ rank`
+                    - Usually set to match the rank value (alpha = rank)
+                    - Higher alpha = stronger effect from the adapters
+                    - Lower alpha = more subtle adapter influence
+
+                    **Best practice:**
+                    - For most cases, set alpha equal to rank
+                    - For more aggressive training, set alpha higher than rank
+                    - For more conservative training, set alpha lower than rank
+                    """)
+
+
+        # Control specific parameters (will show/hide based on training type)
+        with gr.Row(visible=False) as control_params_row:
+            self.components["control_params_row"] = control_params_row
+            with gr.Column():
+                gr.Markdown("""
+                ## 🖼️ Control Training Settings
+
+                Control training enables **image-to-video generation** by teaching the model how to use an image as a guide for video creation.
+                This is ideal for turning still images into dynamic videos while preserving composition, style, and content.
+                """)
+
+        # Second row for control parameters
+        with gr.Row(visible=False) as control_settings_row:
+            self.components["control_settings_row"] = control_settings_row
+            with gr.Column():
+                self.components["control_type"] = gr.Dropdown(
+                    label="Control Type",
+                    choices=["canny", "custom"],
+                    value=DEFAULT_CONTROL_TYPE,
+                    info="Type of control conditioning. 'canny' uses edge detection preprocessing, 'custom' allows direct image conditioning."
+                )
+
+                with gr.Accordion("What is Control Conditioning?", open=False):
+                    gr.Markdown("""
+                    **Control Conditioning** allows the model to be guided by an input image, adapting the video generation based on the image content. This is used for image-to-video generation where you want to turn an image into a moving video while maintaining its style, composition or content.
+
+                    - **canny**: Uses edge detection to extract outlines from images for structure-preserving video generation
+                    - **custom**: Direct image conditioning without preprocessing, preserving more image details
+                    """)
+
+            with gr.Column():
+                self.components["train_qk_norm"] = gr.Checkbox(
+                    label="Train QK Normalization Layers",
+                    value=DEFAULT_TRAIN_QK_NORM,
+                    info="Enable to train query-key normalization layers for better control signal integration"
+                )
+
+                with gr.Accordion("What is QK Normalization?", open=False):
+                    gr.Markdown("""
+                    **QK Normalization** refers to normalizing the query and key values in the attention mechanism of transformers.
+
+                    - When enabled, allows the model to better integrate control signals with content generation
+                    - Improves training stability for control models
+                    - Generally recommended for control training, especially with image conditioning
+                    """)
+
+        with gr.Row(visible=False) as frame_conditioning_row:
+            self.components["frame_conditioning_row"] = frame_conditioning_row
+            with gr.Column():
+                self.components["frame_conditioning_type"] = gr.Dropdown(
+                    label="Frame Conditioning Type",
+                    choices=["index", "prefix", "random", "first_and_last", "full"],
+                    value=DEFAULT_FRAME_CONDITIONING_TYPE,
+                    info="Determines which frames receive conditioning during training"
+                )
+
+                with gr.Accordion("Frame Conditioning Type Explanation", open=False):
+                    gr.Markdown("""
+                    **Frame Conditioning Types** determine which frames in the video receive image conditioning:
+
+                    - **index**: Only applies conditioning to a single frame at the specified index
+                    - **prefix**: Applies conditioning to all frames before a certain point
+                    - **random**: Randomly selects frames to receive conditioning during training
+                    - **first_and_last**: Only applies conditioning to the first and last frames
+                    - **full**: Applies conditioning to all frames in the video
+
+                    For image-to-video tasks, 'index' (usually with index 0) is most common as it conditions only the first frame.
+                    """)
+
+            with gr.Column():
+                self.components["frame_conditioning_index"] = gr.Number(
+                    label="Frame Conditioning Index",
+                    value=DEFAULT_FRAME_CONDITIONING_INDEX,
+                    precision=0,
+                    info="Specifies which frame receives conditioning when using 'index' type (0 = first frame)"
+                )
+
+        with gr.Row(visible=False) as control_options_row:
+            self.components["control_options_row"] = control_options_row
+            with gr.Column():
+                self.components["frame_conditioning_concatenate_mask"] = gr.Checkbox(
+                    label="Concatenate Frame Mask",
+                    value=DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK,
+                    info="Enable to add frame mask information to the conditioning channels"
+                )
+
+                with gr.Accordion("What is Frame Mask Concatenation?", open=False):
+                    gr.Markdown("""
+                    **Frame Mask Concatenation** adds an additional channel to the control signal that indicates which frames are being conditioned:
+
+                    - Creates a binary mask (0/1) indicating which frames receive conditioning
+                    - Helps the model distinguish between conditioned and unconditioned frames
+                    - Particularly useful for 'index' conditioning where only select frames are conditioned
+                    - Generally improves temporal consistency between conditioned and unconditioned frames
+                    """)
+
+            with gr.Column():
+                # Empty column for layout balance
+                pass
 
         with gr.Row():
             self.components["train_steps"] = gr.Number(
@@ -426,6 +576,37 @@ class TrainTab(BaseTab):
             inputs=[self.components["lora_alpha"]],
             outputs=[]
         )
+
+        # Control parameters change events
+        self.components["control_type"].change(
+            fn=lambda v: self.app.update_ui_state(control_type=v),
+            inputs=[self.components["control_type"]],
+            outputs=[]
+        )
+
+        self.components["train_qk_norm"].change(
+            fn=lambda v: self.app.update_ui_state(train_qk_norm=v),
+            inputs=[self.components["train_qk_norm"]],
+            outputs=[]
+        )
+
+        self.components["frame_conditioning_type"].change(
+            fn=lambda v: self.app.update_ui_state(frame_conditioning_type=v),
+            inputs=[self.components["frame_conditioning_type"]],
+            outputs=[]
+        )
+
+        self.components["frame_conditioning_index"].change(
+            fn=lambda v: self.app.update_ui_state(frame_conditioning_index=v),
+            inputs=[self.components["frame_conditioning_index"]],
+            outputs=[]
+        )
+
+        self.components["frame_conditioning_concatenate_mask"].change(
+            fn=lambda v: self.app.update_ui_state(frame_conditioning_concatenate_mask=v),
+            inputs=[self.components["frame_conditioning_concatenate_mask"]],
+            outputs=[]
+        )
 
         self.components["train_steps"].change(
             fn=lambda v: self.app.update_ui_state(train_steps=v),
@@ -470,11 +651,23 @@ class TrainTab(BaseTab):
                 self.components["save_iterations"],
                 self.components["preset_info"],
                 self.components["lora_params_row"],
+                self.components["lora_settings_row"],
                 self.components["num_gpus"],
                 self.components["precomputation_items"],
                 self.components["lr_warmup_steps"],
                 # Add model_version to the outputs
-                self.components["model_version"]
+                self.components["model_version"],
+                # Control parameters rows visibility
+                self.components["control_params_row"],
+                self.components["control_settings_row"],
+                self.components["frame_conditioning_row"],
+                self.components["control_options_row"],
+                # Control parameter values
+                self.components["control_type"],
+                self.components["train_qk_norm"],
+                self.components["frame_conditioning_type"],
+                self.components["frame_conditioning_index"],
+                self.components["frame_conditioning_concatenate_mask"],
             ]
         )
 
@@ -702,11 +895,28 @@ class TrainTab(BaseTab):
         # Get model info text
         model_info = self.get_model_info(model_type, training_type)
 
+        # Add general information about the selected training type
+        if training_type == "Full Finetune":
+            finetune_info = """
+        ## 🧠 Full Finetune Mode
+
+        Full finetune mode trains all parameters of the model, requiring more VRAM but potentially enabling higher quality results.
+
+        - Requires 20-50GB+ VRAM depending on model
+        - Creates a complete standalone model (~8GB+ file size)
+        - Recommended only for high-end GPUs (A100, H100, etc.)
+        - Not recommended for the larger models like Hunyuan Video on consumer hardware
+        """
+            model_info = finetune_info + "\n\n" + model_info
+
         # Get default parameters for this model type and training type
         params = self.get_default_params(MODEL_TYPES.get(model_type), TRAINING_TYPES.get(training_type))
 
         # Check if LoRA params should be visible
-        show_lora_params = training_type
+        show_lora_params = training_type in ["LoRA Finetune", "Control LoRA"]
+
+        # Check if Control-specific params should be visible
+        show_control_params = training_type in ["Control LoRA", "Control Full Finetune"]
 
         # Return updates for UI components
         return {
@@ -715,7 +925,12 @@ class TrainTab(BaseTab):
             self.components["batch_size"]: params["batch_size"],
             self.components["learning_rate"]: params["learning_rate"],
             self.components["save_iterations"]: params["save_iterations"],
-            self.components["lora_params_row"]: gr.Row(visible=show_lora_params)
+            self.components["lora_params_row"]: gr.Row(visible=show_lora_params),
+            self.components["lora_settings_row"]: gr.Row(visible=show_lora_params),
+            self.components["control_params_row"]: gr.Row(visible=show_control_params),
+            self.components["control_settings_row"]: gr.Row(visible=show_control_params),
+            self.components["frame_conditioning_row"]: gr.Row(visible=show_control_params),
+            self.components["control_options_row"]: gr.Row(visible=show_control_params)
         }
 
     def get_model_info(self, model_type: str, training_type: str) -> str:
@@ -729,6 +944,10 @@ class TrainTab(BaseTab):
 
         if training_type == "LoRA Finetune":
             return base_info + "\n- Required VRAM: ~18GB minimum\n- Default LoRA rank: 128 (~400 MB)"
+        elif training_type == "Control LoRA":
+            return base_info + "\n- Required VRAM: ~20GB minimum\n- Default LoRA rank: 128 (~400 MB)\n- Supports image conditioning"
+        elif training_type == "Control Full Finetune":
+            return base_info + "\n- Required VRAM: ~50GB minimum\n- Supports image conditioning\n- **Not recommended due to VRAM requirements**"
         else:
             return base_info + "\n- Required VRAM: ~48GB minimum\n- **Full finetune not recommended due to VRAM requirements**"
 
@@ -740,6 +959,10 @@ class TrainTab(BaseTab):
 
         if training_type == "LoRA Finetune":
             return base_info + "\n- Required VRAM: ~18GB minimum\n- Default LoRA rank: 128 (~400 MB)"
+        elif training_type == "Control LoRA":
+            return base_info + "\n- Required VRAM: ~20GB minimum\n- Default LoRA rank: 128 (~400 MB)\n- Supports image conditioning"
+        elif training_type == "Control Full Finetune":
+            return base_info + "\n- Required VRAM: ~23GB minimum\n- Supports image conditioning"
         else:
             return base_info + "\n- Required VRAM: ~21GB minimum\n- Full model size: ~8GB"
 
@@ -751,6 +974,10 @@ class TrainTab(BaseTab):
 
         if training_type == "LoRA Finetune":
             return base_info + "\n- Required VRAM: ~16GB minimum\n- Default LoRA rank: 32 (~120 MB)"
+        elif training_type == "Control LoRA":
+            return base_info + "\n- Required VRAM: ~18GB minimum\n- Default LoRA rank: 32 (~120 MB)\n- Supports image conditioning"
+        elif training_type == "Control Full Finetune":
+            return base_info + "\n- Required VRAM: ~40GB minimum\n- Supports image conditioning\n- **Not recommended due to VRAM requirements**"
         else:
             return base_info + "\n- **Full finetune not recommended due to VRAM requirements**"
 
@@ -848,7 +1075,11 @@ class TrainTab(BaseTab):
         info_text = f"{description}{bucket_info}"
 
         # Check if LoRA params should be visible
-
+        training_type_internal = preset["training_type"]
+        show_lora_params = training_type_internal == "lora" or training_type_internal == "control-lora"
+
+        # Check if Control params should be visible
+        show_control_params = training_type_internal == "control-lora" or training_type_internal == "control-full-finetune"
 
         # Use preset defaults but preserve user-modified values if they exist
         lora_rank_val = current_state.get("lora_rank") if current_state.get("lora_rank") != preset.get("lora_rank", DEFAULT_LORA_RANK_STR) else preset.get("lora_rank", DEFAULT_LORA_RANK_STR)
@@ -861,6 +1092,13 @@ class TrainTab(BaseTab):
         precomputation_items_val = current_state.get("precomputation_items") if current_state.get("precomputation_items") != preset.get("precomputation_items", DEFAULT_PRECOMPUTATION_ITEMS) else preset.get("precomputation_items", DEFAULT_PRECOMPUTATION_ITEMS)
         lr_warmup_steps_val = current_state.get("lr_warmup_steps") if current_state.get("lr_warmup_steps") != preset.get("lr_warmup_steps", DEFAULT_NB_LR_WARMUP_STEPS) else preset.get("lr_warmup_steps", DEFAULT_NB_LR_WARMUP_STEPS)
 
+        # Control parameters
+        control_type_val = current_state.get("control_type") if current_state.get("control_type") != preset.get("control_type", DEFAULT_CONTROL_TYPE) else preset.get("control_type", DEFAULT_CONTROL_TYPE)
+        train_qk_norm_val = current_state.get("train_qk_norm") if current_state.get("train_qk_norm") != preset.get("train_qk_norm", DEFAULT_TRAIN_QK_NORM) else preset.get("train_qk_norm", DEFAULT_TRAIN_QK_NORM)
+        frame_conditioning_type_val = current_state.get("frame_conditioning_type") if current_state.get("frame_conditioning_type") != preset.get("frame_conditioning_type", DEFAULT_FRAME_CONDITIONING_TYPE) else preset.get("frame_conditioning_type", DEFAULT_FRAME_CONDITIONING_TYPE)
+        frame_conditioning_index_val = current_state.get("frame_conditioning_index") if current_state.get("frame_conditioning_index") != preset.get("frame_conditioning_index", DEFAULT_FRAME_CONDITIONING_INDEX) else preset.get("frame_conditioning_index", DEFAULT_FRAME_CONDITIONING_INDEX)
+        frame_conditioning_concatenate_mask_val = current_state.get("frame_conditioning_concatenate_mask") if current_state.get("frame_conditioning_concatenate_mask") != preset.get("frame_conditioning_concatenate_mask", DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK) else preset.get("frame_conditioning_concatenate_mask", DEFAULT_FRAME_CONDITIONING_CONCATENATE_MASK)
+
         # Get the appropriate model version for the selected model type
         model_versions = self.get_model_version_choices(model_display_name)
         default_model_version = self.get_default_model_version(model_display_name)
@@ -896,6 +1134,16 @@ class TrainTab(BaseTab):
             precomputation_items_val,
             lr_warmup_steps_val,
             model_version_update,
+            # Control parameters rows visibility
+            gr.Row(visible=show_control_params),
+            gr.Row(visible=show_control_params),
+            gr.Row(visible=show_control_params),
+            # Control parameter values
+            control_type_val,
+            train_qk_norm_val,
+            frame_conditioning_type_val,
+            frame_conditioning_index_val,
+            frame_conditioning_concatenate_mask_val,
        )
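
The visibility plumbing added in this file follows one Gradio pattern throughout: a change handler computes boolean flags from the selected training type and returns `gr.Row(visible=...)` updates keyed by component, so a single handler can show or hide several rows at once. A minimal, runnable sketch of that pattern under Gradio 4 conventions (component and option names here are illustrative, not the repo's):

```python
import gradio as gr

def toggle_sections(training_type: str):
    # Same booleans as in the diff: LoRA rows for LoRA-style training,
    # control rows for the two control training types.
    show_lora = training_type in ["LoRA Finetune", "Control LoRA"]
    show_control = training_type in ["Control LoRA", "Control Full Finetune"]
    # Dict-style returns map each component to its update.
    return {
        lora_row: gr.Row(visible=show_lora),
        control_row: gr.Row(visible=show_control),
    }

with gr.Blocks() as demo:
    training_type = gr.Dropdown(
        ["LoRA Finetune", "Full Finetune", "Control LoRA", "Control Full Finetune"],
        value="LoRA Finetune", label="Training Type",
    )
    with gr.Row(visible=True) as lora_row:
        gr.Markdown("LoRA parameters go here")
    with gr.Row(visible=False) as control_row:
        gr.Markdown("Control parameters go here")

    training_type.change(
        fn=toggle_sections,
        inputs=[training_type],
        outputs=[lora_row, control_row],
    )

demo.launch()
```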