Skip to content

Instantly share code, notes, and snippets.

@Jaid
Last active December 31, 2025 05:03
Show Gist options
  • Select an option

  • Save Jaid/85d2b63289c65168ba06e113b94857e0 to your computer and use it in GitHub Desktop.

Select an option

Save Jaid/85d2b63289c65168ba06e113b94857e0 to your computer and use it in GitHub Desktop.
Kohya-SS CLI help
usage: sdxl_train_network.py [-h] [--console_log_level {DEBUG,INFO,WARNING,ERROR,CRITICAL}]
[--console_log_file CONSOLE_LOG_FILE] [--console_log_simple] [--v2]
[--v_parameterization]
[--pretrained_model_name_or_path PRETRAINED_MODEL_NAME_OR_PATH]
[--tokenizer_cache_dir TOKENIZER_CACHE_DIR]
[--train_data_dir TRAIN_DATA_DIR] [--cache_info] [--shuffle_caption]
[--caption_separator CAPTION_SEPARATOR]
[--caption_extension CAPTION_EXTENSION]
[--caption_extention CAPTION_EXTENTION] [--keep_tokens KEEP_TOKENS]
[--keep_tokens_separator KEEP_TOKENS_SEPARATOR]
[--secondary_separator SECONDARY_SEPARATOR] [--enable_wildcard]
[--caption_prefix CAPTION_PREFIX] [--caption_suffix CAPTION_SUFFIX]
[--color_aug] [--flip_aug]
[--face_crop_aug_range FACE_CROP_AUG_RANGE] [--random_crop]
[--debug_dataset] [--resolution RESOLUTION] [--cache_latents]
[--vae_batch_size VAE_BATCH_SIZE] [--cache_latents_to_disk]
[--enable_bucket] [--min_bucket_reso MIN_BUCKET_RESO]
[--max_bucket_reso MAX_BUCKET_RESO]
[--bucket_reso_steps BUCKET_RESO_STEPS] [--bucket_no_upscale]
[--token_warmup_min TOKEN_WARMUP_MIN]
[--token_warmup_step TOKEN_WARMUP_STEP] [--alpha_mask]
[--dataset_class DATASET_CLASS]
[--caption_dropout_rate CAPTION_DROPOUT_RATE]
[--caption_dropout_every_n_epochs CAPTION_DROPOUT_EVERY_N_EPOCHS]
[--caption_tag_dropout_rate CAPTION_TAG_DROPOUT_RATE]
[--reg_data_dir REG_DATA_DIR] [--in_json IN_JSON]
[--dataset_repeats DATASET_REPEATS] [--output_dir OUTPUT_DIR]
[--output_name OUTPUT_NAME]
[--huggingface_repo_id HUGGINGFACE_REPO_ID]
[--huggingface_repo_type HUGGINGFACE_REPO_TYPE]
[--huggingface_path_in_repo HUGGINGFACE_PATH_IN_REPO]
[--huggingface_token HUGGINGFACE_TOKEN]
[--huggingface_repo_visibility HUGGINGFACE_REPO_VISIBILITY]
[--save_state_to_huggingface] [--resume_from_huggingface]
[--async_upload] [--save_precision {None,float,fp16,bf16}]
[--save_every_n_epochs SAVE_EVERY_N_EPOCHS]
[--save_every_n_steps SAVE_EVERY_N_STEPS]
[--save_n_epoch_ratio SAVE_N_EPOCH_RATIO]
[--save_last_n_epochs SAVE_LAST_N_EPOCHS]
[--save_last_n_epochs_state SAVE_LAST_N_EPOCHS_STATE]
[--save_last_n_steps SAVE_LAST_N_STEPS]
[--save_last_n_steps_state SAVE_LAST_N_STEPS_STATE] [--save_state]
[--save_state_on_train_end] [--resume RESUME]
[--train_batch_size TRAIN_BATCH_SIZE]
[--max_token_length {None,150,225}] [--mem_eff_attn]
[--torch_compile]
[--dynamo_backend {eager,aot_eager,inductor,aot_ts_nvfuser,nvprims_nvfuser,cudagraphs,ofi,fx2trt,onnxrt}]
[--xformers] [--sdpa] [--vae VAE]
[--max_train_steps MAX_TRAIN_STEPS]
[--max_train_epochs MAX_TRAIN_EPOCHS]
[--max_data_loader_n_workers MAX_DATA_LOADER_N_WORKERS]
[--persistent_data_loader_workers] [--seed SEED]
[--gradient_checkpointing]
[--gradient_accumulation_steps GRADIENT_ACCUMULATION_STEPS]
[--mixed_precision {no,fp16,bf16}] [--full_fp16] [--full_bf16]
[--fp8_base] [--ddp_timeout DDP_TIMEOUT]
[--ddp_gradient_as_bucket_view] [--ddp_static_graph]
[--clip_skip CLIP_SKIP] [--logging_dir LOGGING_DIR]
[--log_with {tensorboard,wandb,all}] [--log_prefix LOG_PREFIX]
[--log_tracker_name LOG_TRACKER_NAME]
[--wandb_run_name WANDB_RUN_NAME]
[--log_tracker_config LOG_TRACKER_CONFIG]
[--wandb_api_key WANDB_API_KEY] [--log_config]
[--noise_offset NOISE_OFFSET] [--noise_offset_random_strength]
[--multires_noise_iterations MULTIRES_NOISE_ITERATIONS]
[--ip_noise_gamma IP_NOISE_GAMMA]
[--ip_noise_gamma_random_strength]
[--multires_noise_discount MULTIRES_NOISE_DISCOUNT]
[--adaptive_noise_scale ADAPTIVE_NOISE_SCALE] [--zero_terminal_snr]
[--min_timestep MIN_TIMESTEP] [--max_timestep MAX_TIMESTEP]
[--loss_type {l1,l2,huber,smooth_l1}]
[--huber_schedule {constant,exponential,snr}] [--huber_c HUBER_C]
[--lowram] [--highvram]
[--sample_every_n_steps SAMPLE_EVERY_N_STEPS] [--sample_at_first]
[--sample_every_n_epochs SAMPLE_EVERY_N_EPOCHS]
[--sample_prompts SAMPLE_PROMPTS]
[--sample_sampler {ddim,pndm,lms,euler,euler_a,heun,dpm_2,dpm_2_a,dpmsolver,dpmsolver++,dpmsingle,k_lms,k_euler,k_euler_a,k_dpm_2,k_dpm_2_a}]
[--config_file CONFIG_FILE] [--output_config]
[--metadata_title METADATA_TITLE] [--metadata_author METADATA_AUTHOR]
[--metadata_description METADATA_DESCRIPTION]
[--metadata_license METADATA_LICENSE]
[--metadata_tags METADATA_TAGS]
[--prior_loss_weight PRIOR_LOSS_WEIGHT]
[--conditioning_data_dir CONDITIONING_DATA_DIR] [--masked_loss]
[--deepspeed] [--zero_stage {0,1,2,3}]
[--offload_optimizer_device {None,cpu,nvme}]
[--offload_optimizer_nvme_path OFFLOAD_OPTIMIZER_NVME_PATH]
[--offload_param_device {None,cpu,nvme}]
[--offload_param_nvme_path OFFLOAD_PARAM_NVME_PATH]
[--zero3_init_flag] [--zero3_save_16bit_model]
[--fp16_master_weights_and_gradients]
[--optimizer_type OPTIMIZER_TYPE] [--use_8bit_adam]
[--use_lion_optimizer] [--learning_rate LEARNING_RATE]
[--max_grad_norm MAX_GRAD_NORM]
[--optimizer_args [OPTIMIZER_ARGS ...]]
[--lr_scheduler_type LR_SCHEDULER_TYPE]
[--lr_scheduler_args [LR_SCHEDULER_ARGS ...]]
[--lr_scheduler LR_SCHEDULER] [--lr_warmup_steps LR_WARMUP_STEPS]
[--lr_scheduler_num_cycles LR_SCHEDULER_NUM_CYCLES]
[--lr_scheduler_power LR_SCHEDULER_POWER] [--fused_backward_pass]
[--dataset_config DATASET_CONFIG] [--min_snr_gamma MIN_SNR_GAMMA]
[--scale_v_pred_loss_like_noise_pred]
[--v_pred_like_loss V_PRED_LIKE_LOSS] [--debiased_estimation_loss]
[--weighted_captions] [--no_metadata]
[--save_model_as {None,ckpt,pt,safetensors}] [--unet_lr UNET_LR]
[--text_encoder_lr TEXT_ENCODER_LR]
[--network_weights NETWORK_WEIGHTS]
[--network_module NETWORK_MODULE] [--network_dim NETWORK_DIM]
[--network_alpha NETWORK_ALPHA] [--network_dropout NETWORK_DROPOUT]
[--network_args [NETWORK_ARGS ...]] [--network_train_unet_only]
[--network_train_text_encoder_only]
[--training_comment TRAINING_COMMENT] [--dim_from_weights]
[--scale_weight_norms SCALE_WEIGHT_NORMS]
[--base_weights [BASE_WEIGHTS ...]]
[--base_weights_multiplier [BASE_WEIGHTS_MULTIPLIER ...]]
[--no_half_vae] [--skip_until_initial_step]
[--initial_epoch INITIAL_EPOCH] [--initial_step INITIAL_STEP]
[--cache_text_encoder_outputs]
[--cache_text_encoder_outputs_to_disk]
[--disable_mmap_load_safetensors]
options:
-h, --help show this help message and exit
--console_log_level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
Set the logging level
--console_log_file CONSOLE_LOG_FILE
Log to a file instead of stderr
--console_log_simple Simple log output
--v2 load Stable Diffusion v2.0 model
--v_parameterization enable v-parameterization training
--pretrained_model_name_or_path PRETRAINED_MODEL_NAME_OR_PATH
pretrained model to train, directory to Diffusers model or StableDiffusion
checkpoint
--tokenizer_cache_dir TOKENIZER_CACHE_DIR
directory for caching Tokenizer (for offline training)
--train_data_dir TRAIN_DATA_DIR
directory for train images
--cache_info cache meta information (caption and image size) for faster dataset
loading. only available for DreamBooth
--shuffle_caption shuffle separated caption
--caption_separator CAPTION_SEPARATOR
separator for caption
--caption_extension CAPTION_EXTENSION
extension of caption files
--caption_extention CAPTION_EXTENTION
extension of caption files (backward compatibility)
--keep_tokens KEEP_TOKENS
keep heading N tokens when shuffling caption tokens (token means comma
separated strings)
--keep_tokens_separator KEEP_TOKENS_SEPARATOR
A custom separator to divide the caption into fixed and flexible parts.
Tokens before this separator will not be shuffled. If not specified,
'--keep_tokens' will be used to determine the fixed number of tokens.
--secondary_separator SECONDARY_SEPARATOR
a secondary separator for caption. This separator is replaced with
caption_separator after dropping/shuffling caption
--enable_wildcard enable wildcard for caption (e.g. '{image|picture|rendition}')
--caption_prefix CAPTION_PREFIX
prefix for caption text
--caption_suffix CAPTION_SUFFIX
suffix for caption text
--color_aug enable weak color augmentation
--flip_aug enable horizontal flip augmentation
--face_crop_aug_range FACE_CROP_AUG_RANGE
enable face-centered crop augmentation and its range (e.g. 2.0,4.0)
--random_crop enable random crop (for style training in face-centered crop augmentation)
--debug_dataset show images for debugging (do not train)
--resolution RESOLUTION
resolution in training ('size' or 'width,height')
--cache_latents cache latents to main memory to reduce VRAM usage (augmentations must be
disabled)
--vae_batch_size VAE_BATCH_SIZE
batch size for caching latents
--cache_latents_to_disk
cache latents to disk to reduce VRAM usage (augmentations must be
disabled)
--enable_bucket enable buckets for multi aspect ratio training
--min_bucket_reso MIN_BUCKET_RESO
minimum resolution for buckets
--max_bucket_reso MAX_BUCKET_RESO
maximum resolution for buckets
--bucket_reso_steps BUCKET_RESO_STEPS
steps of resolution for buckets, divisible by 8 is recommended
--bucket_no_upscale make bucket for each image without upscaling
--token_warmup_min TOKEN_WARMUP_MIN
start learning at N tags (token means comma separated strings)
--token_warmup_step TOKEN_WARMUP_STEP
tag length reaches maximum on N steps (or N*max_train_steps if N<1)
--alpha_mask use alpha channel as mask for training
--dataset_class DATASET_CLASS
dataset class for arbitrary dataset (package.module.Class)
--caption_dropout_rate CAPTION_DROPOUT_RATE
Rate of caption dropout (0.0~1.0)
--caption_dropout_every_n_epochs CAPTION_DROPOUT_EVERY_N_EPOCHS
Dropout all captions every N epochs
--caption_tag_dropout_rate CAPTION_TAG_DROPOUT_RATE
Rate of dropout for comma separated tokens (0.0~1.0)
--reg_data_dir REG_DATA_DIR
directory for regularization images
--in_json IN_JSON json metadata for dataset
--dataset_repeats DATASET_REPEATS
repeat dataset when training with captions
--output_dir OUTPUT_DIR
directory to output trained model
--output_name OUTPUT_NAME
base name of trained model file
--huggingface_repo_id HUGGINGFACE_REPO_ID
huggingface repo name to upload
--huggingface_repo_type HUGGINGFACE_REPO_TYPE
huggingface repo type to upload
--huggingface_path_in_repo HUGGINGFACE_PATH_IN_REPO
huggingface model path to upload files
--huggingface_token HUGGINGFACE_TOKEN
huggingface token
--huggingface_repo_visibility HUGGINGFACE_REPO_VISIBILITY
huggingface repository visibility ('public' for public, 'private' or None
for private)
--save_state_to_huggingface
save state to huggingface
--resume_from_huggingface
resume from huggingface (ex: --resume
{repo_id}/{path_in_repo}:{revision}:{repo_type})
--async_upload upload to huggingface asynchronously
--save_precision {None,float,fp16,bf16}
precision in saving
--save_every_n_epochs SAVE_EVERY_N_EPOCHS
save checkpoint every N epochs
--save_every_n_steps SAVE_EVERY_N_STEPS
save checkpoint every N steps
--save_n_epoch_ratio SAVE_N_EPOCH_RATIO
save checkpoint N epoch ratio (for example 5 means save at least 5 files
total)
--save_last_n_epochs SAVE_LAST_N_EPOCHS
save last N checkpoints when saving every N epochs (remove older
checkpoints)
--save_last_n_epochs_state SAVE_LAST_N_EPOCHS_STATE
save last N checkpoints of state (overrides the value of
--save_last_n_epochs)
--save_last_n_steps SAVE_LAST_N_STEPS
save checkpoints until N steps elapsed (remove older checkpoints if N
steps elapsed)
--save_last_n_steps_state SAVE_LAST_N_STEPS_STATE
save states until N steps elapsed (remove older states if N steps elapsed,
overrides --save_last_n_steps)
--save_state save training state additionally (including optimizer states etc.) when
saving model
--save_state_on_train_end
save training state (including optimizer states etc.) on train end
--resume RESUME saved state to resume training
--train_batch_size TRAIN_BATCH_SIZE
batch size for training
--max_token_length {None,150,225}
max token length of text encoder (default is 75; 150 or 225 can be specified)
--mem_eff_attn use memory efficient attention for CrossAttention
--torch_compile use torch.compile (requires PyTorch 2.0)
--dynamo_backend {eager,aot_eager,inductor,aot_ts_nvfuser,nvprims_nvfuser,cudagraphs,ofi,fx2trt,onnxrt}
dynamo backend type (default is inductor)
--xformers use xformers for CrossAttention
--sdpa use sdpa for CrossAttention (requires PyTorch 2.0)
--vae VAE path to checkpoint of vae to replace
--max_train_steps MAX_TRAIN_STEPS
training steps
--max_train_epochs MAX_TRAIN_EPOCHS
training epochs (overrides max_train_steps)
--max_data_loader_n_workers MAX_DATA_LOADER_N_WORKERS
max num workers for DataLoader (lower is less main RAM usage, faster epoch
start and slower data loading)
--persistent_data_loader_workers
persistent DataLoader workers (useful for reducing the time gap between
epochs, but may use more memory)
--seed SEED random seed for training
--gradient_checkpointing
enable gradient checkpointing
--gradient_accumulation_steps GRADIENT_ACCUMULATION_STEPS
Number of update steps to accumulate before performing a backward/update
pass
--mixed_precision {no,fp16,bf16}
use mixed precision
--full_fp16 fp16 training including gradients
--full_bf16 bf16 training including gradients
--fp8_base use fp8 for base model
--ddp_timeout DDP_TIMEOUT
DDP timeout (min, None for default of accelerate)
--ddp_gradient_as_bucket_view
enable gradient_as_bucket_view for DDP
--ddp_static_graph enable static_graph for DDP
--clip_skip CLIP_SKIP
use output of nth layer from back of text encoder (n>=1)
--logging_dir LOGGING_DIR
enable logging and output TensorBoard log to this directory
--log_with {tensorboard,wandb,all}
what logging tool(s) to use (if 'all', TensorBoard and WandB are both
used)
--log_prefix LOG_PREFIX
add prefix for each log directory
--log_tracker_name LOG_TRACKER_NAME
name of tracker to use for logging, default is script-specific default
name
--wandb_run_name WANDB_RUN_NAME
The name of the specific wandb session
--log_tracker_config LOG_TRACKER_CONFIG
path to tracker config file to use for logging
--wandb_api_key WANDB_API_KEY
specify WandB API key to log in before starting training (optional).
--log_config log training configuration
--noise_offset NOISE_OFFSET
enable noise offset with this value (if enabled, around 0.1 is
recommended)
--noise_offset_random_strength
use random strength between 0~noise_offset for noise offset.
--multires_noise_iterations MULTIRES_NOISE_ITERATIONS
enable multires noise with this number of iterations (if enabled, around
6-10 is recommended)
--ip_noise_gamma IP_NOISE_GAMMA
enable input perturbation noise. used for regularization. recommended
value: around 0.1 (from arxiv.org/abs/2301.11706)
--ip_noise_gamma_random_strength
Use random strength between 0~ip_noise_gamma for input perturbation noise.
--multires_noise_discount MULTIRES_NOISE_DISCOUNT
set discount value for multires noise (has no effect without
--multires_noise_iterations)
--adaptive_noise_scale ADAPTIVE_NOISE_SCALE
add `latent mean absolute value * this value` to noise_offset (disabled if
None, default)
--zero_terminal_snr fix noise scheduler betas to enforce zero terminal SNR
--min_timestep MIN_TIMESTEP
set minimum time step for U-Net training (0~999, default is 0)
--max_timestep MAX_TIMESTEP
set maximum time step for U-Net training (1~1000, default is 1000)
--loss_type {l1,l2,huber,smooth_l1}
The type of loss function to use (L1, L2, Huber, or smooth L1), default is
L2
--huber_schedule {constant,exponential,snr}
The scheduling method for Huber loss (constant, exponential, or SNR-
based). Only used when loss_type is 'huber' or 'smooth_l1'. default is snr
--huber_c HUBER_C The huber loss parameter. Only used if one of the huber loss modes (huber
or smooth l1) is selected with loss_type. default is 0.1
--lowram enable low RAM optimization. e.g. load models to VRAM instead of RAM (for
machines which have bigger VRAM than RAM such as Colab and Kaggle)
--highvram disable low VRAM optimization. e.g. do not clear CUDA cache after each
latent caching (for machines which have bigger VRAM)
--sample_every_n_steps SAMPLE_EVERY_N_STEPS
generate sample images every N steps
--sample_at_first generate sample images before training
--sample_every_n_epochs SAMPLE_EVERY_N_EPOCHS
generate sample images every N epochs (overwrites n_steps)
--sample_prompts SAMPLE_PROMPTS
file for prompts to generate sample images
--sample_sampler {ddim,pndm,lms,euler,euler_a,heun,dpm_2,dpm_2_a,dpmsolver,dpmsolver++,dpmsingle,k_lms,k_euler,k_euler_a,k_dpm_2,k_dpm_2_a}
sampler (scheduler) type for sample images
--config_file CONFIG_FILE
use a .toml file instead of args to pass hyperparameters
--output_config output command line args to given .toml file
--metadata_title METADATA_TITLE
title for model metadata (default is output_name)
--metadata_author METADATA_AUTHOR
author name for model metadata
--metadata_description METADATA_DESCRIPTION
description for model metadata
--metadata_license METADATA_LICENSE
license for model metadata
--metadata_tags METADATA_TAGS
tags for model metadata, separated by comma
--prior_loss_weight PRIOR_LOSS_WEIGHT
loss weight for regularization images
--conditioning_data_dir CONDITIONING_DATA_DIR
conditioning data directory
--masked_loss apply mask for calculating loss. conditioning_data_dir is required for
dataset.
--deepspeed enable deepspeed training
--zero_stage {0,1,2,3}
Possible options are 0,1,2,3.
--offload_optimizer_device {None,cpu,nvme}
Possible options are none|cpu|nvme. Only applicable with ZeRO Stages 2 and
3.
--offload_optimizer_nvme_path OFFLOAD_OPTIMIZER_NVME_PATH
Possible options are /nvme|/local_nvme. Only applicable with ZeRO Stage 3.
--offload_param_device {None,cpu,nvme}
Possible options are none|cpu|nvme. Only applicable with ZeRO Stage 3.
--offload_param_nvme_path OFFLOAD_PARAM_NVME_PATH
Possible options are /nvme|/local_nvme. Only applicable with ZeRO Stage 3.
--zero3_init_flag Flag to indicate whether to enable `deepspeed.zero.Init` for constructing
massive models. Only applicable with ZeRO Stage-3.
--zero3_save_16bit_model
Flag to indicate whether to save 16-bit model. Only applicable with ZeRO
Stage-3.
--fp16_master_weights_and_gradients
fp16_master_and_gradients requires optimizer to support keeping fp16
master and gradients while keeping the optimizer states in fp32.
--optimizer_type OPTIMIZER_TYPE
Optimizer to use: AdamW (default), AdamW8bit, PagedAdamW, PagedAdamW8bit,
PagedAdamW32bit, Lion8bit, PagedLion8bit, Lion, SGDNesterov,
SGDNesterov8bit, DAdaptation(DAdaptAdamPreprint), DAdaptAdaGrad,
DAdaptAdam, DAdaptAdan, DAdaptAdanIP, DAdaptLion, DAdaptSGD, AdaFactor
--use_8bit_adam use 8bit AdamW optimizer (requires bitsandbytes)
--use_lion_optimizer use Lion optimizer (requires lion-pytorch)
--learning_rate LEARNING_RATE
learning rate
--max_grad_norm MAX_GRAD_NORM
Max gradient norm, 0 for no clipping
--optimizer_args [OPTIMIZER_ARGS ...]
additional arguments for optimizer (like "weight_decay=0.01
betas=0.9,0.999 ...")
--lr_scheduler_type LR_SCHEDULER_TYPE
custom scheduler module
--lr_scheduler_args [LR_SCHEDULER_ARGS ...]
additional arguments for scheduler (like "T_max=100")
--lr_scheduler LR_SCHEDULER
scheduler to use for learning rate: linear, cosine,
cosine_with_restarts, polynomial, constant (default),
constant_with_warmup, adafactor
--lr_warmup_steps LR_WARMUP_STEPS
Number of steps for the warmup in the lr scheduler (default is 0)
--lr_scheduler_num_cycles LR_SCHEDULER_NUM_CYCLES
Number of restarts for cosine scheduler with restarts
--lr_scheduler_power LR_SCHEDULER_POWER
Polynomial power for polynomial scheduler
--fused_backward_pass
Combines backward pass and optimizer step to reduce VRAM usage. Only
available in SDXL
--dataset_config DATASET_CONFIG
config file for detail settings
--min_snr_gamma MIN_SNR_GAMMA
gamma for reducing the weight of high loss timesteps. Lower numbers have
stronger effect. 5 is recommended by paper.
--scale_v_pred_loss_like_noise_pred
scale v-prediction loss like noise prediction loss
--v_pred_like_loss V_PRED_LIKE_LOSS
add v-prediction like loss multiplied by this value
--debiased_estimation_loss
debiased estimation loss
--weighted_captions Enable weighted captions in the standard style (token:1.3). No commas
inside parens, or shuffle/dropout may break the decoder.
--no_metadata do not save metadata in output model
--save_model_as {None,ckpt,pt,safetensors}
format to save the model (default is .safetensors)
--unet_lr UNET_LR learning rate for U-Net
--text_encoder_lr TEXT_ENCODER_LR
learning rate for Text Encoder
--network_weights NETWORK_WEIGHTS
pretrained weights for network
--network_module NETWORK_MODULE
network module to train
--network_dim NETWORK_DIM
network dimensions (depends on each network)
--network_alpha NETWORK_ALPHA
alpha for LoRA weight scaling, default 1 (same as network_dim for same
behavior as old version)
--network_dropout NETWORK_DROPOUT
Drops neurons out of training every step (0 or None is default behavior
(no dropout), 1 would drop all neurons)
--network_args [NETWORK_ARGS ...]
additional arguments for network (key=value)
--network_train_unet_only
only training U-Net part
--network_train_text_encoder_only
only training Text Encoder part
--training_comment TRAINING_COMMENT
arbitrary comment string stored in metadata
--dim_from_weights automatically determine dim (rank) from network_weights
--scale_weight_norms SCALE_WEIGHT_NORMS
Scale the weight of each key pair to help prevent overtraining via exploding
gradients. (1 is a good starting point)
--base_weights [BASE_WEIGHTS ...]
network weights to merge into the model before training
--base_weights_multiplier [BASE_WEIGHTS_MULTIPLIER ...]
multiplier for network weights to merge into the model before training
--no_half_vae do not use fp16/bf16 VAE in mixed precision (use float VAE)
--skip_until_initial_step
skip training until initial_step is reached
--initial_epoch INITIAL_EPOCH
initial epoch number, 1 means first epoch (same as not specifying). NOTE:
initial_epoch/step doesn't affect the lr scheduler, which means the lr
scheduler will start from 0 without `--resume`.
--initial_step INITIAL_STEP
initial step number including all epochs, 0 means first step (same as not
specifying). overwrites initial_epoch.
--cache_text_encoder_outputs
cache text encoder outputs
--cache_text_encoder_outputs_to_disk
cache text encoder outputs to disk
@jacsonalber2-sketch
Copy link

The Benazir Income Support Program (BISP) plays a crucial role in providing financial assistance to vulnerable families across Pakistan. It’s a vital social safety net that helps improve the lives of low-income households, especially women and children.

@mudasirkhanjoiya1122-cmd

This file contains notes, tips, and troubleshooting help for using kohya-sd-scripts.

It is intended as a quick reference for common commands, settings, and issues.

Update as needed when scripts or workflows change.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment