Unverified Commit 56c83e45 authored by Spaceginner's avatar Spaceginner
Browse files

Merge remote-tracking branch 'origin/master'

parents 9ecf1e82 63391419
Loading
Loading
Loading
Loading
+99 −0
Original line number Diff line number Diff line
# File modified by authors of InstructPix2Pix from original (https://github.com/CompVis/stable-diffusion).
# See more details in LICENSE.

model:
  base_learning_rate: 1.0e-04
  target: modules.models.diffusion.ddpm_edit.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: edited
    cond_stage_key: edit
    # image_size: 64
    # image_size: 32
    image_size: 16
    channels: 4
    cond_stage_trainable: false   # Note: different from the one we trained before
    conditioning_key: hybrid
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: true
    load_ema: true

    scheduler_config: # 10000 warmup steps
      target: ldm.lr_scheduler.LambdaLinearScheduler
      params:
        warm_up_steps: [ 0 ]
        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
        f_start: [ 1.e-6 ]
        f_max: [ 1. ]
        f_min: [ 1. ]

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32 # unused
        in_channels: 8
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: True
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 128
    num_workers: 1
    wrap: false
    validation:
      target: edit_dataset.EditDataset
      params:
        path: data/clip-filtered-dataset
        cache_dir:  data/
        cache_name: data_10k
        split: val
        min_text_sim: 0.2
        min_image_sim: 0.75
        min_direction_sim: 0.2
        max_samples_per_prompt: 1
        min_resize_res: 512
        max_resize_res: 512
        crop_res: 512
        output_as_edit: False
        real_input: True
+19 −17
Original line number Diff line number Diff line
model:
  base_learning_rate: 1.0e-4
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  base_learning_rate: 7.5e-05
  target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
  params:
    parameterization: "v"
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
@@ -12,29 +11,36 @@ model:
    cond_stage_key: "txt"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn
    cond_stage_trainable: false   # Note: different from the one we trained before
    conditioning_key: hybrid   # important
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: False # we set this to false because this is an inference only config
    finetune_keys: null

    scheduler_config: # 10000 warmup steps
      target: ldm.lr_scheduler.LambdaLinearScheduler
      params:
        warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
        f_start: [ 1.e-6 ]
        f_max: [ 1. ]
        f_min: [ 1. ]

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        use_checkpoint: True
        use_fp16: True
        image_size: 32 # unused
        in_channels: 4
        in_channels: 9  # 4 data + 4 downscaled image + 1 mask
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_head_channels: 64 # need to fix for flash-attn
        num_heads: 8
        use_spatial_transformer: True
        use_linear_in_transformer: True
        transformer_depth: 1
        context_dim: 1024
        context_dim: 768
        use_checkpoint: True
        legacy: False

    first_stage_config:
@@ -43,7 +49,6 @@ model:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          #attn_type: "vanilla-xformers"
          double_z: true
          z_channels: 4
          resolution: 256
@@ -62,7 +67,4 @@ model:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: True
        layer: "penultimate"
 No newline at end of file
      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
+3 −2
Original line number Diff line number Diff line
@@ -18,7 +18,8 @@ from modules.textual_inversion.textual_inversion import create_embedding, train_
from modules.textual_inversion.preprocess import preprocess
from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork
from PIL import PngImagePlugin,Image
from modules.sd_models import checkpoints_list, find_checkpoint_config
from modules.sd_models import checkpoints_list
from modules.sd_models_config import find_checkpoint_config_near_filename
from modules.realesrgan_model import get_realesrgan_models
from modules import devices
from typing import List
@@ -387,7 +388,7 @@ class Api:
        ]

    def get_sd_models(self):
        return [{"title": x.title, "model_name": x.model_name, "hash": x.shorthash, "sha256": x.sha256, "filename": x.filename, "config": find_checkpoint_config(x)} for x in checkpoints_list.values()]
        return [{"title": x.title, "model_name": x.model_name, "hash": x.shorthash, "sha256": x.sha256, "filename": x.filename, "config": find_checkpoint_config_near_filename(x)} for x in checkpoints_list.values()]

    def get_hypernetworks(self):
        return [{"name": name, "path": shared.hypernetworks[name]} for name in shared.hypernetworks]
+1 −1
Original line number Diff line number Diff line
@@ -228,7 +228,7 @@ class SDModelItem(BaseModel):
    hash: Optional[str] = Field(title="Short hash")
    sha256: Optional[str] = Field(title="sha256 hash")
    filename: str = Field(title="Filename")
    config: str = Field(title="Config file")
    config: Optional[str] = Field(title="Config file")

class HypernetworkItem(BaseModel):
    name: str = Field(title="Name")
+10 −4
Original line number Diff line number Diff line
@@ -34,14 +34,18 @@ def get_cuda_device_string():
    return "cuda"


def get_optimal_device():
def get_optimal_device_name():
    if torch.cuda.is_available():
        return torch.device(get_cuda_device_string())
        return get_cuda_device_string()

    if has_mps():
        return torch.device("mps")
        return "mps"

    return "cpu"

    return cpu

def get_optimal_device():
    return torch.device(get_optimal_device_name())


def get_device_for(task):
@@ -139,6 +143,8 @@ def test_for_nans(x, where):
    else:
        message = "A tensor with all NaNs was produced."

    message += " Use --disable-nan-check commandline argument to disable this check."

    raise NansException(message)


Loading