Merge remote-tracking branch 'origin/master' (56c83e45) · Commits · github_fork / Stable Diffusion Webui

configs/instruct-pix2pix.yaml

0 → 100644

+99 −0

Original line number	Diff line number	Diff line
		# File modified by authors of InstructPix2Pix from original (https://github.com/CompVis/stable-diffusion).
		# See more details in LICENSE.

		model:
		base_learning_rate: 1.0e-04
		target: modules.models.diffusion.ddpm_edit.LatentDiffusion
		params:
		linear_start: 0.00085
		linear_end: 0.0120
		num_timesteps_cond: 1
		log_every_t: 200
		timesteps: 1000
		first_stage_key: edited
		cond_stage_key: edit
		# image_size: 64
		# image_size: 32
		image_size: 16
		channels: 4
		cond_stage_trainable: false # Note: different from the one we trained before
		conditioning_key: hybrid
		monitor: val/loss_simple_ema
		scale_factor: 0.18215
		use_ema: true
		load_ema: true

		scheduler_config: # no warmup in this config (warm_up_steps is [ 0 ], not 10000)
		target: ldm.lr_scheduler.LambdaLinearScheduler
		params:
		warm_up_steps: [ 0 ]
		cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
		f_start: [ 1.e-6 ]
		f_max: [ 1. ]
		f_min: [ 1. ]

		unet_config:
		target: ldm.modules.diffusionmodules.openaimodel.UNetModel
		params:
		image_size: 32 # unused
		in_channels: 8
		out_channels: 4
		model_channels: 320
		attention_resolutions: [ 4, 2, 1 ]
		num_res_blocks: 2
		channel_mult: [ 1, 2, 4, 4 ]
		num_heads: 8
		use_spatial_transformer: True
		transformer_depth: 1
		context_dim: 768
		use_checkpoint: True
		legacy: False

		first_stage_config:
		target: ldm.models.autoencoder.AutoencoderKL
		params:
		embed_dim: 4
		monitor: val/rec_loss
		ddconfig:
		double_z: true
		z_channels: 4
		resolution: 256
		in_channels: 3
		out_ch: 3
		ch: 128
		ch_mult:
		- 1
		- 2
		- 4
		- 4
		num_res_blocks: 2
		attn_resolutions: []
		dropout: 0.0
		lossconfig:
		target: torch.nn.Identity

		cond_stage_config:
		target: ldm.modules.encoders.modules.FrozenCLIPEmbedder

		data:
		target: main.DataModuleFromConfig
		params:
		batch_size: 128
		num_workers: 1
		wrap: false
		validation:
		target: edit_dataset.EditDataset
		params:
		path: data/clip-filtered-dataset
		cache_dir: data/
		cache_name: data_10k
		split: val
		min_text_sim: 0.2
		min_image_sim: 0.75
		min_direction_sim: 0.2
		max_samples_per_prompt: 1
		min_resize_res: 512
		max_resize_res: 512
		crop_res: 512
		output_as_edit: False
		real_input: True

v2-inference-v.yaml→configs/v1-inpainting-inference.yaml

+19 −17

Original line number	Diff line number	Diff line
		model:
		base_learning_rate: 1.0e-4
		target: ldm.models.diffusion.ddpm.LatentDiffusion
		base_learning_rate: 7.5e-05
		target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
		params:
		parameterization: "v"
		linear_start: 0.00085
		linear_end: 0.0120
		num_timesteps_cond: 1
		@@ -12,29 +11,36 @@ model:
		cond_stage_key: "txt"
		image_size: 64
		channels: 4
		cond_stage_trainable: false
		conditioning_key: crossattn
		cond_stage_trainable: false # Note: different from the one we trained before
		conditioning_key: hybrid # important
		monitor: val/loss_simple_ema
		scale_factor: 0.18215
		use_ema: False # we set this to false because this is an inference only config
		finetune_keys: null

		scheduler_config: # 10000 warmup steps
		target: ldm.lr_scheduler.LambdaLinearScheduler
		params:
		warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
		cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
		f_start: [ 1.e-6 ]
		f_max: [ 1. ]
		f_min: [ 1. ]

		unet_config:
		target: ldm.modules.diffusionmodules.openaimodel.UNetModel
		params:
		use_checkpoint: True
		use_fp16: True
		image_size: 32 # unused
		in_channels: 4
		in_channels: 9 # 4 data + 4 downscaled image + 1 mask
		out_channels: 4
		model_channels: 320
		attention_resolutions: [ 4, 2, 1 ]
		num_res_blocks: 2
		channel_mult: [ 1, 2, 4, 4 ]
		num_head_channels: 64 # need to fix for flash-attn
		num_heads: 8
		use_spatial_transformer: True
		use_linear_in_transformer: True
		transformer_depth: 1
		context_dim: 1024
		context_dim: 768
		use_checkpoint: True
		legacy: False

		first_stage_config:
		@@ -43,7 +49,6 @@ model:
		embed_dim: 4
		monitor: val/rec_loss
		ddconfig:
		#attn_type: "vanilla-xformers"
		double_z: true
		z_channels: 4
		resolution: 256
		@@ -62,7 +67,4 @@ model:
		target: torch.nn.Identity

		cond_stage_config:
		target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
		params:
		freeze: True
		layer: "penultimate"
		No newline at end of file
		target: ldm.modules.encoders.modules.FrozenCLIPEmbedder

modules/api/api.py

+3 −2

Original line number	Diff line number	Diff line
		@@ -18,7 +18,8 @@ from modules.textual_inversion.textual_inversion import create_embedding, train_
		from modules.textual_inversion.preprocess import preprocess
		from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork
		from PIL import PngImagePlugin,Image
		from modules.sd_models import checkpoints_list, find_checkpoint_config
		from modules.sd_models import checkpoints_list
		from modules.sd_models_config import find_checkpoint_config_near_filename
		from modules.realesrgan_model import get_realesrgan_models
		from modules import devices
		from typing import List
		@@ -387,7 +388,7 @@ class Api:
		]

		def get_sd_models(self):
		return [{"title": x.title, "model_name": x.model_name, "hash": x.shorthash, "sha256": x.sha256, "filename": x.filename, "config": find_checkpoint_config(x)} for x in checkpoints_list.values()]
		return [{"title": x.title, "model_name": x.model_name, "hash": x.shorthash, "sha256": x.sha256, "filename": x.filename, "config": find_checkpoint_config_near_filename(x)} for x in checkpoints_list.values()]

		def get_hypernetworks(self):
		return [{"name": name, "path": shared.hypernetworks[name]} for name in shared.hypernetworks]

modules/api/models.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -228,7 +228,7 @@ class SDModelItem(BaseModel):
		hash: Optional[str] = Field(title="Short hash")
		sha256: Optional[str] = Field(title="sha256 hash")
		filename: str = Field(title="Filename")
		config: str = Field(title="Config file")
		config: Optional[str] = Field(title="Config file")

		class HypernetworkItem(BaseModel):
		name: str = Field(title="Name")

modules/devices.py

+10 −4

Original line number	Diff line number	Diff line
		@@ -34,14 +34,18 @@ def get_cuda_device_string():
		return "cuda"


		def get_optimal_device():
		def get_optimal_device_name():
		if torch.cuda.is_available():
		return torch.device(get_cuda_device_string())
		return get_cuda_device_string()

		if has_mps():
		return torch.device("mps")
		return "mps"

		return "cpu"

		return cpu

		def get_optimal_device():
		return torch.device(get_optimal_device_name())


		def get_device_for(task):
		@@ -139,6 +143,8 @@ def test_for_nans(x, where):
		else:
		message = "A tensor with all NaNs was produced."

		message += " Use --disable-nan-check commandline argument to disable this check."

		raise NansException(message)

Admin message