Merge branch 'AUTOMATIC1111:master' into img2img-api-scripts (50e25362) · Commits · github_fork / Stable Diffusion Webui

README.md

+4 −6

Original line number	Diff line number	Diff line
		# Stable Diffusion web UI
		A browser interface based on Gradio library for Stable Diffusion.

		![](txt2img_Screenshot.png)

		Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Custom-Scripts) wiki page for extra scripts developed by users.
		![](screenshot.png)

		## Features
		[Detailed feature showcase with images](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features):
		@@ -97,9 +95,8 @@ Alternatively, use online services (like Google Colab):
		1. Install [Python 3.10.6](https://www.python.org/downloads/windows/), checking "Add Python to PATH"
		2. Install [git](https://git-scm.com/download/win).
		3. Download the stable-diffusion-webui repository, for example by running `git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git`.
		4. Place `model.ckpt` in the `models` directory (see [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) for where to get it).
		5. _(Optional)_ Place `GFPGANv1.4.pth` in the base directory, alongside `webui.py` (see [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) for where to get it).
		6. Run `webui-user.bat` from Windows Explorer as normal, non-administrator, user.
		4. Place stable diffusion checkpoint (`model.ckpt`) in the `models/Stable-diffusion` directory (see [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) for where to get it).
		5. Run `webui-user.bat` from Windows Explorer as normal, non-administrator, user.

		### Automatic Installation on Linux
		1. Install the dependencies:
		@@ -141,6 +138,7 @@ Licenses for borrowed code can be found in `Settings -> Licenses` screen, and al
		- Ideas for optimizations - https://github.com/basujindal/stable-diffusion
		- Cross Attention layer optimization - Doggettx - https://github.com/Doggettx/stable-diffusion, original idea for prompt editing.
		- Cross Attention layer optimization - InvokeAI, lstein - https://github.com/invoke-ai/InvokeAI (originally http://github.com/lstein/stable-diffusion)
		- Sub-quadratic Cross Attention layer optimization - Alex Birch (https://github.com/Birch-san/diffusers/pull/1), Amin Rezaei (https://github.com/AminRezaei0x443/memory-efficient-attention)
		- Textual Inversion - Rinon Gal - https://github.com/rinongal/textual_inversion (we're not using his code, but we are using his ideas).
		- Idea for SD upscale - https://github.com/jquesnelle/txt2imghd
		- Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot

html/licenses.html

+28 −1

Original line number	Diff line number	Diff line
		@@ -184,7 +184,7 @@ SOFTWARE.
		</pre>

		<h2><a href="https://github.com/JingyunLiang/SwinIR/blob/main/LICENSE">SwinIR</a></h2>
		<small>Code added by contirubtors, most likely copied from this repository.</small>
		<small>Code added by contributors, most likely copied from this repository.</small>

		<pre>
		Apache License
		@@ -390,3 +390,30 @@ SOFTWARE.
		limitations under the License.
		</pre>

		<h2><a href="https://github.com/AminRezaei0x443/memory-efficient-attention/blob/main/LICENSE">Memory Efficient Attention</a></h2>
		<small>The sub-quadratic cross attention optimization uses modified code from the Memory Efficient Attention package that Alex Birch optimized for 3D tensors. This license is updated to reflect that.</small>
		<pre>
		MIT License

		Copyright (c) 2023 Alex Birch
		Copyright (c) 2023 Amin Rezaei

		Permission is hereby granted, free of charge, to any person obtaining a copy
		of this software and associated documentation files (the "Software"), to deal
		in the Software without restriction, including without limitation the rights
		to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
		copies of the Software, and to permit persons to whom the Software is
		furnished to do so, subject to the following conditions:

		The above copyright notice and this permission notice shall be included in all
		copies or substantial portions of the Software.

		THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
		AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
		OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
		SOFTWARE.
		</pre>

modules/api/models.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -125,7 +125,7 @@ class ExtrasBaseRequest(BaseModel):
		gfpgan_visibility: float = Field(default=0, title="GFPGAN Visibility", ge=0, le=1, allow_inf_nan=False, description="Sets the visibility of GFPGAN, values should be between 0 and 1.")
		codeformer_visibility: float = Field(default=0, title="CodeFormer Visibility", ge=0, le=1, allow_inf_nan=False, description="Sets the visibility of CodeFormer, values should be between 0 and 1.")
		codeformer_weight: float = Field(default=0, title="CodeFormer Weight", ge=0, le=1, allow_inf_nan=False, description="Sets the weight of CodeFormer, values should be between 0 and 1.")
		upscaling_resize: float = Field(default=2, title="Upscaling Factor", ge=1, le=4, description="By how much to upscale the image, only used when resize_mode=0.")
		upscaling_resize: float = Field(default=2, title="Upscaling Factor", ge=1, le=8, description="By how much to upscale the image, only used when resize_mode=0.")
		upscaling_resize_w: int = Field(default=512, title="Target Width", ge=1, description="Target width for the upscaler to hit. Only used when resize_mode=1.")
		upscaling_resize_h: int = Field(default=512, title="Target Height", ge=1, description="Target height for the upscaler to hit. Only used when resize_mode=1.")
		upscaling_crop: bool = Field(default=True, title="Crop to fit", description="Should the upscaler crop the image to fit in the chosen size?")

modules/devices.py

+23 −5

Original line number	Diff line number	Diff line
		@@ -133,8 +133,26 @@ def numpy_fix(self, *args, **kwargs):
		return orig_tensor_numpy(self, *args, **kwargs)


		# MPS workaround for https://github.com/pytorch/pytorch/issues/89784
		orig_cumsum = torch.cumsum
		orig_Tensor_cumsum = torch.Tensor.cumsum
		def cumsum_fix(input, cumsum_func, *args, **kwargs):
		    """Call ``cumsum_func`` on ``input``, detouring through the CPU for some MPS dtypes.

		    Workaround for https://github.com/pytorch/pytorch/issues/89784.

		    Args:
		        input: tensor to accumulate; its ``device`` and ``dtype`` are inspected.
		        cumsum_func: the cumsum callable to delegate to (e.g. the saved
		            original ``torch.cumsum`` or ``torch.Tensor.cumsum``).
		        *args: positional arguments forwarded unchanged to ``cumsum_func``.
		        **kwargs: keyword arguments forwarded unchanged to ``cumsum_func``;
		            ``dtype``, when present, is taken as the output dtype.

		    Returns:
		        Whatever ``cumsum_func`` returns. When the CPU detour is taken, the
		        result is moved back to ``input.device``.
		    """
		    if input.device.type == 'mps':
		        # The output dtype is the explicit ``dtype`` keyword if given,
		        # otherwise the input's own dtype.
		        output_dtype = kwargs.get('dtype', input.dtype)
		        broken_dtypes = (torch.bool, torch.int8, torch.int16, torch.int64)
		        if any(output_dtype == broken_dtype for broken_dtype in broken_dtypes):
		            # Compute on the CPU, then move the result back to the original device.
		            return cumsum_func(input.cpu(), *args, **kwargs).to(input.device)
		    return cumsum_func(input, *args, **kwargs)


		if has_mps():
		if version.parse(torch.__version__) < version.parse("1.13"):
		# PyTorch 1.13 doesn't need these fixes but unfortunately is slower and has regressions that prevent training from working
		if has_mps() and version.parse(torch.__version__) < version.parse("1.13"):
		torch.Tensor.to = tensor_to_fix
		torch.nn.functional.layer_norm = layer_norm_fix
		torch.Tensor.numpy = numpy_fix
		elif version.parse(torch.__version__) > version.parse("1.13.1"):
		if not torch.Tensor([1,2]).to(torch.device("mps")).equal(torch.Tensor([1,1]).to(torch.device("mps")).cumsum(0, dtype=torch.int16)):
		torch.cumsum = lambda input, *args, **kwargs: ( cumsum_fix(input, orig_cumsum, *args, **kwargs) )
		torch.Tensor.cumsum = lambda self, *args, **kwargs: ( cumsum_fix(self, orig_Tensor_cumsum, *args, **kwargs) )
		orig_narrow = torch.narrow
		torch.narrow = lambda *args, **kwargs: ( orig_narrow(*args, **kwargs).clone() )

modules/hypernetworks/hypernetwork.py

+9 −2

Original line number	Diff line number	Diff line
		@@ -13,7 +13,7 @@ import tqdm
		from einops import rearrange, repeat
		from ldm.util import default
		from modules import devices, processing, sd_models, shared, sd_samplers
		from modules.textual_inversion import textual_inversion
		from modules.textual_inversion import textual_inversion, logging
		from modules.textual_inversion.learn_schedule import LearnRateScheduler
		from torch import einsum
		from torch.nn.init import normal_, xavier_normal_, xavier_uniform_, kaiming_normal_, kaiming_uniform_, zeros_
		@@ -458,6 +458,13 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,

		ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method)

		if shared.opts.save_training_settings_to_txt:
		saved_params = dict(
		model_name=checkpoint.model_name, model_hash=checkpoint.hash, num_of_dataset_images=len(ds),
		**{field: getattr(hypernetwork, field) for field in ['layer_structure', 'activation_func', 'weight_init', 'add_layer_norm', 'use_dropout', ]}
		)
		logging.save_settings_to_file(log_directory, {**saved_params, **locals()})

		latent_sampling_method = ds.latent_sampling_method

		dl = modules.textual_inversion.dataset.PersonalizedDataLoader(ds, latent_sampling_method=latent_sampling_method, batch_size=ds.batch_size, pin_memory=pin_memory)

Admin message