Merge branch 'dev' into report-error (52b8752e) · Commits · github_fork / Stable Diffusion Webui

extensions-builtin/LDSR/sd_hijack_autoencoder.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -10,7 +10,7 @@ from contextlib import contextmanager
		from torch.optim.lr_scheduler import LambdaLR

		from ldm.modules.ema import LitEma
		from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer
		from vqvae_quantize import VectorQuantizer2 as VectorQuantizer
		from ldm.modules.diffusionmodules.model import Encoder, Decoder
		from ldm.util import instantiate_from_config

extensions-builtin/LDSR/vqvae_quantize.py

0 → 100644

+147 −0

Original line number	Diff line number	Diff line
		# Vendored from https://raw.githubusercontent.com/CompVis/taming-transformers/24268930bf1dce879235a7fddd0b2355b84d7ea6/taming/modules/vqvae/quantize.py,
		# where the license is as follows:
		#
		# Copyright (c) 2020 Patrick Esser and Robin Rombach and Björn Ommer
		#
		# Permission is hereby granted, free of charge, to any person obtaining a copy
		# of this software and associated documentation files (the "Software"), to deal
		# in the Software without restriction, including without limitation the rights
		# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
		# copies of the Software, and to permit persons to whom the Software is
		# furnished to do so, subject to the following conditions:
		#
		# The above copyright notice and this permission notice shall be included in all
		# copies or substantial portions of the Software.
		#
		# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
		# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
		# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
		# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
		# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
		# OR OTHER DEALINGS IN THE SOFTWARE.

		import torch
		import torch.nn as nn
		import numpy as np
		from einops import rearrange


		class VectorQuantizer2(nn.Module):
		    """
		    Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly
		    avoids costly matrix multiplications and allows for post-hoc remapping of indices.

		    Args:
		        n_e: number of embeddings in the codebook.
		        e_dim: size of each embedding vector.
		        beta: weighting factor for one of the two loss terms (see NOTE below).
		        remap: optional path passed to ``np.load``; the loaded array lists the
		            codebook indices that are in use and enables index remapping.
		        unknown_index: ``"random"``, ``"extra"`` or an integer - what an index
		            missing from the remap table is replaced with.
		        sane_index_shape: when true, ``forward`` returns the indices reshaped
		            to (batch, height, width).
		        legacy: keep the historical (buggy) placement of ``beta``.
		    """

		    # NOTE: due to a bug the beta term was applied to the wrong term. for
		    # backwards compatibility we use the buggy version by default, but you can
		    # specify legacy=False to fix it.
		    def __init__(self, n_e, e_dim, beta, remap=None, unknown_index="random",
		                 sane_index_shape=False, legacy=True):
		        super().__init__()
		        self.n_e = n_e
		        self.e_dim = e_dim
		        self.beta = beta
		        self.legacy = legacy

		        self.embedding = nn.Embedding(self.n_e, self.e_dim)
		        self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e)

		        self.remap = remap
		        if self.remap is not None:
		            self.register_buffer("used", torch.tensor(np.load(self.remap)))
		            self.re_embed = self.used.shape[0]
		            self.unknown_index = unknown_index  # "random" or "extra" or integer
		            if self.unknown_index == "extra":
		                # reserve one additional slot after the used indices
		                self.unknown_index = self.re_embed
		                self.re_embed = self.re_embed + 1
		            print(f"Remapping {self.n_e} indices to {self.re_embed} indices. "
		                  f"Using {self.unknown_index} for unknown indices.")
		        else:
		            self.re_embed = n_e

		        self.sane_index_shape = sane_index_shape

		    def remap_to_used(self, inds):
		        """Translate full-codebook indices into positions within ``self.used``.

		        ``inds`` must have at least two dimensions; the result has the same
		        shape. Indices that do not occur in ``self.used`` are replaced by a
		        random position (``unknown_index == "random"``) or by
		        ``self.unknown_index``.
		        """
		        ishape = inds.shape
		        assert len(ishape) > 1
		        inds = inds.reshape(ishape[0], -1)
		        used = self.used.to(inds)
		        # match[b, i, k] is 1 where inds[b, i] equals used[k]
		        match = (inds[:, :, None] == used[None, None, ...]).long()
		        new = match.argmax(-1)
		        unknown = match.sum(2) < 1
		        if self.unknown_index == "random":
		            new[unknown] = torch.randint(0, self.re_embed, size=new[unknown].shape).to(device=new.device)
		        else:
		            new[unknown] = self.unknown_index
		        return new.reshape(ishape)

		    def unmap_to_all(self, inds):
		        """Translate positions within ``self.used`` back to full-codebook indices.

		        ``inds`` must have at least two dimensions; the result has the same
		        shape. When an extra token is configured, positions beyond the used
		        table are mapped through position 0. The input tensor is never
		        modified.
		        """
		        ishape = inds.shape
		        assert len(ishape) > 1
		        inds = inds.reshape(ishape[0], -1)
		        used = self.used.to(inds)
		        n_used = self.used.shape[0]
		        if self.re_embed > n_used:  # extra token
		            # simply set to zero; masked_fill is out-of-place, so the caller's
		            # tensor (which reshape() may alias) is left untouched
		            inds = inds.masked_fill(inds >= n_used, 0)
		        # expand() gives every batch row a view of the table without copying it
		        back = torch.gather(used[None, :].expand(inds.shape[0], -1), 1, inds)
		        return back.reshape(ishape)

		    def forward(self, z, temp=None, rescale_logits=False, return_logits=False):
		        """Quantize ``z`` to its nearest codebook entries.

		        ``z`` is laid out as (batch, channel, height, width). ``temp``,
		        ``rescale_logits`` and ``return_logits`` exist only for interface
		        compatibility with the Gumbel quantizer and must keep their defaults
		        (``temp`` may also be 1.0).

		        Returns:
		            ``(z_q, loss, (perplexity, min_encodings, min_encoding_indices))``
		            where ``z_q`` has the layout of ``z`` and ``perplexity`` and
		            ``min_encodings`` are always ``None``.
		        """
		        assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel"
		        assert rescale_logits is False, "Only for interface compatible with Gumbel"
		        assert return_logits is False, "Only for interface compatible with Gumbel"
		        # reshape z -> (batch, height, width, channel) and flatten
		        z = rearrange(z, 'b c h w -> b h w c').contiguous()
		        z_flattened = z.view(-1, self.e_dim)
		        # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z

		        d = torch.sum(z_flattened ** 2, dim=1, keepdim=True) + \
		            torch.sum(self.embedding.weight ** 2, dim=1) - 2 * \
		            torch.einsum('bd,dn->bn', z_flattened, rearrange(self.embedding.weight, 'n d -> d n'))

		        min_encoding_indices = torch.argmin(d, dim=1)
		        z_q = self.embedding(min_encoding_indices).view(z.shape)
		        perplexity = None
		        min_encodings = None

		        # compute loss for embedding
		        if not self.legacy:
		            loss = self.beta * torch.mean((z_q.detach() - z) ** 2) + \
		                   torch.mean((z_q - z.detach()) ** 2)
		        else:
		            loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * \
		                   torch.mean((z_q - z.detach()) ** 2)

		        # preserve gradients
		        z_q = z + (z_q - z).detach()

		        # reshape back to match original input shape
		        z_q = rearrange(z_q, 'b h w c -> b c h w').contiguous()

		        if self.remap is not None:
		            min_encoding_indices = min_encoding_indices.reshape(z.shape[0], -1)  # add batch axis
		            min_encoding_indices = self.remap_to_used(min_encoding_indices)
		            min_encoding_indices = min_encoding_indices.reshape(-1, 1)  # flatten

		        if self.sane_index_shape:
		            min_encoding_indices = min_encoding_indices.reshape(
		                z_q.shape[0], z_q.shape[2], z_q.shape[3])

		        return z_q, loss, (perplexity, min_encodings, min_encoding_indices)

		    def get_codebook_entry(self, indices, shape):
		        """Look up the embedding vectors for ``indices``.

		        ``shape`` specifies (batch, height, width, channel). When it is not
		        ``None`` the result is viewed with that shape and permuted to
		        (batch, channel, height, width). With remapping enabled ``shape`` is
		        required, because ``shape[0]`` is used to restore the batch axis.
		        """
		        # shape specifying (batch, height, width, channel)
		        if self.remap is not None:
		            indices = indices.reshape(shape[0], -1)  # add batch axis
		            indices = self.unmap_to_all(indices)
		            indices = indices.reshape(-1)  # flatten again

		        # get quantized latent vectors
		        z_q = self.embedding(indices)

		        if shape is not None:
		            z_q = z_q.view(shape)
		            # reshape back to match original input shape
		            z_q = z_q.permute(0, 3, 1, 2).contiguous()

		        return z_q

extensions-builtin/canvas-zoom-and-pan/javascript/zoom.js

0 → 100644

+431 −0

Original line number	Diff line number	Diff line
		// Main

		// Helper functions
		// Return the img2img tab button carrying the "selected" class, or every
		// tab button when `all` is true. Yields undefined when none is selected.
		function getActiveTab(elements, all = false) {
		    const tabButtons = elements.img2imgTabs.querySelectorAll("button");

		    if (all) {
		        return tabButtons;
		    }

		    return Array.from(tabButtons).find(button =>
		        button.classList.contains("selected")
		    );
		}

		onUiLoaded(async() => {
		// Hotkey bindings, expressed as KeyboardEvent.code values. They are
		// compared against event.code in the keydown/keyup handlers below.
		const hotkeysConfig = {
		resetZoom: "KeyR",
		fitToScreen: "KeyS",
		moveKey: "KeyF",
		overlap: "KeyO"
		};

		// True while the move key is held down; mouse movement then pans the canvas.
		let isMoving = false;
		// Last pointer offset recorded by getMousePosition.
		let mouseX, mouseY;

		// CSS selectors of the elements this script works with; each one is
		// resolved with document.querySelector in getElements.
		const elementIDs = {
		sketch: "#img2img_sketch",
		inpaint: "#img2maskimg",
		inpaintSketch: "#inpaint_sketch",
		img2imgTabs: "#mode_img2img .tab-nav"
		};

		// Resolve every selector in elementIDs to its DOM node (or null),
		// returning an object keyed by the same names.
		async function getElements() {
		    const resolved = {};
		    for (const [name, selector] of Object.entries(elementIDs)) {
		        resolved[name] = document.querySelector(selector);
		    }
		    return resolved;
		}

		const elements = await getElements();

		function applyZoomAndPan(targetElement, elemId) {
		targetElement.style.transformOrigin = "0 0";
		let [zoomLevel, panX, panY] = [1, 0, 0];
		let fullScreenMode = false;

		// The <img> tag inside the canvas container was found to produce white
		// canvases while zooming. Hiding it on every tab except "img2img" avoids
		// the problem and has no effect on webui.
		function fixCanvas() {
		    const activeTabButton = getActiveTab(elements);

		    // getActiveTab yields undefined when no tab carries the "selected"
		    // class; there is nothing to inspect in that case.
		    if (!activeTabButton) return;

		    const activeTab = activeTabButton.textContent.trim();
		    if (activeTab === "img2img") return;

		    const img = targetElement.querySelector(`${elemId} img`);
		    if (img && img.style.display !== "none") {
		        img.style.display = "none";
		        img.style.visibility = "hidden";
		    }
		}

		// Put the target element back to zoom level 1 with no pan offset.
		function resetZoom() {
		    [zoomLevel, panX, panY] = [1, 0, 0];

		    fixCanvas();
		    targetElement.style.transform = `scale(${zoomLevel}) translate(${panX}px, ${panY}px)`;

		    const canvas = gradioApp().querySelector(`${elemId} canvas[key="interface"]`);

		    toggleOverlap("off");
		    fullScreenMode = false;

		    // When both the canvas and its container are wider than 865px the
		    // container is re-fitted to its parent instead of being reset.
		    const isWide = cssLength => parseFloat(cssLength) > 865;
		    if (canvas && isWide(canvas.style.width) && isWide(targetElement.style.width)) {
		        fitToElement();
		        return;
		    }

		    targetElement.style.width = "";
		    if (canvas) {
		        targetElement.style.height = canvas.style.height;
		    }
		}

		// Switch the target element's zIndex between "0" and "998" so that it is
		// either overlapped by, or overlaps, other elements. Passing "on" or
		// "off" forces the state; any other argument toggles it.
		function toggleOverlap(forced = "") {
		    const below = "0";
		    const above = "998";
		    const style = targetElement.style;

		    if (forced === "off") {
		        style.zIndex = below;
		    } else if (forced === "on") {
		        style.zIndex = above;
		    } else {
		        style.zIndex = style.zIndex !== above ? above : below;
		    }
		}

		// Adjust the brush size based on the deltaY value from a mouse wheel event.
		//   elemId       - selector prefix of the canvas container
		//   deltaY       - wheel delta; positive shrinks the brush, otherwise it grows
		//   withoutValue - when true, only click the control and leave its value alone
		//   percentage   - step size as a percentage of the control's maximum
		function adjustBrushSize(
		    elemId,
		    deltaY,
		    withoutValue = false,
		    percentage = 5
		) {
		    // Prefer the radius slider; fall back to the "Use brush" button.
		    const input =
		        gradioApp().querySelector(`${elemId} input[aria-label='Brush radius']`) ||
		        gradioApp().querySelector(`${elemId} button[aria-label="Use brush"]`);

		    if (!input) return;

		    input.click();
		    if (withoutValue) return;

		    // The fallback button has no numeric value; do not write NaN into it
		    // or dispatch a change event for it.
		    const currentValue = parseFloat(input.value);
		    if (Number.isNaN(currentValue)) return;

		    const maxValue = parseFloat(input.getAttribute("max")) || 100;
		    const changeAmount = maxValue * (percentage / 100);
		    const newValue = currentValue + (deltaY > 0 ? -changeAmount : changeAmount);

		    input.value = Math.min(Math.max(newValue, 0), maxValue);
		    input.dispatchEvent(new Event("change"));
		}

		// Reset zoom when uploading a new image.
		const fileInput = gradioApp().querySelector(
		    `${elemId} input[type="file"][accept="image/*"].svelte-116rqfv`
		);
		// The selector relies on a generated class name, so the lookup can come
		// back empty; skip the listener rather than abort the whole setup.
		if (fileInput) {
		    fileInput.addEventListener("click", resetZoom);
		}

		// Apply a new zoom level (clamped to the range 0.5 - 15) around the given
		// point, shifting panX / panY accordingly. Returns the clamped level; the
		// caller is responsible for storing it in zoomLevel.
		function updateZoom(newZoomLevel, mouseX, mouseY) {
		    const clampedLevel = Math.max(0.5, Math.min(newZoomLevel, 15));

		    panX += mouseX - (mouseX * clampedLevel) / zoomLevel;
		    panY += mouseY - (mouseY * clampedLevel) / zoomLevel;

		    const style = targetElement.style;
		    style.transformOrigin = "0 0";
		    style.transform = `translate(${panX}px, ${panY}px) scale(${clampedLevel})`;

		    toggleOverlap("on");
		    return clampedLevel;
		}

		// Zoom in ("+") or out (anything else) around the pointer position.
		// Only acts while the shift key is held.
		function changeZoomLevel(operation, e) {
		    if (!e.shiftKey) return;

		    e.preventDefault();

		    // The step grows with the current zoom level.
		    let step = 0.2;
		    if (zoomLevel > 7) {
		        step = 0.9;
		    } else if (zoomLevel > 2) {
		        step = 0.6;
		    }

		    const pointerX = e.clientX;
		    const pointerY = e.clientY;

		    fullScreenMode = false;

		    const bounds = targetElement.getBoundingClientRect();
		    const requestedLevel = zoomLevel + (operation === "+" ? step : -step);
		    zoomLevel = updateZoom(
		        requestedLevel,
		        pointerX - bounds.left,
		        pointerY - bounds.top
		    );
		}

		/**
		 * Fit the target element inside its parent element by calculating the
		 * required scale and offsets, then store the result in the shared
		 * zoomLevel, panX and panY variables.
		 */
		function fitToElement() {
		    // Reset zoom so the measurements below are taken without a transform
		    targetElement.style.transform = `translate(${0}px, ${0}px) scale(${1})`;

		    // Element and parent dimensions
		    const elementWidth = targetElement.offsetWidth;
		    const elementHeight = targetElement.offsetHeight;
		    const parentElement = targetElement.parentElement;
		    const screenWidth = parentElement.clientWidth;
		    const screenHeight = parentElement.clientHeight;

		    // Largest uniform scale at which the element still fits the parent
		    const scaleX = screenWidth / elementWidth;
		    const scaleY = screenHeight / elementHeight;
		    const scale = Math.min(scaleX, scaleY);

		    const transformOrigin =
		        window.getComputedStyle(targetElement).transformOrigin;
		    const [originX, originY] = transformOrigin.split(" ");
		    const originXValue = parseFloat(originX);
		    const originYValue = parseFloat(originY);

		    // Offsets relative to the transform origin; the vertical free space
		    // is divided by 2.5 rather than 2.
		    const offsetX =
		        (screenWidth - elementWidth * scale) / 2 -
		        originXValue * (1 - scale);
		    const offsetY =
		        (screenHeight - elementHeight * scale) / 2.5 -
		        originYValue * (1 - scale);

		    // Apply scale and offsets to the element
		    targetElement.style.transform = `translate(${offsetX}px, ${offsetY}px) scale(${scale})`;

		    // Update shared state
		    zoomLevel = scale;
		    panX = offsetX;
		    panY = offsetY;

		    fullScreenMode = false;
		    toggleOverlap("off");
		}

		/**
		 * Fullscreen mode: scale and centre the target element within the browser
		 * window, storing the result in the shared zoomLevel, panX and panY
		 * variables. Calling it again while fullscreen is active resets the zoom.
		 */
		function fitToScreen() {
		    const canvas = gradioApp().querySelector(`${elemId} canvas[key="interface"]`);
		    if (!canvas) return;

		    if (canvas.offsetWidth > 862) {
		        targetElement.style.width = canvas.offsetWidth + "px";
		    }

		    // Second activation leaves fullscreen mode.
		    if (fullScreenMode) {
		        resetZoom();
		        fullScreenMode = false;
		        return;
		    }

		    const style = targetElement.style;

		    // Reset zoom before measuring
		    style.transform = `translate(${0}px, ${0}px) scale(${1})`;

		    // Scrollbar width, so the usable window width can be computed
		    const scrollbarWidth = window.innerWidth - document.documentElement.clientWidth;

		    // Element and window dimensions
		    const elementWidth = targetElement.offsetWidth;
		    const elementHeight = targetElement.offsetHeight;
		    const screenWidth = window.innerWidth - scrollbarWidth;
		    const screenHeight = window.innerHeight;

		    // Element position as reported by getBoundingClientRect
		    const bounds = targetElement.getBoundingClientRect();
		    const elementY = bounds.y;
		    const elementX = bounds.x;

		    // Largest uniform scale at which the element still fits the window
		    const scale = Math.min(screenWidth / elementWidth, screenHeight / elementHeight);

		    // Current transform origin
		    const [originX, originY] = window
		        .getComputedStyle(targetElement)
		        .transformOrigin.split(" ");
		    const originXValue = parseFloat(originX);
		    const originYValue = parseFloat(originY);

		    // Offsets with respect to the transform origin
		    const offsetX =
		        (screenWidth - elementWidth * scale) / 2 -
		        elementX -
		        originXValue * (1 - scale);
		    const offsetY =
		        (screenHeight - elementHeight * scale) / 2 -
		        elementY -
		        originYValue * (1 - scale);

		    style.transform = `translate(${offsetX}px, ${offsetY}px) scale(${scale})`;

		    // Update shared state
		    zoomLevel = scale;
		    panX = offsetX;
		    panY = offsetY;

		    fullScreenMode = true;
		    toggleOverlap("on");
		}

		// Dispatch a keydown event to the action bound to its key code, if any.
		// The event itself is passed on to the action.
		function handleKeyDown(event) {
		    const handlersByCode = {
		        [hotkeysConfig.resetZoom]: resetZoom,
		        [hotkeysConfig.overlap]: toggleOverlap,
		        [hotkeysConfig.fitToScreen]: fitToScreen
		    };

		    const handler = handlersByCode[event.code];
		    if (!handler) return;

		    event.preventDefault();
		    handler(event);
		}

		// Remember the pointer offset reported by the latest mousemove event.
		function getMousePosition(e) {
		    ({offsetX: mouseX, offsetY: mouseY} = e);
		}

		targetElement.addEventListener("mousemove", getMousePosition);

		// Hotkeys are only active while the pointer is over the target element:
		// the document-level keydown handler is attached on the first mousemove
		// and detached again on mouseleave.
		let isKeyDownHandlerAttached = false;

		function handleMouseMove() {
		    if (isKeyDownHandlerAttached) return;

		    document.addEventListener("keydown", handleKeyDown);
		    isKeyDownHandlerAttached = true;
		}

		function handleMouseLeave() {
		    if (!isKeyDownHandlerAttached) return;

		    document.removeEventListener("keydown", handleKeyDown);
		    isKeyDownHandlerAttached = false;
		}

		// Add mouse event handlers
		targetElement.addEventListener("mousemove", handleMouseMove);
		targetElement.addEventListener("mouseleave", handleMouseLeave);

		// Clicking any img2img tab resets the zoom; a container wider than 865px
		// is additionally re-fitted to its parent on the next tick.
		const refitWideContainer = () => {
		    if (parseInt(targetElement.style.width) > 865) {
		        setTimeout(fitToElement, 0);
		    }
		};

		elements.img2imgTabs.addEventListener("click", resetZoom);
		elements.img2imgTabs.addEventListener("click", refitWideContainer);

		// Mouse wheel: zoom (handled in changeZoomLevel, which requires shift)
		// and, with ctrl or meta held, brush size adjustment.
		targetElement.addEventListener("wheel", e => {
		    // Scrolling down zooms out, scrolling up zooms in
		    const operation = e.deltaY > 0 ? "-" : "+";
		    changeZoomLevel(operation, e);

		    // Handle brush size adjustment with ctrl key pressed
		    if (e.ctrlKey || e.metaKey) {
		        e.preventDefault();

		        // Increase or decrease brush size based on scroll direction
		        adjustBrushSize(elemId, e.deltaY);
		    }
		});

		/**
		 * Track whether the move key is held. Pressing it without ctrl or meta
		 * switches panning on; releasing it switches panning off.
		 * @param {KeyboardEvent} e - The keyboard event.
		 */
		function handleMoveKeyDown(e) {
		    const isMoveKey = e.code === hotkeysConfig.moveKey;
		    if (isMoveKey && !(e.ctrlKey || e.metaKey)) {
		        isMoving = true;
		    }
		}

		function handleMoveKeyUp(e) {
		    if (e.code !== hotkeysConfig.moveKey) return;
		    isMoving = false;
		}

		document.addEventListener("keydown", handleMoveKeyDown);
		document.addEventListener("keyup", handleMoveKeyUp);

		// Shift the pan position by the given mouse movement. The movement is
		// multiplied by 1.5, or by 2.5 once the zoom level exceeds 8.
		function updatePanPosition(movementX, movementY) {
		    const panSpeed = zoomLevel > 8 ? 2.5 : 1.5;

		    panX += movementX * panSpeed;
		    panY += movementY * panSpeed;

		    targetElement.style.transform = `translate(${panX}px, ${panY}px) scale(${zoomLevel})`;
		    toggleOverlap("on");
		}

		// While the move key is held, mouse movement pans the canvas and pointer
		// events on the target element are disabled; otherwise they are enabled.
		function handleMoveByKey(e) {
		    if (!isMoving) {
		        targetElement.style.pointerEvents = "auto";
		        return;
		    }

		    updatePanPosition(e.movementX, e.movementY);
		    targetElement.style.pointerEvents = "none";
		}

		gradioApp().addEventListener("mousemove", handleMoveByKey);
		}

		// Attach zoom and pan to each canvas container. A container whose
		// selector matched nothing is skipped so that the remaining ones are
		// still set up.
		for (const key of ["sketch", "inpaint", "inpaintSketch"]) {
		    if (elements[key]) {
		        applyZoomAndPan(elements[key], elementIDs[key]);
		    }
		}
		});

javascript/imageviewerGamepad.js

+7 −1

Original line number	Diff line number	Diff line
		let gamepads = [];

		window.addEventListener('gamepadconnected', (e) => {
		const index = e.gamepad.index;
		let isWaiting = false;
		setInterval(async() => {
		gamepads[index] = setInterval(async() => {
		if (!opts.js_modal_lightbox_gamepad \|\| isWaiting) return;
		const gamepad = navigator.getGamepads()[index];
		const xValue = gamepad.axes[0];
		@@ -24,6 +26,10 @@ window.addEventListener('gamepadconnected', (e) => {
		}, 10);
		});

		// Stop the interval stored for a gamepad once it disconnects.
		window.addEventListener('gamepaddisconnected', ({gamepad}) => {
		    clearInterval(gamepads[gamepad.index]);
		});

		/*
		Primarily for vr controller type pointer devices.
		I use the wheel event because there's currently no way to do it properly with web xr.

modules/api/api.py

+5 −0

Original line number	Diff line number	Diff line
		@@ -24,6 +24,7 @@ from modules.textual_inversion.preprocess import preprocess
		from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork
		from PIL import PngImagePlugin,Image
		from modules.sd_models import checkpoints_list, unload_model_weights, reload_model_weights
		from modules.sd_vae import vae_dict
		from modules.sd_models_config import find_checkpoint_config_near_filename
		from modules.realesrgan_model import get_realesrgan_models
		from modules import devices
		@@ -190,6 +191,7 @@ class Api:
		self.add_api_route("/sdapi/v1/samplers", self.get_samplers, methods=["GET"], response_model=List[models.SamplerItem])
		self.add_api_route("/sdapi/v1/upscalers", self.get_upscalers, methods=["GET"], response_model=List[models.UpscalerItem])
		self.add_api_route("/sdapi/v1/sd-models", self.get_sd_models, methods=["GET"], response_model=List[models.SDModelItem])
		self.add_api_route("/sdapi/v1/sd-vae", self.get_sd_vaes, methods=["GET"], response_model=List[models.SDVaeItem])
		self.add_api_route("/sdapi/v1/hypernetworks", self.get_hypernetworks, methods=["GET"], response_model=List[models.HypernetworkItem])
		self.add_api_route("/sdapi/v1/face-restorers", self.get_face_restorers, methods=["GET"], response_model=List[models.FaceRestorerItem])
		self.add_api_route("/sdapi/v1/realesrgan-models", self.get_realesrgan_models, methods=["GET"], response_model=List[models.RealesrganItem])
		@@ -542,6 +544,9 @@ class Api:
		def get_sd_models(self):
		    """Return one description dict per entry of ``checkpoints_list``."""
		    described = []
		    for checkpoint in checkpoints_list.values():
		        described.append({
		            "title": checkpoint.title,
		            "model_name": checkpoint.model_name,
		            "hash": checkpoint.shorthash,
		            "sha256": checkpoint.sha256,
		            "filename": checkpoint.filename,
		            "config": find_checkpoint_config_near_filename(checkpoint),
		        })
		    return described

		def get_sd_vaes(self):
		    """Return one ``{"model_name", "filename"}`` dict per entry of ``vae_dict``."""
		    return [
		        {"model_name": vae_name, "filename": vae_file}
		        for vae_name, vae_file in vae_dict.items()
		    ]

		def get_hypernetworks(self):
		    """Return one ``{"name", "path"}`` dict per entry of ``shared.hypernetworks``."""
		    return [
		        {"name": hn_name, "path": hn_path}
		        for hn_name, hn_path in shared.hypernetworks.items()
		    ]

Admin message