moved deepdanbooru to pure pytorch implementation (c81d440d) · Commits · github_fork / Stable Diffusion Webui

README.md

+1 −1

Original line number	Diff line number	Diff line
		@@ -70,7 +70,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
		- separate prompts using uppercase `AND`
		- also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
		- No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
		- DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args)
		- DeepDanbooru integration, creates danbooru style tags for anime prompts
		- [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add --xformers to commandline args)
		- via extension: [History tab](https://github.com/yfszzx/stable-diffusion-webui-images-browser): view, direct and delete images conveniently within the UI
		- Generate forever option

launch.py

+0 −5

Original line number	Diff line number	Diff line
		@@ -134,7 +134,6 @@ def prepare_enviroment():

		gfpgan_package = os.environ.get('GFPGAN_PACKAGE', "git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379")
		clip_package = os.environ.get('CLIP_PACKAGE', "git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1")
		deepdanbooru_package = os.environ.get('DEEPDANBOORU_PACKAGE', "git+https://github.com/KichangKim/DeepDanbooru.git@d91a2963bf87c6a770d74894667e9ffa9f6de7ff")

		xformers_windows_package = os.environ.get('XFORMERS_WINDOWS_PACKAGE', 'https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl')

		@@ -158,7 +157,6 @@ def prepare_enviroment():
		sys.argv, update_check = extract_arg(sys.argv, '--update-check')
		sys.argv, run_tests = extract_arg(sys.argv, '--tests')
		xformers = '--xformers' in sys.argv
		deepdanbooru = '--deepdanbooru' in sys.argv
		ngrok = '--ngrok' in sys.argv

		try:
		@@ -193,9 +191,6 @@ def prepare_enviroment():
		elif platform.system() == "Linux":
		run_pip("install xformers", "xformers")

		if not is_installed("deepdanbooru") and deepdanbooru:
		run_pip(f"install {deepdanbooru_package}#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru")

		if not is_installed("pyngrok") and ngrok:
		run_pip("install pyngrok", "ngrok")

modules/api/api.py

+2 −8

Original line number	Diff line number	Diff line
		@@ -9,7 +9,7 @@ from fastapi.security import HTTPBasic, HTTPBasicCredentials
		from secrets import compare_digest

		import modules.shared as shared
		from modules import sd_samplers
		from modules import sd_samplers, deepbooru
		from modules.api.models import *
		from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
		from modules.extras import run_extras, run_pnginfo
		@@ -18,9 +18,6 @@ from modules.sd_models import checkpoints_list
		from modules.realesrgan_model import get_realesrgan_models
		from typing import List

		if shared.cmd_opts.deepdanbooru:
		from modules.deepbooru import get_deepbooru_tags

		def upscaler_to_index(name: str):
		try:
		return [x.name.lower() for x in shared.sd_upscalers].index(name.lower())
		@@ -245,10 +242,7 @@ class Api:
		if interrogatereq.model == "clip":
		processed = shared.interrogator.interrogate(img)
		elif interrogatereq.model == "deepdanbooru":
		if shared.cmd_opts.deepdanbooru:
		processed = get_deepbooru_tags(img)
		else:
		raise HTTPException(status_code=404, detail="Model not found. Add --deepdanbooru when launching for using the model.")
		processed = deepbooru.model.tag(img)
		else:
		raise HTTPException(status_code=404, detail="Model not found")

modules/deepbooru.py

+91 −167

Original line number	Diff line number	Diff line
		import os.path
		from concurrent.futures import ProcessPoolExecutor
		import multiprocessing
		import time
		import os
		import re

		import torch
		from PIL import Image
		import numpy as np

		from modules import modelloader, paths, deepbooru_model, devices, images, shared

		re_special = re.compile(r'([\\()])')

		def get_deepbooru_tags(pil_image):
		    """
		    Tag a single image with DeepDanbooru, for simple one-off use
		    (the img2img interrogate action).

		    A deepbooru worker process is created for this call only and is always
		    released afterwards, whether or not tagging succeeded.

		    :param pil_image: image handed to the worker through get_tags_from_process
		    :return: whatever get_tags_from_process returns for the image
		    """
		    from modules import shared  # imported here to avoid a circular import

		    try:
		        score_threshold = shared.opts.interrogate_deepbooru_score_threshold
		        worker_opts = create_deepbooru_opts()
		        create_deepbooru_process(score_threshold, worker_opts)
		        caption = get_tags_from_process(pil_image)
		    finally:
		        release_process()

		    return caption


		OPT_INCLUDE_RANKS = "include_ranks"


		def create_deepbooru_opts():
		    """
		    Snapshot the deepbooru-related user settings into a plain dict.

		    :return: dict with the keys "use_spaces", "use_escape", "alpha_sort"
		        and OPT_INCLUDE_RANKS, each read from shared.opts
		    """
		    from modules import shared

		    settings = shared.opts

		    deepbooru_opts = {}
		    deepbooru_opts["use_spaces"] = settings.deepbooru_use_spaces
		    deepbooru_opts["use_escape"] = settings.deepbooru_escape
		    deepbooru_opts["alpha_sort"] = settings.deepbooru_sort_alpha
		    deepbooru_opts[OPT_INCLUDE_RANKS] = settings.interrogate_return_ranks
		    return deepbooru_opts


		def deepbooru_process(queue, deepbooru_process_return, threshold, deepbooru_opts):
		    """
		    Worker loop: load the model once, then tag every image taken from the queue.

		    :param queue: queue the images arrive on; the string "QUIT" ends the loop
		    :param deepbooru_process_return: mapping whose "value" entry receives each result
		    :param threshold: passed through to get_deepbooru_tags_from_model
		    :param deepbooru_opts: passed through to get_deepbooru_tags_from_model
		    """
		    model, tags = get_deepbooru_tags_model()

		    # iter() with a sentinel keeps calling queue.get() until it yields "QUIT"
		    for pil_image in iter(queue.get, "QUIT"):
		        deepbooru_process_return["value"] = get_deepbooru_tags_from_model(
		            model, tags, pil_image, threshold, deepbooru_opts
		        )


		def create_deepbooru_process(threshold, deepbooru_opts):
		    """
		    Start the deepbooru worker process and publish its handles on `shared`.

		    Images are sent to the worker through a managed queue, so several images
		    can be tagged in a row without reloading the model or spawning a new
		    process. Results come back through a managed dict: its "value" entry is
		    set to -1 here, and whoever queues an image waits until the worker
		    overwrites that entry with the tags.

		    :param threshold: forwarded to the worker (deepbooru_process)
		    :param deepbooru_opts: forwarded to the worker (deepbooru_process)
		    """
		    from modules import shared  # imported here to avoid a circular import

		    spawn_ctx = multiprocessing.get_context("spawn")

		    manager = spawn_ctx.Manager()
		    shared.deepbooru_process_manager = manager

		    image_queue = manager.Queue()
		    shared.deepbooru_process_queue = image_queue

		    result_slot = manager.dict()
		    shared.deepbooru_process_return = result_slot
		    result_slot["value"] = -1

		    worker = spawn_ctx.Process(
		        target=deepbooru_process,
		        args=(image_queue, result_slot, threshold, deepbooru_opts),
		    )
		    shared.deepbooru_process = worker
		    worker.start()


		def get_tags_from_process(image):
		    """
		    Send one image to the running deepbooru worker and wait for its result.

		    The shared return slot is reset to the sentinel -1, the image is put on
		    the worker queue, and the slot is polled every 0.2 seconds until the
		    worker replaces the sentinel. The slot is reset to -1 again before
		    returning.

		    :param image: image object to put on shared.deepbooru_process_queue
		    :return: the value the worker stored in the return slot
		    :raises RuntimeError: if the worker process has exited without storing
		        a result (previously this case polled forever)
		    """
		    from modules import shared

		    result_slot = shared.deepbooru_process_return
		    # Not every caller is guaranteed to have set this attribute, so look it up defensively.
		    worker = getattr(shared, "deepbooru_process", None)

		    result_slot["value"] = -1
		    shared.deepbooru_process_queue.put(image)

		    while result_slot["value"] == -1:
		        if worker is not None and not worker.is_alive():
		            # The worker may have stored its answer right before exiting;
		            # read the slot once more before declaring failure.
		            if result_slot["value"] != -1:
		                break
		            raise RuntimeError("deepbooru process exited before returning tags")
		        time.sleep(0.2)

		    caption = result_slot["value"]
		    result_slot["value"] = -1

		    return caption


		def release_process():
		    """
		    Ask the deepbooru worker to quit, wait for it to exit, and clear the
		    handles stored on `shared` so the memory it used can be returned.
		    """
		    from modules import shared  # imported here to avoid a circular import

		    shared.deepbooru_process_queue.put("QUIT")
		    shared.deepbooru_process.join()

		    # Reset the handles in the same order as they were originally cleared.
		    for handle_name in (
		        "deepbooru_process_queue",
		        "deepbooru_process",
		        "deepbooru_process_return",
		        "deepbooru_process_manager",
		    ):
		        setattr(shared, handle_name, None)

		def get_deepbooru_tags_model():
		    """
		    Load the DeepDanbooru project from ../models/deepbooru (relative to this
		    module), downloading and unpacking the release archive first when
		    project.json is not present there.

		    :return: tuple (model, tags) as loaded by deepdanbooru's project helpers
		    """
		    import deepdanbooru as dd
		    import tensorflow as tf
		    import numpy as np

		    module_dir = os.path.dirname(__file__)
		    model_path = os.path.abspath(os.path.join(module_dir, '..', 'models', 'deepbooru'))
		    project_file = os.path.join(model_path, 'project.json')

		    if not os.path.exists(project_file):
		        # only needed on the download path, so not imported on every call
		        import zipfile
		        from basicsr.utils.download_util import load_file_from_url

		        archive_path = os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")
		        load_file_from_url(
		            r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
		            model_path)
		        with zipfile.ZipFile(archive_path, "r") as archive:
		            archive.extractall(model_path)
		        # the archive is no longer needed once extracted
		        os.remove(archive_path)

		    tags = dd.project.load_tags_from_project(model_path)
		    model = dd.project.load_model_from_project(model_path, compile_model=False)
		    return model, tags

		class DeepDanbooru:
		    """
		    PyTorch DeepDanbooru tagger.

		    The network is loaded lazily on first use, moved to devices.device for
		    tagging, and moved back to the CPU afterwards unless the user option
		    interrogate_keep_models_in_memory is set.
		    """

		    def __init__(self):
		        # The network is created on demand by load().
		        self.model = None

		    def load(self):
		        """Create the network and load its weights; a no-op once loaded."""
		        if self.model is not None:
		            return

		        # NOTE(review): presumably load_models downloads model_url when no
		        # matching file exists locally - confirm against modules.modelloader.
		        files = modelloader.load_models(
		            model_path=os.path.join(paths.models_path, "torch_deepdanbooru"),
		            model_url='https://github.com/AUTOMATIC1111/TorchDeepDanbooru/releases/download/v1/model-resnet_custom_v3.pt',
		            ext_filter=".pt",
		            download_name='model-resnet_custom_v3.pt',
		        )

		        self.model = deepbooru_model.DeepDanbooruModel()
		        self.model.load_state_dict(torch.load(files[0], map_location="cpu"))

		        self.model.eval()
		        self.model.to(devices.cpu, devices.dtype)

		    def start(self):
		        """Make sure the network is loaded and move it to devices.device."""
		        self.load()
		        self.model.to(devices.device)

		    def stop(self):
		        """Move the network back to the CPU unless models are kept in memory."""
		        if not shared.opts.interrogate_keep_models_in_memory:
		            self.model.to(devices.cpu)
		            devices.torch_gc()

		    def tag(self, pil_image):
		        """
		        Tag one image: start the model, run tag_multi, then stop the model.

		        :param pil_image: PIL image to tag
		        :return: the comma-separated tag string produced by tag_multi
		        """
		        self.start()
		        try:
		            return self.tag_multi(pil_image)
		        finally:
		            # Run stop() even when tagging fails, so the model does not stay
		            # on the device after an error.
		            self.stop()

		    def tag_multi(self, pil_image, force_disable_ranks=False):
		        """
		        Tag one image with an already started model.

		        Tags below the score threshold and tags starting with "rating:" are
		        dropped. The rest are ordered alphabetically or by descending
		        probability, then formatted according to the user options.

		        :param pil_image: PIL image to tag
		        :param force_disable_ranks: when true, never append probabilities,
		            regardless of the interrogate_return_ranks option
		        :return: the formatted tags joined with ", "
		        """
		        threshold = shared.opts.interrogate_deepbooru_score_threshold
		        use_spaces = shared.opts.deepbooru_use_spaces
		        use_escape = shared.opts.deepbooru_escape
		        alpha_sort = shared.opts.deepbooru_sort_alpha
		        include_ranks = shared.opts.interrogate_return_ranks and not force_disable_ranks

		        # NOTE(review): the meaning of resize mode 2 is defined in
		        # modules.images.resize_image - confirm there.
		        pic = images.resize_image(2, pil_image.convert("RGB"), 512, 512)
		        a = np.expand_dims(np.array(pic, dtype=np.float32), 0) / 255

		        with torch.no_grad(), devices.autocast():
		            # Put the input on the same device start() moved the model to.
		            # The previous hard-coded .cuda() failed on non-CUDA devices.
		            x = torch.from_numpy(a).to(devices.device)
		            y = self.model(x)[0].detach().cpu().numpy()

		        probability_dict = {}

		        for tag, probability in zip(self.model.tags, y):
		            if probability < threshold:
		                continue

		            if tag.startswith("rating:"):
		                continue

		            probability_dict[tag] = probability

		        if alpha_sort:
		            tags = sorted(probability_dict)
		        else:
		            tags = [tag for tag, _ in sorted(probability_dict.items(), key=lambda x: -x[1])]

		        res = []

		        for tag in tags:
		            probability = probability_dict[tag]
		            tag_outformat = tag
		            if use_spaces:
		                tag_outformat = tag_outformat.replace('_', ' ')
		            if use_escape:
		                tag_outformat = re.sub(re_special, r'\\\1', tag_outformat)
		            if include_ranks:
		                tag_outformat = f"({tag_outformat}:{probability:.3f})"

		            res.append(tag_outformat)

		        return ", ".join(res)


		def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts):
		    """
		    Tag one image with the TensorFlow DeepDanbooru model (worker-process path).

		    Kept because deepbooru_process still calls it.

		    :param model: model returned by get_deepbooru_tags_model
		    :param tags: tag names, indexed like the model output
		    :param pil_image: PIL image to tag
		    :param threshold: minimum score a tag needs to be reported
		    :param deepbooru_opts: dict with 'alpha_sort', 'use_spaces', 'use_escape'
		        and 'include_ranks'
		    :return: the formatted tags joined with ', '; the scored tags are also
		        printed to stdout
		    """
		    import deepdanbooru as dd
		    import tensorflow as tf
		    import numpy as np

		    alpha_sort = deepbooru_opts['alpha_sort']
		    use_spaces = deepbooru_opts['use_spaces']
		    use_escape = deepbooru_opts['use_escape']
		    include_ranks = deepbooru_opts['include_ranks']

		    width = model.input_shape[2]
		    height = model.input_shape[1]
		    image = np.array(pil_image)
		    image = tf.image.resize(
		        image,
		        size=(height, width),
		        method=tf.image.ResizeMethod.AREA,
		        preserve_aspect_ratio=True,
		    )
		    image = image.numpy()  # EagerTensor to np.array
		    image = dd.image.transform_and_pad_image(image, width, height)
		    image = image / 255.0
		    image_shape = image.shape
		    image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2]))

		    y = model.predict(image)[0]

		    result_dict = {}

		    for i, tag in enumerate(tags):
		        result_dict[tag] = y[i]

		    tags_in_threshold = []
		    result_tags_print = []
		    for tag in tags:
		        if result_dict[tag] >= threshold:
		            if tag.startswith("rating:"):
		                continue
		            tags_in_threshold.append((result_dict[tag], tag))
		            result_tags_print.append(f'{result_dict[tag]} {tag}')

		    # sort tags
		    result_tags_out = []
		    sort_ndx = 0
		    if alpha_sort:
		        sort_ndx = 1

		    # descending by score, or ascending by name when alpha_sort is set;
		    # then format each tag as requested
		    tags_in_threshold.sort(key=lambda item: item[sort_ndx], reverse=(not alpha_sort))
		    for weight, tag in tags_in_threshold:
		        tag_outformat = tag
		        if use_spaces:
		            tag_outformat = tag_outformat.replace('_', ' ')
		        if use_escape:
		            tag_outformat = re.sub(re_special, r'\\\1', tag_outformat)
		        if include_ranks:
		            tag_outformat = f"({tag_outformat}:{weight:.3f})"

		        result_tags_out.append(tag_outformat)

		    print('\n'.join(sorted(result_tags_print, reverse=True)))

		    return ', '.join(result_tags_out)


		model = DeepDanbooru()

modules/deepbooru_model.py

0 → 100644

+676 −0

File added.

Preview size limit exceeded, changes collapsed.

Admin message