Merge pull request #1752 from Greendayle/dev/deepdanbooru (e00b4df7) · Commits · github_fork / Stable Diffusion Webui

README.md

+2 −0

Original line number	Diff line number	Diff line
		@@ -66,6 +66,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
		- separate prompts using uppercase `AND`
		- also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
		- No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
		- DeepDanbooru integration, creates Danbooru-style tags for anime prompts (add `--deepdanbooru` to the command-line args)

		## Installation and Running
		Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs.
		@@ -123,4 +124,5 @@ The documentation was moved from this README over to the project's [wiki](https:
		- Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot
		- CLIP interrogator idea and borrowing some code - https://github.com/pharmapsychotic/clip-interrogator
		- Initial Gradio script - posted on 4chan by an Anonymous user. Thank you Anonymous user.
		- DeepDanbooru - interrogator for anime diffusers - https://github.com/KichangKim/DeepDanbooru
		- (You)

launch.py

+4 −0

Original line number	Diff line number	Diff line
		@@ -33,6 +33,7 @@ def extract_arg(args, name):

		args, skip_torch_cuda_test = extract_arg(args, '--skip-torch-cuda-test')
		xformers = '--xformers' in args
		deepdanbooru = '--deepdanbooru' in args


		def repo_dir(name):
		@@ -132,6 +133,9 @@ if not is_installed("xformers") and xformers and platform.python_version().start
		elif platform.system() == "Linux":
		run_pip("install xformers", "xformers")

		# Install DeepDanbooru only when the package is not already installed and the
		# `deepdanbooru` flag is set. The pip command pins DeepDanbooru to a fixed git
		# commit and pins the tensorflow / tensorflow-io versions installed with it.
		if not is_installed("deepdanbooru") and deepdanbooru:
		run_pip("install git+https://github.com/KichangKim/DeepDanbooru.git@edf73df4cdaeea2cf00e9ac08bd8a9026b7a7b26#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru")

		os.makedirs(dir_repos, exist_ok=True)

		git_clone("https://github.com/CompVis/stable-diffusion.git", repo_dir('stable-diffusion'), "Stable Diffusion", stable_diffusion_commit_hash)

models/deepbooru/Put your deepbooru release project folder here.txt

0 → 100644

+0 −0

Empty file added.

modules/deepbooru.py

0 → 100644

+73 −0

Original line number	Diff line number	Diff line
		import os.path
		from concurrent.futures import ProcessPoolExecutor
		from multiprocessing import get_context


		def _load_tf_and_return_tags(pil_image, threshold):
		    """Run the DeepDanbooru model on an image and return its tags as one string.

		    The heavy dependencies (deepdanbooru, tensorflow, numpy) are imported
		    inside the function so that they are only loaded in the process that
		    actually executes it.

		    If ``models/deepbooru/project.json`` (relative to this module's parent
		    folder) does not exist, the v3-20211112-sgd-e28 release archive is
		    downloaded into that folder, extracted there, and the archive is deleted.

		    Args:
		        pil_image: PIL image to tag. It is converted to RGB before being
		            handed to the model, so RGBA, palette and grayscale inputs work.
		        threshold: Minimum score a tag must reach to be included.

		    Returns:
		        The accepted tags, in the project's tag order, joined with ``", "``;
		        underscores and colons in the tags are replaced by spaces. Tags
		        starting with ``"rating:"`` are never included.
		    """
		    import deepdanbooru as dd
		    import tensorflow as tf
		    import numpy as np

		    this_folder = os.path.dirname(__file__)
		    model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru'))
		    if not os.path.exists(os.path.join(model_path, 'project.json')):
		        # there is no point importing these every time
		        import zipfile
		        from basicsr.utils.download_util import load_file_from_url
		        load_file_from_url(r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
		                           model_path)
		        archive_path = os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")
		        with zipfile.ZipFile(archive_path, "r") as zip_ref:
		            zip_ref.extractall(model_path)
		        # The archive is no longer needed once its contents are extracted.
		        os.remove(archive_path)

		    tags = dd.project.load_tags_from_project(model_path)
		    model = dd.project.load_model_from_project(
		        model_path, compile_model=True
		    )

		    # input_shape is indexed as (batch, height, width, ...).
		    width = model.input_shape[2]
		    height = model.input_shape[1]

		    # Force three colour channels: without this an RGBA image yields four
		    # channels and a grayscale image yields a 2-D array, neither of which
		    # matches what the resize / model steps below are given for RGB input.
		    image = np.array(pil_image.convert("RGB"))
		    image = tf.image.resize(
		        image,
		        size=(height, width),
		        method=tf.image.ResizeMethod.AREA,
		        preserve_aspect_ratio=True,
		    )
		    image = image.numpy()  # EagerTensor to np.array
		    image = dd.image.transform_and_pad_image(image, width, height)
		    image = image / 255.0
		    # Add a leading batch dimension of size 1.
		    image = image.reshape((1,) + tuple(image.shape[:3]))

		    y = model.predict(image)[0]

		    # Map every tag to its score; scores are positional, matching `tags`.
		    scores = dict(zip(tags, y))

		    accepted = [
		        (scores[tag], tag)
		        for tag in tags
		        if scores[tag] >= threshold and not tag.startswith("rating:")
		    ]

		    # Debug listing, highest score first. Sorting the numeric scores (rather
		    # than the formatted strings) keeps the order right even when a score is
		    # printed in scientific notation.
		    print('\n'.join(f'{score} {tag}' for score, tag in sorted(accepted, reverse=True)))

		    result_tags_out = [tag for _, tag in accepted]
		    return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ')


		def subprocess_init_no_cuda():
		    """Set ``CUDA_VISIBLE_DEVICES`` to ``"-1"`` in the current process environment."""
		    from os import environ
		    environ.update({"CUDA_VISIBLE_DEVICES": "-1"})


		def get_deepbooru_tags(pil_image, threshold=0.5):
		    """Compute DeepDanbooru tags for an image in a separate worker process.

		    The work is delegated to ``_load_tf_and_return_tags`` running in a
		    process started with the ``spawn`` method, with
		    ``subprocess_init_no_cuda`` as the worker initializer. The pool is shut
		    down when the call finishes.

		    Args:
		        pil_image: Image to tag; it is sent to the worker process.
		        threshold: Minimum score for a tag to be returned (default 0.5).

		    Returns:
		        The string produced by ``_load_tf_and_return_tags``.

		    Raises:
		        Any exception raised in the worker is re-raised here.
		    """
		    context = get_context('spawn')
		    # Exactly one task is submitted, so cap the pool at a single worker
		    # instead of the default of one worker per CPU.
		    with ProcessPoolExecutor(max_workers=1, initializer=subprocess_init_no_cuda, mp_context=context) as executor:
		        f = executor.submit(_load_tf_and_return_tags, pil_image, threshold)
		        ret = f.result()  # will rethrow any exceptions
		    return ret
		No newline at end of file

modules/shared.py

+1 −0

Original line number	Diff line number	Diff line
		@@ -45,6 +45,7 @@ parser.add_argument("--swinir-models-path", type=str, help="Path to directory wi
		parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(models_path, 'LDSR'))
		parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers")
		parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work")
		parser.add_argument("--deepdanbooru", action='store_true', help="enable deepdanbooru interrogator")
		parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.")
		parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization")
		parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")

Admin message