Commit c81d440d authored by AUTOMATIC's avatar AUTOMATIC
Browse files

moved deepdanbooru to pure pytorch implementation

parent 47a44c7e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -70,7 +70,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
     - separate prompts using uppercase `AND`
     - also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
- No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
- DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args)
- DeepDanbooru integration, creates danbooru style tags for anime prompts
- [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add --xformers to commandline args)
- via extension: [History tab](https://github.com/yfszzx/stable-diffusion-webui-images-browser): view, direct and delete images conveniently within the UI
- Generate forever option
+0 −5
Original line number Diff line number Diff line
@@ -134,7 +134,6 @@ def prepare_enviroment():

    gfpgan_package = os.environ.get('GFPGAN_PACKAGE', "git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379")
    clip_package = os.environ.get('CLIP_PACKAGE', "git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1")
    deepdanbooru_package = os.environ.get('DEEPDANBOORU_PACKAGE', "git+https://github.com/KichangKim/DeepDanbooru.git@d91a2963bf87c6a770d74894667e9ffa9f6de7ff")

    xformers_windows_package = os.environ.get('XFORMERS_WINDOWS_PACKAGE', 'https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl')

@@ -158,7 +157,6 @@ def prepare_enviroment():
    sys.argv, update_check = extract_arg(sys.argv, '--update-check')
    sys.argv, run_tests = extract_arg(sys.argv, '--tests')
    xformers = '--xformers' in sys.argv
    deepdanbooru = '--deepdanbooru' in sys.argv
    ngrok = '--ngrok' in sys.argv

    try:
@@ -193,9 +191,6 @@ def prepare_enviroment():
        elif platform.system() == "Linux":
            run_pip("install xformers", "xformers")

    if not is_installed("deepdanbooru") and deepdanbooru:
        run_pip(f"install {deepdanbooru_package}#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru")

    if not is_installed("pyngrok") and ngrok:
        run_pip("install pyngrok", "ngrok")

+2 −8
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ from fastapi.security import HTTPBasic, HTTPBasicCredentials
from secrets import compare_digest

import modules.shared as shared
from modules import sd_samplers
from modules import sd_samplers, deepbooru
from modules.api.models import *
from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
from modules.extras import run_extras, run_pnginfo
@@ -18,9 +18,6 @@ from modules.sd_models import checkpoints_list
from modules.realesrgan_model import get_realesrgan_models
from typing import List

if shared.cmd_opts.deepdanbooru:
    from modules.deepbooru import get_deepbooru_tags

def upscaler_to_index(name: str):
    try:
        return [x.name.lower() for x in shared.sd_upscalers].index(name.lower())
@@ -245,10 +242,7 @@ class Api:
            if interrogatereq.model == "clip":
                processed = shared.interrogator.interrogate(img)
            elif interrogatereq.model == "deepdanbooru":
                if shared.cmd_opts.deepdanbooru:
                    processed = get_deepbooru_tags(img)
                else:
                    raise HTTPException(status_code=404, detail="Model not found. Add --deepdanbooru when launching for using the model.")
                processed = deepbooru.model.tag(img)
            else:
                raise HTTPException(status_code=404, detail="Model not found")
        
+91 −167
Original line number Diff line number Diff line
import os.path
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
import time
import os
import re

import torch
from PIL import Image
import numpy as np

from modules import modelloader, paths, deepbooru_model, devices, images, shared

re_special = re.compile(r'([\\()])')

def get_deepbooru_tags(pil_image):
    """
    Tag a single image with DeepDanbooru.  Convenience wrapper for one-off use,
    such as the img2img interrogate feature.

    A worker process is created for this call and is always released afterwards,
    whether or not tagging succeeded.
    """
    from modules import shared  # imported here to prevent a circular reference

    try:
        threshold = shared.opts.interrogate_deepbooru_score_threshold
        create_deepbooru_process(threshold, create_deepbooru_opts())
        caption = get_tags_from_process(pil_image)
    finally:
        release_process()

    return caption


OPT_INCLUDE_RANKS = "include_ranks"
def create_deepbooru_opts():
    """
    Collect the DeepDanbooru-related settings from shared.opts into a plain dict,
    in the form expected as the deepbooru_opts argument of create_deepbooru_process.
    """
    from modules import shared

    settings = shared.opts

    opts = {}
    opts["use_spaces"] = settings.deepbooru_use_spaces
    opts["use_escape"] = settings.deepbooru_escape
    opts["alpha_sort"] = settings.deepbooru_sort_alpha
    opts[OPT_INCLUDE_RANKS] = settings.interrogate_return_ranks
    return opts


def deepbooru_process(queue, deepbooru_process_return, threshold, deepbooru_opts):
    """
    Worker entry point: load the model once, then tag every image taken from *queue*.

    The caption for each image is stored under deepbooru_process_return["value"].
    Receiving the string "QUIT" ends the loop and lets the process exit.
    """
    model, tags = get_deepbooru_tags_model()

    # keep waiting on the queue for as long as the process is running
    while True:
        item = queue.get()
        if item == "QUIT":
            return

        caption = get_deepbooru_tags_from_model(model, tags, item, threshold, deepbooru_opts)
        deepbooru_process_return["value"] = caption


def create_deepbooru_process(threshold, deepbooru_opts):
    """
    Start the deepbooru worker process and store its handles on `modules.shared`.

    A manager-backed queue is created to send images into the process.  This enables multiple images
    to be processed in a row without reloading the model or creating a new process.  To return the data,
    a manager-backed dictionary holds the generated tags under the "value" key.  That entry is initialised
    to -1, and whoever puts an image on the queue should wait for it to be replaced with the tags.

    threshold and deepbooru_opts are forwarded to the worker unchanged.
    """
    from modules import shared  # prevents circular reference
    # the "spawn" start method is requested explicitly instead of relying on the platform default
    context = multiprocessing.get_context("spawn")
    # the manager owns both the queue and the result dict shared with the worker
    shared.deepbooru_process_manager = context.Manager()
    shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue()
    shared.deepbooru_process_return = shared.deepbooru_process_manager.dict()
    # -1 is the "no result yet" marker that get_tags_from_process polls for
    shared.deepbooru_process_return["value"] = -1
    shared.deepbooru_process = context.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, deepbooru_opts))
    shared.deepbooru_process.start()


def get_tags_from_process(image):
    """
    Send *image* to the running deepbooru worker and block until its caption is available.

    The "value" slot of the shared return dict is reset to -1 before the image is queued and
    again after the caption has been read, so -1 always means "no result pending".

    Raises:
        RuntimeError: if the worker process has exited without producing a result (for example
            because loading the model failed in the child).  Without this check the polling
            loop below would never terminate.
    """
    from modules import shared

    result = shared.deepbooru_process_return
    # Tolerate callers that populated the queue/return dict without registering a process handle.
    worker = getattr(shared, "deepbooru_process", None)

    result["value"] = -1
    shared.deepbooru_process_queue.put(image)

    while result["value"] == -1:
        if worker is not None and not worker.is_alive():
            # The worker may have written the result just before exiting; look once more
            # before giving up, since a dead worker can never fill the slot afterwards.
            if result["value"] != -1:
                break
            raise RuntimeError("deepbooru process exited before returning tags")
        time.sleep(0.2)

    caption = result["value"]
    result["value"] = -1

    return caption


def release_process():
    """
    Stops the deepbooru process to return used memory.

    Sends the "QUIT" sentinel, waits for the process to finish, then clears every
    handle that create_deepbooru_process stored on `modules.shared`.
    """
    from modules import shared  # prevents circular reference

    shared.deepbooru_process_queue.put("QUIT")
    shared.deepbooru_process.join()

    handles = (
        "deepbooru_process_queue",
        "deepbooru_process",
        "deepbooru_process_return",
        "deepbooru_process_manager",
    )
    for handle in handles:
        setattr(shared, handle, None)

def get_deepbooru_tags_model():
    """
    Load the DeepDanbooru project stored in `../models/deepbooru` relative to this file,
    downloading it first when it is not there yet.

    The project counts as missing when `project.json` is absent.  In that case the release
    archive is downloaded into the model folder, extracted in place and then deleted.

    Returns a `(model, tags)` tuple as loaded by the deepdanbooru package.
    """
    import deepdanbooru as dd
    # tensorflow and numpy are not referenced below; the imports are kept as in the original
    import tensorflow as tf
    import numpy as np

    module_dir = os.path.dirname(__file__)
    model_path = os.path.abspath(os.path.join(module_dir, '..', 'models', 'deepbooru'))
    project_file = os.path.join(model_path, 'project.json')

    if not os.path.exists(project_file):
        # there is no point importing these every time
        import zipfile
        from basicsr.utils.download_util import load_file_from_url

        archive_path = os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")
        load_file_from_url(
            r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
            model_path)
        with zipfile.ZipFile(archive_path, "r") as archive:
            archive.extractall(model_path)
        os.remove(archive_path)

    tags = dd.project.load_tags_from_project(model_path)
    model = dd.project.load_model_from_project(model_path, compile_model=False)
    return model, tags

class DeepDanbooru:
    def __init__(self):
        # None means "not loaded yet"; load() checks for this before building the model.
        self.model = None

def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts):
    import deepdanbooru as dd
    import tensorflow as tf
    import numpy as np
    def load(self):
        if self.model is not None:
            return

    alpha_sort = deepbooru_opts['alpha_sort']
    use_spaces = deepbooru_opts['use_spaces']
    use_escape = deepbooru_opts['use_escape']
    include_ranks = deepbooru_opts['include_ranks']

    width = model.input_shape[2]
    height = model.input_shape[1]
    image = np.array(pil_image)
    image = tf.image.resize(
        image,
        size=(height, width),
        method=tf.image.ResizeMethod.AREA,
        preserve_aspect_ratio=True,
        files = modelloader.load_models(
            model_path=os.path.join(paths.models_path, "torch_deepdanbooru"),
            model_url='https://github.com/AUTOMATIC1111/TorchDeepDanbooru/releases/download/v1/model-resnet_custom_v3.pt',
            ext_filter=".pt",
            download_name='model-resnet_custom_v3.pt',
        )
    image = image.numpy()  # EagerTensor to np.array
    image = dd.image.transform_and_pad_image(image, width, height)
    image = image / 255.0
    image_shape = image.shape
    image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2]))

    y = model.predict(image)[0]
        self.model = deepbooru_model.DeepDanbooruModel()
        self.model.load_state_dict(torch.load(files[0], map_location="cpu"))

    result_dict = {}
        self.model.eval()
        self.model.to(devices.cpu, devices.dtype)

    for i, tag in enumerate(tags):
        result_dict[tag] = y[i]
    def start(self):
        """Make sure the model is loaded, then move it to devices.device."""
        self.load()
        self.model.to(devices.device)

    def stop(self):
        """
        Move the model back to devices.cpu and call devices.torch_gc(), unless the
        interrogate_keep_models_in_memory option is enabled.
        """
        if shared.opts.interrogate_keep_models_in_memory:
            return

        self.model.to(devices.cpu)
        devices.torch_gc()

    def tag(self, pil_image):
        """
        Tag a single image: start the model, run tag_multi on *pil_image*, then stop the model.

        Returns whatever tag_multi returns for the image.  stop() runs even when tag_multi
        raises, so a failed interrogation does not skip the cleanup; the exception still
        propagates to the caller.
        """
        self.start()
        try:
            return self.tag_multi(pil_image)
        finally:
            self.stop()

    def tag_multi(self, pil_image, force_disable_ranks=False):
        threshold = shared.opts.interrogate_deepbooru_score_threshold
        use_spaces = shared.opts.deepbooru_use_spaces
        use_escape = shared.opts.deepbooru_escape
        alpha_sort = shared.opts.deepbooru_sort_alpha
        include_ranks = shared.opts.interrogate_return_ranks and not force_disable_ranks

        pic = images.resize_image(2, pil_image.convert("RGB"), 512, 512)
        a = np.expand_dims(np.array(pic, dtype=np.float32), 0) / 255

        with torch.no_grad(), devices.autocast():
            x = torch.from_numpy(a).cuda()
            y = self.model(x)[0].detach().cpu().numpy()

        probability_dict = {}

        for tag, probability in zip(self.model.tags, y):
            if probability < threshold:
                continue

    unsorted_tags_in_theshold = []
    result_tags_print = []
    for tag in tags:
        if result_dict[tag] >= threshold:
            if tag.startswith("rating:"):
                continue
            unsorted_tags_in_theshold.append((result_dict[tag], tag))
            result_tags_print.append(f'{result_dict[tag]} {tag}')

    # sort tags
    result_tags_out = []
    sort_ndx = 0
            probability_dict[tag] = probability

        if alpha_sort:
        sort_ndx = 1
            tags = sorted(probability_dict)
        else:
            tags = [tag for tag, _ in sorted(probability_dict.items(), key=lambda x: -x[1])]

        res = []

    # sort by descending likelihood, or ascending alphabetically when alpha_sort is set, then format tag text as requested
    unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort))
    for weight, tag in unsorted_tags_in_theshold:
        for tag in tags:
            probability = probability_dict[tag]
            tag_outformat = tag
            if use_spaces:
                tag_outformat = tag_outformat.replace('_', ' ')
            if use_escape:
                tag_outformat = re.sub(re_special, r'\\\1', tag_outformat)
            if include_ranks:
            tag_outformat = f"({tag_outformat}:{weight:.3f})"
                tag_outformat = f"({tag_outformat}:{probability:.3f})"

            res.append(tag_outformat)

        result_tags_out.append(tag_outformat)
        return ", ".join(res)

    print('\n'.join(sorted(result_tags_print, reverse=True)))

    return ', '.join(result_tags_out)
model = DeepDanbooru()
+676 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading