Commit c81d440d authored by AUTOMATIC's avatar AUTOMATIC
Browse files

moved deepdanbooru to pure pytorch implementation

parent 47a44c7e
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -70,7 +70,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
     - separate prompts using uppercase `AND`
     - separate prompts using uppercase `AND`
     - also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
     - also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
- No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
- No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
- DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args)
- DeepDanbooru integration, creates danbooru style tags for anime prompts
- [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add --xformers to commandline args)
- [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add --xformers to commandline args)
- via extension: [History tab](https://github.com/yfszzx/stable-diffusion-webui-images-browser): view, direct and delete images conveniently within the UI
- via extension: [History tab](https://github.com/yfszzx/stable-diffusion-webui-images-browser): view, direct and delete images conveniently within the UI
- Generate forever option
- Generate forever option
+0 −5
Original line number Original line Diff line number Diff line
@@ -134,7 +134,6 @@ def prepare_enviroment():


    gfpgan_package = os.environ.get('GFPGAN_PACKAGE', "git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379")
    gfpgan_package = os.environ.get('GFPGAN_PACKAGE', "git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379")
    clip_package = os.environ.get('CLIP_PACKAGE', "git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1")
    clip_package = os.environ.get('CLIP_PACKAGE', "git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1")
    deepdanbooru_package = os.environ.get('DEEPDANBOORU_PACKAGE', "git+https://github.com/KichangKim/DeepDanbooru.git@d91a2963bf87c6a770d74894667e9ffa9f6de7ff")


    xformers_windows_package = os.environ.get('XFORMERS_WINDOWS_PACKAGE', 'https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl')
    xformers_windows_package = os.environ.get('XFORMERS_WINDOWS_PACKAGE', 'https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl')


@@ -158,7 +157,6 @@ def prepare_enviroment():
    sys.argv, update_check = extract_arg(sys.argv, '--update-check')
    sys.argv, update_check = extract_arg(sys.argv, '--update-check')
    sys.argv, run_tests = extract_arg(sys.argv, '--tests')
    sys.argv, run_tests = extract_arg(sys.argv, '--tests')
    xformers = '--xformers' in sys.argv
    xformers = '--xformers' in sys.argv
    deepdanbooru = '--deepdanbooru' in sys.argv
    ngrok = '--ngrok' in sys.argv
    ngrok = '--ngrok' in sys.argv


    try:
    try:
@@ -193,9 +191,6 @@ def prepare_enviroment():
        elif platform.system() == "Linux":
        elif platform.system() == "Linux":
            run_pip("install xformers", "xformers")
            run_pip("install xformers", "xformers")


    if not is_installed("deepdanbooru") and deepdanbooru:
        run_pip(f"install {deepdanbooru_package}#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru")

    if not is_installed("pyngrok") and ngrok:
    if not is_installed("pyngrok") and ngrok:
        run_pip("install pyngrok", "ngrok")
        run_pip("install pyngrok", "ngrok")


+2 −8
Original line number Original line Diff line number Diff line
@@ -9,7 +9,7 @@ from fastapi.security import HTTPBasic, HTTPBasicCredentials
from secrets import compare_digest
from secrets import compare_digest


import modules.shared as shared
import modules.shared as shared
from modules import sd_samplers
from modules import sd_samplers, deepbooru
from modules.api.models import *
from modules.api.models import *
from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
from modules.extras import run_extras, run_pnginfo
from modules.extras import run_extras, run_pnginfo
@@ -18,9 +18,6 @@ from modules.sd_models import checkpoints_list
from modules.realesrgan_model import get_realesrgan_models
from modules.realesrgan_model import get_realesrgan_models
from typing import List
from typing import List


if shared.cmd_opts.deepdanbooru:
    from modules.deepbooru import get_deepbooru_tags

def upscaler_to_index(name: str):
def upscaler_to_index(name: str):
    try:
    try:
        return [x.name.lower() for x in shared.sd_upscalers].index(name.lower())
        return [x.name.lower() for x in shared.sd_upscalers].index(name.lower())
@@ -245,10 +242,7 @@ class Api:
            if interrogatereq.model == "clip":
            if interrogatereq.model == "clip":
                processed = shared.interrogator.interrogate(img)
                processed = shared.interrogator.interrogate(img)
            elif interrogatereq.model == "deepdanbooru":
            elif interrogatereq.model == "deepdanbooru":
                if shared.cmd_opts.deepdanbooru:
                processed = deepbooru.model.tag(img)
                    processed = get_deepbooru_tags(img)
                else:
                    raise HTTPException(status_code=404, detail="Model not found. Add --deepdanbooru when launching for using the model.")
            else:
            else:
                raise HTTPException(status_code=404, detail="Model not found")
                raise HTTPException(status_code=404, detail="Model not found")
        
        
+91 −167
Original line number Original line Diff line number Diff line
import os.path
import os
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
import time
import re
import re


import torch
from PIL import Image
import numpy as np

from modules import modelloader, paths, deepbooru_model, devices, images, shared

re_special = re.compile(r'([\\()])')
re_special = re.compile(r'([\\()])')


def get_deepbooru_tags(pil_image):
    """
    This method is for running only one image at a time for simple use.  Used to the img2img interrogate.
    """
    from modules import shared  # prevents circular reference

    try:
        create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, create_deepbooru_opts())
        return get_tags_from_process(pil_image)
    finally:
        release_process()


OPT_INCLUDE_RANKS = "include_ranks"
def create_deepbooru_opts():
    from modules import shared

    return {
        "use_spaces": shared.opts.deepbooru_use_spaces,
        "use_escape": shared.opts.deepbooru_escape,
        "alpha_sort": shared.opts.deepbooru_sort_alpha,
        OPT_INCLUDE_RANKS: shared.opts.interrogate_return_ranks,
    }


def deepbooru_process(queue, deepbooru_process_return, threshold, deepbooru_opts):
    model, tags = get_deepbooru_tags_model()
    while True: # while process is running, keep monitoring queue for new image
        pil_image = queue.get()
        if pil_image == "QUIT":
            break
        else:
            deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts)


def create_deepbooru_process(threshold, deepbooru_opts):
    """
    Creates deepbooru process.  A queue is created to send images into the process.  This enables multiple images
    to be processed in a row without reloading the model or creating a new process.  To return the data, a shared
    dictionary is created to hold the tags created.  To wait for tags to be returned, a value of -1 is assigned
    to the dictionary and the method adding the image to the queue should wait for this value to be updated with
    the tags.
    """
    from modules import shared  # prevents circular reference
    context = multiprocessing.get_context("spawn")
    shared.deepbooru_process_manager = context.Manager()
    shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue()
    shared.deepbooru_process_return = shared.deepbooru_process_manager.dict()
    shared.deepbooru_process_return["value"] = -1
    shared.deepbooru_process = context.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, deepbooru_opts))
    shared.deepbooru_process.start()


def get_tags_from_process(image):
    from modules import shared

    shared.deepbooru_process_return["value"] = -1
    shared.deepbooru_process_queue.put(image)
    while shared.deepbooru_process_return["value"] == -1:
        time.sleep(0.2)
    caption = shared.deepbooru_process_return["value"]
    shared.deepbooru_process_return["value"] = -1

    return caption


def release_process():
    """
    Stops the deepbooru process to return used memory
    """
    from modules import shared  # prevents circular reference
    shared.deepbooru_process_queue.put("QUIT")
    shared.deepbooru_process.join()
    shared.deepbooru_process_queue = None
    shared.deepbooru_process = None
    shared.deepbooru_process_return = None
    shared.deepbooru_process_manager = None

def get_deepbooru_tags_model():
    import deepdanbooru as dd
    import tensorflow as tf
    import numpy as np
    this_folder = os.path.dirname(__file__)
    model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru'))
    if not os.path.exists(os.path.join(model_path, 'project.json')):
        # there is no point importing these every time
        import zipfile
        from basicsr.utils.download_util import load_file_from_url
        load_file_from_url(
            r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
            model_path)
        with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref:
            zip_ref.extractall(model_path)
        os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"))

    tags = dd.project.load_tags_from_project(model_path)
    model = dd.project.load_model_from_project(
        model_path, compile_model=False
    )
    return model, tags


class DeepDanbooru:
    def __init__(self):
        self.model = None


def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts):
    def load(self):
    import deepdanbooru as dd
        if self.model is not None:
    import tensorflow as tf
            return
    import numpy as np


    alpha_sort = deepbooru_opts['alpha_sort']
        files = modelloader.load_models(
    use_spaces = deepbooru_opts['use_spaces']
            model_path=os.path.join(paths.models_path, "torch_deepdanbooru"),
    use_escape = deepbooru_opts['use_escape']
            model_url='https://github.com/AUTOMATIC1111/TorchDeepDanbooru/releases/download/v1/model-resnet_custom_v3.pt',
    include_ranks = deepbooru_opts['include_ranks']
            ext_filter=".pt",

            download_name='model-resnet_custom_v3.pt',
    width = model.input_shape[2]
    height = model.input_shape[1]
    image = np.array(pil_image)
    image = tf.image.resize(
        image,
        size=(height, width),
        method=tf.image.ResizeMethod.AREA,
        preserve_aspect_ratio=True,
        )
        )
    image = image.numpy()  # EagerTensor to np.array
    image = dd.image.transform_and_pad_image(image, width, height)
    image = image / 255.0
    image_shape = image.shape
    image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2]))


    y = model.predict(image)[0]
        self.model = deepbooru_model.DeepDanbooruModel()
        self.model.load_state_dict(torch.load(files[0], map_location="cpu"))


    result_dict = {}
        self.model.eval()
        self.model.to(devices.cpu, devices.dtype)


    for i, tag in enumerate(tags):
    def start(self):
        result_dict[tag] = y[i]
        self.load()
        self.model.to(devices.device)

    def stop(self):
        if not shared.opts.interrogate_keep_models_in_memory:
            self.model.to(devices.cpu)
            devices.torch_gc()

    def tag(self, pil_image):
        self.start()
        res = self.tag_multi(pil_image)
        self.stop()

        return res

    def tag_multi(self, pil_image, force_disable_ranks=False):
        threshold = shared.opts.interrogate_deepbooru_score_threshold
        use_spaces = shared.opts.deepbooru_use_spaces
        use_escape = shared.opts.deepbooru_escape
        alpha_sort = shared.opts.deepbooru_sort_alpha
        include_ranks = shared.opts.interrogate_return_ranks and not force_disable_ranks

        pic = images.resize_image(2, pil_image.convert("RGB"), 512, 512)
        a = np.expand_dims(np.array(pic, dtype=np.float32), 0) / 255

        with torch.no_grad(), devices.autocast():
            x = torch.from_numpy(a).cuda()
            y = self.model(x)[0].detach().cpu().numpy()

        probability_dict = {}

        for tag, probability in zip(self.model.tags, y):
            if probability < threshold:
                continue


    unsorted_tags_in_theshold = []
    result_tags_print = []
    for tag in tags:
        if result_dict[tag] >= threshold:
            if tag.startswith("rating:"):
            if tag.startswith("rating:"):
                continue
                continue
            unsorted_tags_in_theshold.append((result_dict[tag], tag))
            result_tags_print.append(f'{result_dict[tag]} {tag}')


    # sort tags
            probability_dict[tag] = probability
    result_tags_out = []

    sort_ndx = 0
        if alpha_sort:
        if alpha_sort:
        sort_ndx = 1
            tags = sorted(probability_dict)
        else:
            tags = [tag for tag, _ in sorted(probability_dict.items(), key=lambda x: -x[1])]

        res = []


    # sort by reverse by likelihood and normal for alpha, and format tag text as requested
        for tag in tags:
    unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort))
            probability = probability_dict[tag]
    for weight, tag in unsorted_tags_in_theshold:
            tag_outformat = tag
            tag_outformat = tag
            if use_spaces:
            if use_spaces:
                tag_outformat = tag_outformat.replace('_', ' ')
                tag_outformat = tag_outformat.replace('_', ' ')
            if use_escape:
            if use_escape:
                tag_outformat = re.sub(re_special, r'\\\1', tag_outformat)
                tag_outformat = re.sub(re_special, r'\\\1', tag_outformat)
            if include_ranks:
            if include_ranks:
            tag_outformat = f"({tag_outformat}:{weight:.3f})"
                tag_outformat = f"({tag_outformat}:{probability:.3f})"

            res.append(tag_outformat)


        result_tags_out.append(tag_outformat)
        return ", ".join(res)


    print('\n'.join(sorted(result_tags_print, reverse=True)))


    return ', '.join(result_tags_out)
model = DeepDanbooru()
+676 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading