Commit 36888092 authored by AUTOMATIC
Browse files

revert default cross attention optimization to Doggettx

make --disable-opt-split-attention command line option work again
parent f1533de9
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -62,7 +62,7 @@ parser.add_argument("--opt-split-attention-invokeai", action='store_true', help=
parser.add_argument("--opt-split-attention-v1", action='store_true', help="prefer older version of split attention optimization for automatic choice of optimization")
parser.add_argument("--opt-split-attention-v1", action='store_true', help="prefer older version of split attention optimization for automatic choice of optimization")
parser.add_argument("--opt-sdp-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization for automatic choice of optimization; requires PyTorch 2.*")
parser.add_argument("--opt-sdp-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization for automatic choice of optimization; requires PyTorch 2.*")
parser.add_argument("--opt-sdp-no-mem-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization without memory efficient attention for automatic choice of optimization, makes image generation deterministic; requires PyTorch 2.*")
parser.add_argument("--opt-sdp-no-mem-attention", action='store_true', help="prefer scaled dot product cross-attention layer optimization without memory efficient attention for automatic choice of optimization, makes image generation deterministic; requires PyTorch 2.*")
parser.add_argument("--disable-opt-split-attention", action='store_true', help="does not do anything")
parser.add_argument("--disable-opt-split-attention", action='store_true', help="prefer no cross-attention layer optimization for automatic choice of optimization")
parser.add_argument("--disable-nan-check", action='store_true', help="do not check if produced images/latent spaces have nans; useful for running without a checkpoint in CI")
parser.add_argument("--disable-nan-check", action='store_true', help="do not check if produced images/latent spaces have nans; useful for running without a checkpoint in CI")
parser.add_argument("--use-cpu", nargs='+', help="use CPU as torch device for specified modules", default=[], type=str.lower)
parser.add_argument("--use-cpu", nargs='+', help="use CPU as torch device for specified modules", default=[], type=str.lower)
parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests")
parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests")
+2 −0
Original line number Original line Diff line number Diff line
@@ -68,6 +68,8 @@ def apply_optimizations(option=None):


    if selection == "None":
    if selection == "None":
        matching_optimizer = None
        matching_optimizer = None
    elif selection == "Automatic" and shared.cmd_opts.disable_opt_split_attention:
        matching_optimizer = None
    elif matching_optimizer is None:
    elif matching_optimizer is None:
        matching_optimizer = optimizers[0]
        matching_optimizer = optimizers[0]


+3 −3
Original line number Original line Diff line number Diff line
@@ -57,7 +57,7 @@ class SdOptimizationSdpNoMem(SdOptimization):
    name = "sdp-no-mem"
    name = "sdp-no-mem"
    label = "scaled dot product without memory efficient attention"
    label = "scaled dot product without memory efficient attention"
    cmd_opt = "opt_sdp_no_mem_attention"
    cmd_opt = "opt_sdp_no_mem_attention"
    priority = 90
    priority = 80


    def is_available(self):
    def is_available(self):
        return hasattr(torch.nn.functional, "scaled_dot_product_attention") and callable(torch.nn.functional.scaled_dot_product_attention)
        return hasattr(torch.nn.functional, "scaled_dot_product_attention") and callable(torch.nn.functional.scaled_dot_product_attention)
@@ -71,7 +71,7 @@ class SdOptimizationSdp(SdOptimizationSdpNoMem):
    name = "sdp"
    name = "sdp"
    label = "scaled dot product"
    label = "scaled dot product"
    cmd_opt = "opt_sdp_attention"
    cmd_opt = "opt_sdp_attention"
    priority = 80
    priority = 70


    def apply(self):
    def apply(self):
        ldm.modules.attention.CrossAttention.forward = scaled_dot_product_attention_forward
        ldm.modules.attention.CrossAttention.forward = scaled_dot_product_attention_forward
@@ -114,7 +114,7 @@ class SdOptimizationInvokeAI(SdOptimization):
class SdOptimizationDoggettx(SdOptimization):
class SdOptimizationDoggettx(SdOptimization):
    name = "Doggettx"
    name = "Doggettx"
    cmd_opt = "opt_split_attention"
    cmd_opt = "opt_split_attention"
    priority = 20
    priority = 90


    def apply(self):
    def apply(self):
        ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward
        ldm.modules.attention.CrossAttention.forward = split_cross_attention_forward