getting SD2.1 to run on SDXL repo (af081211) · Commits · github_fork / Stable Diffusion Webui

modules/launch_utils.py

+3 −0

Original line number	Diff line number	Diff line
		@@ -235,11 +235,13 @@ def prepare_environment():
		openclip_package = os.environ.get('OPENCLIP_PACKAGE', "https://github.com/mlfoundations/open_clip/archive/bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b.zip")

		stable_diffusion_repo = os.environ.get('STABLE_DIFFUSION_REPO', "https://github.com/Stability-AI/stablediffusion.git")
		stable_diffusion_xl_repo = os.environ.get('STABLE_DIFFUSION_XL_REPO', "https://github.com/Stability-AI/generative-models.git")
		k_diffusion_repo = os.environ.get('K_DIFFUSION_REPO', 'https://github.com/crowsonkb/k-diffusion.git')
		codeformer_repo = os.environ.get('CODEFORMER_REPO', 'https://github.com/sczhou/CodeFormer.git')
		blip_repo = os.environ.get('BLIP_REPO', 'https://github.com/salesforce/BLIP.git')

		stable_diffusion_commit_hash = os.environ.get('STABLE_DIFFUSION_COMMIT_HASH', "cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf")
		stable_diffusion_xl_commit_hash = os.environ.get('STABLE_DIFFUSION_XL_COMMIT_HASH', "5c10deee76adad0032b412294130090932317a87")
		k_diffusion_commit_hash = os.environ.get('K_DIFFUSION_COMMIT_HASH', "c9fe758757e022f05ca5a53fa8fac28889e4f1cf")
		codeformer_commit_hash = os.environ.get('CODEFORMER_COMMIT_HASH', "c5b4593074ba6214284d6acd5f1719b6c5d739af")
		blip_commit_hash = os.environ.get('BLIP_COMMIT_HASH', "48211a1594f1321b00f14c9f7a5b4813144b2fb9")
		@@ -297,6 +299,7 @@ def prepare_environment():
		os.makedirs(os.path.join(script_path, dir_repos), exist_ok=True)

		git_clone(stable_diffusion_repo, repo_dir('stable-diffusion-stability-ai'), "Stable Diffusion", stable_diffusion_commit_hash)
		git_clone(stable_diffusion_xl_repo, repo_dir('generative-models'), "Stable Diffusion XL", stable_diffusion_xl_commit_hash)
		git_clone(k_diffusion_repo, repo_dir('k-diffusion'), "K-diffusion", k_diffusion_commit_hash)
		git_clone(codeformer_repo, repo_dir('CodeFormer'), "CodeFormer", codeformer_commit_hash)
		git_clone(blip_repo, repo_dir('BLIP'), "BLIP", blip_commit_hash)

modules/paths.py

+1 −0

Original line number	Diff line number	Diff line
		@@ -20,6 +20,7 @@ assert sd_path is not None, f"Couldn't find Stable Diffusion in any of: {possibl

		path_dirs = [
		(sd_path, 'ldm', 'Stable Diffusion', []),
		(os.path.join(sd_path, '../generative-models'), 'sgm', 'Stable Diffusion XL', []),
		(os.path.join(sd_path, '../CodeFormer'), 'inference_codeformer.py', 'CodeFormer', []),
		(os.path.join(sd_path, '../BLIP'), 'models/blip.py', 'BLIP', []),
		(os.path.join(sd_path, '../k-diffusion'), 'k_diffusion/sampling.py', 'k_diffusion', ["atstart"]),

modules/prompt_parser.py

+52 −12

Original line number	Diff line number	Diff line
		@@ -144,7 +144,12 @@ def get_learned_conditioning(model, prompts, steps):

		cond_schedule = []
		for i, (end_at_step, _) in enumerate(prompt_schedule):
		cond_schedule.append(ScheduledPromptConditioning(end_at_step, conds[i]))
		if isinstance(conds, dict):
		cond = {k: v[i] for k, v in conds.items()}
		else:
		cond = conds[i]

		cond_schedule.append(ScheduledPromptConditioning(end_at_step, cond))

		cache[prompt] = cond_schedule
		res.append(cond_schedule)
		@@ -214,20 +219,57 @@ def get_multicond_learned_conditioning(model, prompts, steps) -> MulticondLearne
		return MulticondLearnedConditioning(shape=(len(prompts),), batch=res)


		class DictWithShape(dict):
		    """Dict of conditioning entries that also exposes a tensor-like ``shape``.

		    Lets code written for a single conditioning tensor (which reads
		    ``cond.shape``) keep working when the conditioning is a dict keyed by
		    name. The reported shape is always that of the ``"crossattn"`` entry.
		    """

		    def __init__(self, x, shape=None):
		        """Copy the entries of *x* into this dict.

		        Args:
		            x: mapping (or iterable of key/value pairs) to copy from.
		            shape: accepted for call-site compatibility only. It is not
		                stored; the ``shape`` property reads the live
		                ``"crossattn"`` entry instead, so it can never go stale.
		        """
		        super().__init__(x)

		    @property
		    def shape(self):
		        """Return ``self["crossattn"].shape``.

		        Raises:
		            KeyError: if there is no ``"crossattn"`` entry.
		        """
		        return self["crossattn"].shape


		def reconstruct_cond_batch(c: List[List[ScheduledPromptConditioning]], current_step):
		    """Build the batched conditioning that applies at ``current_step``.

		    For every schedule in *c*, the first entry whose ``end_at_step`` is not
		    before ``current_step`` is selected (falling back to the first entry when
		    none qualifies) and its ``cond`` is written into row ``i`` of the result.

		    Args:
		        c: one schedule (list of ``ScheduledPromptConditioning``) per batch item.
		        current_step: the sampling step to look up in each schedule.

		    Returns:
		        A tensor with a leading batch dimension of ``len(c)`` when the conds
		        are tensors, or a ``DictWithShape`` of such tensors (one per key) when
		        the conds are dicts.
		    """
		    first_cond = c[0][0].cond
		    batch_size = len(c)
		    dict_mode = isinstance(first_cond, dict)

		    # Pre-allocate the output using the first cond as the template for
		    # shape, device and dtype.
		    if dict_mode:
		        zeros = {}
		        for key, tensor in first_cond.items():
		            zeros[key] = torch.zeros((batch_size,) + tensor.shape, device=tensor.device, dtype=tensor.dtype)
		        res = DictWithShape(zeros, (batch_size,) + first_cond['crossattn'].shape)
		    else:
		        res = torch.zeros((batch_size,) + first_cond.shape, device=first_cond.device, dtype=first_cond.dtype)

		    for row, schedule in enumerate(c):
		        # Index of the first schedule entry still active at current_step; 0 if none is.
		        chosen = next(
		            (idx for idx, entry in enumerate(schedule) if current_step <= entry.end_at_step),
		            0,
		        )
		        selected = schedule[chosen].cond

		        if dict_mode:
		            for key, tensor in selected.items():
		                res[key][row] = tensor
		        else:
		            res[row] = selected

		    return res


		def stack_conds(tensors):
		    """Stack conditioning tensors into one batch, padding shorter ones.

		    If prompts have wildly different lengths above the limit, the tensors end
		    up with different sizes along dimension 0 and cannot be handed to
		    ``torch.stack`` directly. Each shorter tensor is extended by repeating its
		    last row until it matches the longest one.

		    Args:
		        tensors: non-empty sequence of tensors; padding assumes they are 2-D
		            (``repeat`` is called with two repeat counts).

		    Returns:
		        ``torch.stack`` of the (possibly padded) tensors.

		    The caller's sequence is not modified; padded tensors go into a new list.
		    """
		    token_count = max(x.shape[0] for x in tensors)

		    padded = []
		    for tensor in tensors:
		        missing = token_count - tensor.shape[0]
		        if missing:
		            # Repeat the final row to fill the gap up to token_count.
		            last_vector_repeated = tensor[-1:].repeat([missing, 1])
		            tensor = torch.vstack([tensor, last_vector_repeated])
		        padded.append(tensor)

		    return torch.stack(padded)



		def reconstruct_multicond_batch(c: MulticondLearnedConditioning, current_step):
		param = c.batch[0][0].schedules[0].cond

		@@ -249,16 +291,14 @@ def reconstruct_multicond_batch(c: MulticondLearnedConditioning, current_step):

		conds_list.append(conds_for_batch)

		# if prompts have wildly different lengths above the limit we'll get tensors fo different shapes
		# and won't be able to torch.stack them. So this fixes that.
		token_count = max([x.shape[0] for x in tensors])
		for i in range(len(tensors)):
		if tensors[i].shape[0] != token_count:
		last_vector = tensors[i][-1:]
		last_vector_repeated = last_vector.repeat([token_count - tensors[i].shape[0], 1])
		tensors[i] = torch.vstack([tensors[i], last_vector_repeated])
		if isinstance(tensors[0], dict):
		keys = list(tensors[0].keys())
		stacked = {k: stack_conds([x[k] for x in tensors]) for k in keys}
		stacked = DictWithShape(stacked, stacked['crossattn'].shape)
		else:
		stacked = stack_conds(tensors).to(device=param.device, dtype=param.dtype)

		return conds_list, torch.stack(tensors).to(device=param.device, dtype=param.dtype)
		return conds_list, stacked


		re_attention = re.compile(r"""

modules/sd_hijack.py

+9 −0

Original line number	Diff line number	Diff line
		@@ -166,6 +166,15 @@ class StableDiffusionModelHijack:
		undo_optimizations()

		def hijack(self, m):
		conditioner = getattr(m, 'conditioner', None)
		if conditioner:
		for i in range(len(conditioner.embedders)):
		embedder = conditioner.embedders[i]
		if type(embedder).__name__ == 'FrozenOpenCLIPEmbedder':
		embedder.model.token_embedding = EmbeddingsWithFixes(embedder.model.token_embedding, self)
		m.cond_stage_model = sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords(embedder, self)
		conditioner.embedders[i] = m.cond_stage_model

		if type(m.cond_stage_model) == xlmr.BertSeriesModelWithTransformation:
		model_embeddings = m.cond_stage_model.roberta.embeddings
		model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.word_embeddings, self)

modules/sd_hijack_open_clip.py

+4 −0

Original line number	Diff line number	Diff line
		@@ -16,6 +16,10 @@ class FrozenOpenCLIPEmbedderWithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWit
		self.id_end = tokenizer.encoder["<end_of_text>"]
		self.id_pad = 0

		self.is_trainable = getattr(wrapped, 'is_trainable', False)
		self.input_key = getattr(wrapped, 'input_key', 'txt')
		self.legacy_ucg_val = None

		def tokenize(self, texts):
		assert not opts.use_old_emphasis_implementation, 'Old emphasis implementation not supported for Open Clip'

Admin message