add safetensors support for model merging #4869 (dac9b6f1) · Commits · github_fork / Stable Diffusion Webui

modules/extras.py

+14 −12

Original line number	Original line	Diff line number	Diff line
	@@ -20,6 +20,7 @@ import modules.codeformer_model
	import piexif		import piexif
	import piexif.helper		import piexif.helper
	import gradio as gr		import gradio as gr
			import safetensors.torch


	class LruCache(OrderedDict):		class LruCache(OrderedDict):
	@@ -249,7 +250,7 @@ def run_pnginfo(image):
	return '', geninfo, info		return '', geninfo, info


	def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_name, interp_method, multiplier, save_as_half, custom_name):		def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_name, interp_method, multiplier, save_as_half, custom_name, checkpoint_format):
	def weighted_sum(theta0, theta1, alpha):		def weighted_sum(theta0, theta1, alpha):
	return ((1 - alpha) * theta0) + (alpha * theta1)		return ((1 - alpha) * theta0) + (alpha * theta1)

	@@ -264,19 +265,15 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
	teritary_model_info = sd_models.checkpoints_list.get(teritary_model_name, None)		teritary_model_info = sd_models.checkpoints_list.get(teritary_model_name, None)

	print(f"Loading {primary_model_info.filename}...")		print(f"Loading {primary_model_info.filename}...")
	primary_model = torch.load(primary_model_info.filename, map_location='cpu')		theta_0 = sd_models.read_state_dict(primary_model_info.filename, map_location='cpu')
	theta_0 = sd_models.get_state_dict_from_checkpoint(primary_model)

	print(f"Loading {secondary_model_info.filename}...")		print(f"Loading {secondary_model_info.filename}...")
	secondary_model = torch.load(secondary_model_info.filename, map_location='cpu')		theta_1 = sd_models.read_state_dict(secondary_model_info.filename, map_location='cpu')
	theta_1 = sd_models.get_state_dict_from_checkpoint(secondary_model)

	if teritary_model_info is not None:		if teritary_model_info is not None:
	print(f"Loading {teritary_model_info.filename}...")		print(f"Loading {teritary_model_info.filename}...")
	teritary_model = torch.load(teritary_model_info.filename, map_location='cpu')		theta_2 = sd_models.read_state_dict(teritary_model_info.filename, map_location='cpu')
	theta_2 = sd_models.get_state_dict_from_checkpoint(teritary_model)
	else:		else:
	teritary_model = None
	theta_2 = None		theta_2 = None

	theta_funcs = {		theta_funcs = {
	@@ -295,7 +292,7 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
	theta_1[key] = theta_func1(theta_1[key], t2)		theta_1[key] = theta_func1(theta_1[key], t2)
	else:		else:
	theta_1[key] = torch.zeros_like(theta_1[key])		theta_1[key] = torch.zeros_like(theta_1[key])
	del theta_2, teritary_model		del theta_2

	for key in tqdm.tqdm(theta_0.keys()):		for key in tqdm.tqdm(theta_0.keys()):
	if 'model' in key and key in theta_1:		if 'model' in key and key in theta_1:
	@@ -314,12 +311,17 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam

	ckpt_dir = shared.cmd_opts.ckpt_dir or sd_models.model_path		ckpt_dir = shared.cmd_opts.ckpt_dir or sd_models.model_path

	filename = primary_model_info.model_name + '_' + str(round(1-multiplier, 2)) + '-' + secondary_model_info.model_name + '_' + str(round(multiplier, 2)) + '-' + interp_method.replace(" ", "_") + '-merged.ckpt'		filename = primary_model_info.model_name + '_' + str(round(1-multiplier, 2)) + '-' + secondary_model_info.model_name + '_' + str(round(multiplier, 2)) + '-' + interp_method.replace(" ", "_") + '-merged.' + checkpoint_format
	filename = filename if custom_name == '' else (custom_name + '.ckpt')		filename = filename if custom_name == '' else (custom_name + '.' + checkpoint_format)
	output_modelname = os.path.join(ckpt_dir, filename)		output_modelname = os.path.join(ckpt_dir, filename)

	print(f"Saving to {output_modelname}...")		print(f"Saving to {output_modelname}...")
	torch.save(primary_model, output_modelname)
			_, extension = os.path.splitext(output_modelname)
			if extension.lower() == ".safetensors":
			safetensors.torch.save_file(theta_0, output_modelname, metadata={"format": "pt"})
			else:
			torch.save(theta_0, output_modelname)

	sd_models.list_models()		sd_models.list_models()

modules/sd_models.py

+15 −11

Original line number	Original line	Diff line number	Diff line
	@@ -160,6 +160,20 @@ def get_state_dict_from_checkpoint(pl_sd):
	return pl_sd		return pl_sd


			def read_state_dict(checkpoint_file, print_global_state=False, map_location=None):
			    """Load a checkpoint file from disk and return its state dict.

			    Files whose extension is ``.safetensors`` (compared case-insensitively)
			    are read with ``safetensors.torch.load_file``; every other file is read
			    with ``torch.load``.

			    Args:
			        checkpoint_file: Path of the checkpoint to read.
			        print_global_state: When true, print the loaded data's
			            ``global_step`` entry if it has one.
			        map_location: Device to load tensors onto. When falsy,
			            ``shared.weight_load_location`` is used instead.

			    Returns:
			        The result of ``get_state_dict_from_checkpoint`` applied to the
			        loaded data.
			    """
			    device = map_location or shared.weight_load_location

			    _, extension = os.path.splitext(checkpoint_file)
			    if extension.lower() == ".safetensors":
			        # Both map_location and shared.weight_load_location may be unset.
			        # load_file wants a concrete device (its own default is "cpu"),
			        # so fall back to that instead of forwarding None.
			        pl_sd = safetensors.torch.load_file(checkpoint_file, device=device or "cpu")
			    else:
			        # torch.load accepts map_location=None, so pass the value through unchanged.
			        pl_sd = torch.load(checkpoint_file, map_location=device)

			    if print_global_state and "global_step" in pl_sd:
			        print(f"Global Step: {pl_sd['global_step']}")

			    return get_state_dict_from_checkpoint(pl_sd)


	def load_model_weights(model, checkpoint_info, vae_file="auto"):		def load_model_weights(model, checkpoint_info, vae_file="auto"):
	checkpoint_file = checkpoint_info.filename		checkpoint_file = checkpoint_info.filename
	sd_model_hash = checkpoint_info.hash		sd_model_hash = checkpoint_info.hash
	@@ -174,17 +188,7 @@ def load_model_weights(model, checkpoint_info, vae_file="auto"):
	# load from file		# load from file
	print(f"Loading weights [{sd_model_hash}] from {checkpoint_file}")		print(f"Loading weights [{sd_model_hash}] from {checkpoint_file}")

	_, extension = os.path.splitext(checkpoint_file)		sd = read_state_dict(checkpoint_file)
	if extension.lower() == ".safetensors":
	pl_sd = safetensors.torch.load_file(checkpoint_file, device=shared.weight_load_location)
	else:
	pl_sd = torch.load(checkpoint_file, map_location=shared.weight_load_location)

	if "global_step" in pl_sd:
	print(f"Global Step: {pl_sd['global_step']}")

	sd = get_state_dict_from_checkpoint(pl_sd)
	del pl_sd
	model.load_state_dict(sd, strict=False)		model.load_state_dict(sd, strict=False)
	del sd		del sd

modules/ui.py

+6 −1

Original line number	Original line	Diff line number	Diff line
	@@ -1164,7 +1164,11 @@ def create_ui(wrap_gradio_gpu_call):
	custom_name = gr.Textbox(label="Custom Name (Optional)")		custom_name = gr.Textbox(label="Custom Name (Optional)")
	interp_amount = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Multiplier (M) - set to 0 to get model A', value=0.3)		interp_amount = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Multiplier (M) - set to 0 to get model A', value=0.3)
	interp_method = gr.Radio(choices=["Weighted sum", "Add difference"], value="Weighted sum", label="Interpolation Method")		interp_method = gr.Radio(choices=["Weighted sum", "Add difference"], value="Weighted sum", label="Interpolation Method")

			with gr.Row():
			checkpoint_format = gr.Radio(choices=["ckpt", "safetensors"], value="ckpt", label="Checkpoint format")
	save_as_half = gr.Checkbox(value=False, label="Save as float16")		save_as_half = gr.Checkbox(value=False, label="Save as float16")

	modelmerger_merge = gr.Button(elem_id="modelmerger_merge", label="Merge", variant='primary')		modelmerger_merge = gr.Button(elem_id="modelmerger_merge", label="Merge", variant='primary')

	with gr.Column(variant='panel'):		with gr.Column(variant='panel'):
	@@ -1692,6 +1696,7 @@ def create_ui(wrap_gradio_gpu_call):
	interp_amount,		interp_amount,
	save_as_half,		save_as_half,
	custom_name,		custom_name,
			checkpoint_format,
	],		],
	outputs=[		outputs=[
	submit_result,		submit_result,

Admin message