Commit d208383d authored by Phillip Lougher
Browse files

Squashfs: add multi-threaded decompression using percpu variable



Add a multi-threaded decompression implementation which uses
percpu variables.

Using percpu variables has advantages and disadvantages over
implementations which do not use percpu variables.

Advantages:
  * the nature of percpu variables ensures decompression is
    load-balanced across the multiple cores.
  * simplicity.

Disadvantages: it limits decompression to one thread per core.

Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
parent cd59c2ec
Loading
Loading
Loading
Loading
+44 −13
Original line number Original line Diff line number Diff line
@@ -25,6 +25,50 @@ config SQUASHFS


	  If unsure, say N.
	  If unsure, say N.


# Exactly one of the options below is selected; the Makefile builds the
# decompressor_*.o implementation that matches the selected option.
choice
	prompt "Decompressor parallelisation options"
	depends on SQUASHFS
	help
	  Squashfs now supports three parallelisation options for
	  decompression.  Each one exhibits various trade-offs between
	  decompression performance and CPU and memory usage.

	  If in doubt, select "Single threaded decompression"

config SQUASHFS_DECOMP_SINGLE
	bool "Single threaded decompression"
	help
	  Traditionally Squashfs has used single-threaded decompression.
	  Only one block (data or metadata) can be decompressed at any
	  one time.  This limits CPU and memory usage to a minimum.

config SQUASHFS_DECOMP_MULTI
	bool "Use multiple decompressors for parallel I/O"
	help
	  By default Squashfs uses a single decompressor but it gives
	  poor performance on parallel I/O workloads when using multiple CPU
	  machines due to waiting on decompressor availability.

	  If you have a parallel I/O workload and your system has enough memory,
	  using this option may improve overall I/O performance.

	  This decompressor implementation uses up to two parallel
	  decompressors per core.  It dynamically allocates decompressors
	  on a demand basis.

config SQUASHFS_DECOMP_MULTI_PERCPU
	bool "Use percpu multiple decompressors for parallel I/O"
	help
	  By default Squashfs uses a single decompressor but it gives
	  poor performance on parallel I/O workloads when using multiple CPU
	  machines due to waiting on decompressor availability.

	  This decompressor implementation uses a maximum of one
	  decompressor per core.  It uses percpu variables to ensure
	  decompression is load-balanced across the cores.

endchoice

config SQUASHFS_XATTR
config SQUASHFS_XATTR
	bool "Squashfs XATTR support"
	bool "Squashfs XATTR support"
	depends on SQUASHFS
	depends on SQUASHFS
@@ -63,19 +107,6 @@ config SQUASHFS_LZO


	  If unsure, say N.
	  If unsure, say N.


config SQUASHFS_MULTI_DECOMPRESSOR
	bool "Use multiple decompressors for handling parallel I/O"
	depends on SQUASHFS
	help
	  By default Squashfs uses a single decompressor but it gives
	  poor performance on parallel I/O workloads when using multiple CPU
	  machines due to waiting on decompressor availability.

	  If you have a parallel I/O workload and your system has enough memory,
	  using this option may improve overall I/O performance.

	  If unsure, say N.

config SQUASHFS_XZ
config SQUASHFS_XZ
	bool "Include support for XZ compressed file systems"
	bool "Include support for XZ compressed file systems"
	depends on SQUASHFS
	depends on SQUASHFS
+3 −7
Original line number Original line Diff line number Diff line
@@ -5,14 +5,10 @@
obj-$(CONFIG_SQUASHFS) += squashfs.o
obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o decompressor.o
squashfs-y += namei.o super.o symlink.o decompressor.o

squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o
squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o

ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR
	squashfs-y		+= decompressor_multi.o
else
	squashfs-y		+= decompressor_single.o
endif
+98 −0
Original line number Original line Diff line number Diff line
/*
 * Copyright (c) 2013
 * Phillip Lougher <phillip@squashfs.org.uk>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/buffer_head.h>

#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "decompressor.h"
#include "squashfs.h"

/*
 * This file implements multi-threaded decompression using percpu
 * variables, one thread per cpu core.
 */

/*
 * Per-CPU decompressor slot.  One instance exists for every possible CPU
 * (allocated with alloc_percpu() in squashfs_decompressor_create()).
 */
struct squashfs_stream {
	/* Opaque state returned by the decompressor's init() callback. */
	void		*stream;
};

void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
						void *comp_opts)
{
	struct squashfs_stream *stream;
	struct squashfs_stream __percpu *percpu;
	int err, cpu;

	percpu = alloc_percpu(struct squashfs_stream);
	if (percpu == NULL)
		return ERR_PTR(-ENOMEM);

	for_each_possible_cpu(cpu) {
		stream = per_cpu_ptr(percpu, cpu);
		stream->stream = msblk->decompressor->init(msblk, comp_opts);
		if (IS_ERR(stream->stream)) {
			err = PTR_ERR(stream->stream);
			goto out;
		}
	}

	kfree(comp_opts);
	return (__force void *) percpu;

out:
	for_each_possible_cpu(cpu) {
		stream = per_cpu_ptr(percpu, cpu);
		if (!IS_ERR_OR_NULL(stream->stream))
			msblk->decompressor->free(stream->stream);
	}
	free_percpu(percpu);
	return ERR_PTR(err);
}

void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
	struct squashfs_stream __percpu *percpu =
			(struct squashfs_stream __percpu *) msblk->stream;
	struct squashfs_stream *stream;
	int cpu;

	if (msblk->stream) {
		for_each_possible_cpu(cpu) {
			stream = per_cpu_ptr(percpu, cpu);
			msblk->decompressor->free(stream->stream);
		}
		free_percpu(percpu);
	}
}

int squashfs_decompress(struct squashfs_sb_info *msblk,
	void **buffer, struct buffer_head **bh, int b, int offset, int length,
	int srclength, int pages)
{
	struct squashfs_stream __percpu *percpu =
			(struct squashfs_stream __percpu *) msblk->stream;
	struct squashfs_stream *stream = get_cpu_ptr(percpu);
	int res = msblk->decompressor->decompress(msblk, stream->stream, buffer,
		bh, b, offset, length, srclength, pages);
	put_cpu_ptr(stream);

	if (res < 0)
		ERROR("%s decompression failed, data probably corrupt\n",
			msblk->decompressor->name);

	return res;
}

/*
 * Upper bound on simultaneously usable decompressors for this
 * implementation: one stream exists per possible CPU, so the limit is
 * the number of possible CPUs.
 */
int squashfs_max_decompressors(void)
{
	const int per_cpu_limit = num_possible_cpus();

	return per_cpu_limit;
}