Commit a42dde04 authored by Peter Zijlstra, committed by Linus Torvalds
Browse files

mm: bdi: allow setting a maximum for the bdi dirty limit



Add "max_ratio" to /sys/class/bdi.  This indicates the maximum percentage of
the global dirty threshold allocated to this bdi.

[mszeredi@suse.cz]

 - fix parsing in max_ratio_store().
 - export bdi_set_max_ratio() to modules
 - limit bdi_dirty with bdi->max_ratio
 - document new sysfs attribute

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 189d3c4a
Loading
Loading
Loading
Loading
+8 −1
Original line number Original line Diff line number Diff line
@@ -49,4 +49,11 @@ min_ratio (read-write)
	Minimal percentage of global dirty threshold allocated to this
	Minimal percentage of global dirty threshold allocated to this
	bdi.  If the value written to this file would make the sum
	bdi.  If the value written to this file would make the sum
	of all min_ratio values exceed 100, then EINVAL is returned.
	of all min_ratio values exceed 100, then EINVAL is returned.
	The default is zero
	If min_ratio would become larger than the current max_ratio,
	then EINVAL is also returned.  The default is zero.

max_ratio (read-write)

	Maximal percentage of global dirty threshold allocated to this
	bdi.  If max_ratio would become smaller than the current
	min_ratio, then EINVAL is returned.  The default is 100.
+2 −0
Original line number Original line Diff line number Diff line
@@ -52,6 +52,7 @@ struct backing_dev_info {
	int dirty_exceeded;
	int dirty_exceeded;


	unsigned int min_ratio;
	unsigned int min_ratio;
	unsigned int max_ratio, max_prop_frac;


	struct device *dev;
	struct device *dev;
};
};
@@ -140,6 +141,7 @@ static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi)
}
}


int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);


/*
/*
 * Flags in backing_dev_info::capability
 * Flags in backing_dev_info::capability
+13 −0
Original line number Original line Diff line number Diff line
@@ -77,6 +77,19 @@ void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
	local_irq_restore(flags);
	local_irq_restore(flags);
}
}


/*
 * Limit the time part in order to ensure there are some bits left for the
 * cycle counter and fraction multiply.
 */
#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)

/*
 * Fixed-point scale for the @frac argument of __prop_inc_percpu_max().
 *
 * PROP_FRAC_SHIFT is the number of bits of a long that remain once
 * PROP_MAX_SHIFT bits and one further bit are set aside;
 * PROP_FRAC_BASE (1 << PROP_FRAC_SHIFT) is the value of @frac that stands
 * for a fraction of 1, i.e. a limit of @frac/PROP_FRAC_BASE == 1.
 */
#define PROP_FRAC_SHIFT		(BITS_PER_LONG - PROP_MAX_SHIFT - 1)
#define PROP_FRAC_BASE		(1UL << PROP_FRAC_SHIFT)

/*
 * Count one event for @pl, unless @pl's fraction already exceeds
 * @frac/PROP_FRAC_BASE, in which case the event is ignored.
 */
void __prop_inc_percpu_max(struct prop_descriptor *pd,
			   struct prop_local_percpu *pl, long frac);


/*
/*
 * ----- SINGLE ------
 * ----- SINGLE ------
 */
 */
+32 −6
Original line number Original line Diff line number Diff line
@@ -73,12 +73,6 @@
#include <linux/proportions.h>
#include <linux/proportions.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate.h>


/*
 * Limit the time part in order to ensure there are some bits left for the
 * cycle counter.
 */
#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
{
	int err;
	int err;
@@ -267,6 +261,38 @@ void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
	prop_put_global(pd, pg);
	prop_put_global(pd, pg);
}
}


/*
 * Same as __prop_inc_percpu, but with a ceiling: when this pl's event count
 * is already above @frac/PROP_FRAC_BASE of the reference count computed
 * below, the event is dropped rather than accounted.
 */
void __prop_inc_percpu_max(struct prop_descriptor *pd,
			   struct prop_local_percpu *pl, long frac)
{
	struct prop_global *pg = prop_get_global(pd);
	int capped = 0;

	prop_norm_percpu(pg, pl);

	/* A @frac equal to PROP_FRAC_BASE skips the limit check entirely. */
	if (unlikely(frac != PROP_FRAC_BASE)) {
		unsigned long half_period = 1UL << (pg->shift - 1);
		long local_events = percpu_counter_read_positive(&pl->events);
		unsigned long global_events = percpu_counter_read(&pg->events);
		/* half a period plus the global count's offset within it */
		long total = half_period + (global_events & (half_period - 1));
		long limit = (total * frac) >> PROP_FRAC_SHIFT;

		capped = local_events > limit;
	}

	if (!capped) {
		percpu_counter_add(&pl->events, 1);
		percpu_counter_add(&pg->events, 1);
	}

	prop_put_global(pd, pg);
}

/*
/*
 * Obtain a fraction of this proportion
 * Obtain a fraction of this proportion
 *
 *
+21 −0
Original line number Original line Diff line number Diff line
@@ -73,6 +73,24 @@ static ssize_t min_ratio_store(struct device *dev,
}
}
BDI_SHOW(min_ratio, bdi->min_ratio)
BDI_SHOW(min_ratio, bdi->min_ratio)


/*
 * sysfs store handler for the "max_ratio" attribute.
 *
 * Parses @buf as an unsigned decimal number, optionally followed by one
 * trailing newline, and passes the value to bdi_set_max_ratio().
 *
 * Returns @count when the value was accepted; -EINVAL when @buf holds no
 * digits, has trailing characters after the number, or the number does not
 * fit in an unsigned int; otherwise the non-zero result of
 * bdi_set_max_ratio().
 */
static ssize_t max_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	unsigned long val;
	unsigned int ratio;
	char *end;
	ssize_t ret;

	val = simple_strtoul(buf, &end, 10);

	/*
	 * No digit was consumed: reject "" and a bare "\n" (what a plain
	 * `echo > max_ratio` writes) instead of silently storing 0.
	 */
	if (end == buf)
		return -EINVAL;

	/* Only a single optional newline may follow the number. */
	if (end[0] == '\n')
		end++;
	if (end[0] != '\0')
		return -EINVAL;

	/* Refuse values that the narrowing to unsigned int would truncate. */
	ratio = val;
	if (ratio != val)
		return -EINVAL;

	ret = bdi_set_max_ratio(bdi, ratio);
	if (!ret)
		ret = count;
	return ret;
}
BDI_SHOW(max_ratio, bdi->max_ratio)

#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)


static struct device_attribute bdi_dev_attrs[] = {
static struct device_attribute bdi_dev_attrs[] = {
@@ -82,6 +100,7 @@ static struct device_attribute bdi_dev_attrs[] = {
	__ATTR_RO(dirty_kb),
	__ATTR_RO(dirty_kb),
	__ATTR_RO(bdi_dirty_kb),
	__ATTR_RO(bdi_dirty_kb),
	__ATTR_RW(min_ratio),
	__ATTR_RW(min_ratio),
	__ATTR_RW(max_ratio),
	__ATTR_NULL,
	__ATTR_NULL,
};
};


@@ -147,6 +166,8 @@ int bdi_init(struct backing_dev_info *bdi)
	bdi->dev = NULL;
	bdi->dev = NULL;


	bdi->min_ratio = 0;
	bdi->min_ratio = 0;
	bdi->max_ratio = 100;
	bdi->max_prop_frac = PROP_FRAC_BASE;


	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
		err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0);
		err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0);
Loading