Commit 3bcebc5b authored by Hans Holmberg, committed by Jens Axboe
Browse files

lightnvm: pblk: set conservative threshold for user writes



In a worst-case scenario (random writes), OP% of sectors
in each line will be invalid, and we will then need
to move data out of 100/OP% lines to free a single line.

So, to prevent the possibility of running out of lines,
temporarily block user writes when there is less than
100/OP% free lines.

Also ensure that pblk creation does not produce instances
with insufficient over provisioning.

Insufficient over-provisioning is not a problem on real hardware,
but often an issue when running QEMU simulations (with few lines).
100 lines is enough to create a sane instance with the standard
(11%) over provisioning.

Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
Reviewed-by: Javier González <javier@javigon.com>
Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 525f7bb2
Loading
Loading
Loading
Loading
+31 −9
Original line number Original line Diff line number Diff line
@@ -635,7 +635,7 @@ static unsigned int calc_emeta_len(struct pblk *pblk)
	return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
	return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}
}


static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
{
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -643,23 +643,41 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
	struct nvm_geo *geo = &dev->geo;
	struct nvm_geo *geo = &dev->geo;
	sector_t provisioned;
	sector_t provisioned;
	int sec_meta, blk_meta;
	int sec_meta, blk_meta;
	int minimum;


	if (geo->op == NVM_TARGET_DEFAULT_OP)
	if (geo->op == NVM_TARGET_DEFAULT_OP)
		pblk->op = PBLK_DEFAULT_OP;
		pblk->op = PBLK_DEFAULT_OP;
	else
	else
		pblk->op = geo->op;
		pblk->op = geo->op;


	provisioned = nr_free_blks;
	minimum = pblk_get_min_chks(pblk);
	provisioned = nr_free_chks;
	provisioned *= (100 - pblk->op);
	provisioned *= (100 - pblk->op);
	sector_div(provisioned, 100);
	sector_div(provisioned, 100);


	pblk->op_blks = nr_free_blks - provisioned;
	if ((nr_free_chks - provisioned) < minimum) {
		if (geo->op != NVM_TARGET_DEFAULT_OP) {
			pblk_err(pblk, "OP too small to create a sane instance\n");
			return -EINTR;
		}

		/* If the user did not specify an OP value, and PBLK_DEFAULT_OP
		 * is not enough, calculate and set sane value
		 */

		provisioned = nr_free_chks - minimum;
		pblk->op =  (100 * minimum) / nr_free_chks;
		pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n",
				pblk->op);
	}

	pblk->op_blks = nr_free_chks - provisioned;


	/* Internally pblk manages all free blocks, but all calculations based
	/* Internally pblk manages all free blocks, but all calculations based
	 * on user capacity consider only provisioned blocks
	 * on user capacity consider only provisioned blocks
	 */
	 */
	pblk->rl.total_blocks = nr_free_blks;
	pblk->rl.total_blocks = nr_free_chks;
	pblk->rl.nr_secs = nr_free_blks * geo->clba;
	pblk->rl.nr_secs = nr_free_chks * geo->clba;


	/* Consider sectors used for metadata */
	/* Consider sectors used for metadata */
	sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
	sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
@@ -667,8 +685,10 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)


	pblk->capacity = (provisioned - blk_meta) * geo->clba;
	pblk->capacity = (provisioned - blk_meta) * geo->clba;


	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
	atomic_set(&pblk->rl.free_blocks, nr_free_chks);
	atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
	atomic_set(&pblk->rl.free_user_blocks, nr_free_chks);

	return 0;
}
}


static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line,
static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line,
@@ -984,7 +1004,7 @@ static int pblk_lines_init(struct pblk *pblk)
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;
	struct pblk_line *line;
	void *chunk_meta;
	void *chunk_meta;
	long nr_free_chks = 0;
	int nr_free_chks = 0;
	int i, ret;
	int i, ret;


	ret = pblk_line_meta_init(pblk);
	ret = pblk_line_meta_init(pblk);
@@ -1031,7 +1051,9 @@ static int pblk_lines_init(struct pblk *pblk)
		goto fail_free_lines;
		goto fail_free_lines;
	}
	}


	pblk_set_provision(pblk, nr_free_chks);
	ret = pblk_set_provision(pblk, nr_free_chks);
	if (ret)
		goto fail_free_lines;


	vfree(chunk_meta);
	vfree(chunk_meta);
	return 0;
	return 0;
+2 −3
Original line number Original line Diff line number Diff line
@@ -214,11 +214,10 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
	struct nvm_geo *geo = &dev->geo;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_meta *lm = &pblk->lm;
	int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
	int sec_meta, blk_meta;
	int sec_meta, blk_meta;

	unsigned int rb_windows;
	unsigned int rb_windows;



	/* Consider sectors used for metadata */
	/* Consider sectors used for metadata */
	sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
	sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
	blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
	blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
@@ -226,7 +225,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
	rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
	rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
	rl->high_pw = get_count_order(rl->high);
	rl->high_pw = get_count_order(rl->high);


	rl->rsv_blocks = min_blocks;
	rl->rsv_blocks = pblk_get_min_chks(pblk);


	/* This will always be a power-of-2 */
	/* This will always be a power-of-2 */
	rb_windows = budget / NVM_MAX_VLBA;
	rb_windows = budget / NVM_MAX_VLBA;
+11 −1
Original line number Original line Diff line number Diff line
@@ -905,7 +905,6 @@ int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta);
#define PBLK_GC_MAX_READERS 8	/* Max number of outstanding GC reader jobs */
#define PBLK_GC_MAX_READERS 8	/* Max number of outstanding GC reader jobs */
#define PBLK_GC_RQ_QD 128	/* Queue depth for inflight GC requests */
#define PBLK_GC_RQ_QD 128	/* Queue depth for inflight GC requests */
#define PBLK_GC_L_QD 4		/* Queue depth for inflight GC lines */
#define PBLK_GC_L_QD 4		/* Queue depth for inflight GC lines */
#define PBLK_GC_RSV_LINE 1	/* Reserved lines for GC */


int pblk_gc_init(struct pblk *pblk);
int pblk_gc_init(struct pblk *pblk);
void pblk_gc_exit(struct pblk *pblk, bool graceful);
void pblk_gc_exit(struct pblk *pblk, bool graceful);
@@ -1370,4 +1369,15 @@ static inline char *pblk_disk_name(struct pblk *pblk)


	return disk->disk_name;
	return disk->disk_name;
}
}

/*
 * pblk_get_min_chks - minimum number of chunks to keep in reserve
 * @pblk: pblk instance; reads pblk->op (over-provisioning percentage) and
 *        pblk->lm.blk_per_line
 *
 * Worst case, every line holds only OP% invalid sectors, so data has to be
 * moved out of roughly 100/OP lines before one whole line can be freed.
 * The result is that line count (rounded up) scaled by blk_per_line.
 *
 * NOTE(review): pblk->op is used as a divisor; this assumes it has already
 * been set to a non-zero value by the caller - confirm.
 */
static inline unsigned int pblk_get_min_chks(struct pblk *pblk)
{
	return DIV_ROUND_UP(100, pblk->op) * pblk->lm.blk_per_line;
}
#endif /* PBLK_H_ */
#endif /* PBLK_H_ */