Skip to content

Commit 4853aba

Browse files
Jeff Moyer authored and Jens Axboe committed
block: fix flush machinery for stacking drivers with differing flush flags
Commit ae1b153 ("block: reimplement FLUSH/FUA to support merge") introduced a performance regression when running any sort of fsyncing workload using dm-multipath and certain storage (in our case, an HP EVA). The test I ran was fs_mark, and it dropped from ~800 files/sec on ext4 to ~100 files/sec. It turns out that dm-multipath always advertised flush+fua support, and passed commands on down the stack, where those flags used to get stripped off. The above commit changed that behavior: static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; while (1) { - while (!list_empty(&q->queue_head)) { + if (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || - (rq->cmd_flags & REQ_FLUSH_SEQ)) - return rq; - rq = blk_do_flush(q, rq); - if (rq) - return rq; + return rq; } Note that previously, a command would come in here, have REQ_FLUSH|REQ_FUA set, and then get handed off to blk_do_flush: struct request *blk_do_flush(struct request_queue *q, struct request *rq) { unsigned int fflags = q->flush_flags; /* may change, cache it */ bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); unsigned skip = 0; ... if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { rq->cmd_flags &= ~REQ_FLUSH; if (!has_fua) rq->cmd_flags &= ~REQ_FUA; return rq; } So, the flush machinery was bypassed in such cases (q->flush_flags == 0 && rq->cmd_flags & (REQ_FLUSH|REQ_FUA)). Now, however, we don't get into the flush machinery at all. Instead, __elv_next_request just hands a request with flush and fua bits set to the scsi_request_fn, even if the underlying request_queue does not support flush or fua. The agreed upon approach is to fix the flush machinery to allow stacking.
While this isn't used in practice (since there is only one request-based dm target, and that target will now reflect the flush flags of the underlying device), it does future-proof the solution, and make it function as designed. In order to make this work, I had to add a field to the struct request, inside the flush structure (to store the original req->end_io). Shaohua had suggested overloading the union with rb_node and completion_data, but the completion data is used by device mapper and can also be used by other drivers. So, I didn't see a way around the additional field. I tested this patch on an HP EVA with both ext4 and xfs, and it recovers the lost performance. Comments and other testers, as always, are appreciated. Cheers, Jeff Signed-off-by: Jeff Moyer <[email protected]> Acked-by: Tejun Heo <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent bcf30e7 commit 4853aba

File tree

4 files changed

+25
-6
lines changed

4 files changed

+25
-6
lines changed

block/blk-core.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1700,6 +1700,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
17001700
int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
17011701
{
17021702
unsigned long flags;
1703+
int where = ELEVATOR_INSERT_BACK;
17031704

17041705
if (blk_rq_check_limits(q, rq))
17051706
return -EIO;
@@ -1716,7 +1717,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
17161717
*/
17171718
BUG_ON(blk_queued_rq(rq));
17181719

1719-
add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
1720+
if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
1721+
where = ELEVATOR_INSERT_FLUSH;
1722+
1723+
add_acct_request(q, rq, where);
17201724
spin_unlock_irqrestore(q->queue_lock, flags);
17211725

17221726
return 0;
@@ -2273,7 +2277,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
22732277
* %false - we are done with this request
22742278
* %true - still buffers pending for this request
22752279
**/
2276-
static bool __blk_end_bidi_request(struct request *rq, int error,
2280+
bool __blk_end_bidi_request(struct request *rq, int error,
22772281
unsigned int nr_bytes, unsigned int bidi_bytes)
22782282
{
22792283
if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))

block/blk-flush.c

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
123123

124124
/* make @rq a normal request */
125125
rq->cmd_flags &= ~REQ_FLUSH_SEQ;
126-
rq->end_io = NULL;
126+
rq->end_io = rq->flush.saved_end_io;
127127
}
128128

129129
/**
@@ -301,9 +301,6 @@ void blk_insert_flush(struct request *rq)
301301
unsigned int fflags = q->flush_flags; /* may change, cache */
302302
unsigned int policy = blk_flush_policy(fflags, rq);
303303

304-
BUG_ON(rq->end_io);
305-
BUG_ON(!rq->bio || rq->bio != rq->biotail);
306-
307304
/*
308305
* @policy now records what operations need to be done. Adjust
309306
* REQ_FLUSH and FUA for the driver.
@@ -312,6 +309,19 @@ void blk_insert_flush(struct request *rq)
312309
if (!(fflags & REQ_FUA))
313310
rq->cmd_flags &= ~REQ_FUA;
314311

312+
/*
313+
* An empty flush handed down from a stacking driver may
314+
* translate into nothing if the underlying device does not
315+
* advertise a write-back cache. In this case, simply
316+
* complete the request.
317+
*/
318+
if (!policy) {
319+
__blk_end_bidi_request(rq, 0, 0, 0);
320+
return;
321+
}
322+
323+
BUG_ON(!rq->bio || rq->bio != rq->biotail);
324+
315325
/*
316326
* If there's data but flush is not necessary, the request can be
317327
* processed directly without going through flush machinery. Queue
@@ -320,6 +330,7 @@ void blk_insert_flush(struct request *rq)
320330
if ((policy & REQ_FSEQ_DATA) &&
321331
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
322332
list_add_tail(&rq->queuelist, &q->queue_head);
333+
blk_run_queue_async(q);
323334
return;
324335
}
325336

@@ -330,6 +341,7 @@ void blk_insert_flush(struct request *rq)
330341
memset(&rq->flush, 0, sizeof(rq->flush));
331342
INIT_LIST_HEAD(&rq->flush.list);
332343
rq->cmd_flags |= REQ_FLUSH_SEQ;
344+
rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
333345
rq->end_io = flush_data_end_io;
334346

335347
blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);

block/blk.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
1717
struct bio *bio);
1818
void blk_dequeue_request(struct request *rq);
1919
void __blk_queue_free_tags(struct request_queue *q);
20+
bool __blk_end_bidi_request(struct request *rq, int error,
21+
unsigned int nr_bytes, unsigned int bidi_bytes);
2022

2123
void blk_rq_timed_out_timer(unsigned long data);
2224
void blk_delete_timer(struct request *);

include/linux/blkdev.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ struct request {
118118
struct {
119119
unsigned int seq;
120120
struct list_head list;
121+
rq_end_io_fn *saved_end_io;
121122
} flush;
122123
};
123124

0 commit comments

Comments
 (0)