Merge tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- drbd patches, bringing us closer to unifying the out-of-tree version
and the in tree one (Andreas, Christoph)
- support for auto-quiesce for the s390 dasd driver (Stefan)
- MD pull request via Song:
- md/bitmap: Optimal last page size (Jon Derrick)
- Various raid10 fixes (Yu Kuai, Li Nan)
- md: add error_handlers for raid0 and linear (Mariusz Tkaczyk)
- NVMe pull request via Christoph:
- Drop redundant pci_enable_pcie_error_reporting (Bjorn Helgaas)
- Validate nvmet module parameters (Chaitanya Kulkarni)
- Fence TCP socket on receive error (Chris Leech)
- Fix async event trace event (Keith Busch)
- Minor cleanups (Chaitanya Kulkarni, zhenwei pi)
- Fix and cleanup nvmet Identify handling (Damien Le Moal,
Christoph Hellwig)
- Fix double blk_mq_complete_request race in the timeout handler
(Lei Yin)
- Fix irq locking in nvme-fcloop (Ming Lei)
- Remove queue mapping helper for rdma devices (Sagi Grimberg)
- use structured request attribute checks for nbd (Jakub)
- fix blk-crypto race conditions between keyslot management (Eric)
- add sed-opal support for reading read locking range attributes
(Ondrej)
- make fault injection configurable for null_blk (Akinobu)
- clean up the request insertion API (Christoph)
- clean up the queue running API (Christoph)
- blkg config helper cleanups (Tejun)
- lazy init support for blk-iolatency (Tejun)
- various fixes and tweaks to ublk (Ming)
- remove hybrid polling. It hasn't really been useful since we got
async polled IO support, and these days we don't support sync polled
IO at all (Keith)
- misc fixes, cleanups, improvements (Zhong, Ondrej, Colin, Chengming,
Chaitanya, me)
* tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux: (118 commits)
nbd: fix incomplete validation of ioctl arg
ublk: don't return 0 in case of any failure
sed-opal: geometry feature reporting command
null_blk: Always check queue mode setting from configfs
block: ublk: switch to ioctl command encoding
blk-mq: fix the blk_mq_add_to_requeue_list call in blk_kick_flush
block, bfq: Fix division by zero error on zero wsum
fault-inject: fix build error when FAULT_INJECTION_CONFIGFS=y and CONFIGFS_FS=m
block: store bdev->bd_disk->fops->submit_bio state in bdev
block: re-arrange the struct block_device fields for better layout
md/raid5: remove unused working_disks variable
md/raid10: don't call bio_start_io_acct twice for bio which experienced read error
md/raid10: fix memleak of md thread
md/raid10: fix memleak for 'conf->bio_split'
md/raid10: fix leak of 'r10bio->remaining' for recovery
md/raid10: don't BUG_ON() in raise_barrier()
md: fix soft lockup in status_resync
md: add error_handlers for raid0 and linear
md: Use optimal I/O size for last bitmap page
md: Fix types in sb writer
...
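
The "blkg config helper cleanups (Tejun)" item above replaces the old blkcg_conf_open_bdev()/blkg_conf_prep()/blkg_conf_finish() trio with an explicit init/prep/exit lifetime around struct blkg_conf_ctx, which is also what enables the lazy blk-iolatency init. A minimal sketch of the new calling convention follows; blkcg_policy_example is a placeholder standing in for the converted bfq/iocost/iolatency writers, not code from the tree:

/*
 * Sketch of a cgroup config writer under the new blkg_conf_* lifetime.
 * blkcg_policy_example is hypothetical; the real converted users in this
 * pull are bfq, iocost and iolatency.
 */
static ssize_t example_set_weight(struct kernfs_open_file *of, char *buf,
				  size_t nbytes, loff_t off)
{
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct blkg_conf_ctx ctx;
	u64 v;
	int ret;

	/* Bind the context to the raw input; always paired with blkg_conf_exit(). */
	blkg_conf_init(&ctx, buf);

	/* Resolves MAJ:MIN and the blkg; returns with the queue lock held on success. */
	ret = blkg_conf_prep(blkcg, &blkcg_policy_example, &ctx);
	if (ret)
		goto out;

	/* ctx.body points past the MAJ:MIN prefix; ctx.blkg and ctx.bdev are valid here. */
	if (sscanf(ctx.body, "%llu", &v) == 1) {
		/* ... apply v to the per-device state hanging off ctx.blkg ... */
		ret = 0;
	} else {
		ret = -EINVAL;
	}
out:
	/* Safe even if blkg_conf_prep() failed; drops the lock and the bdev reference. */
	blkg_conf_exit(&ctx);
	return ret ?: nbytes;
}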
@@ -215,11 +215,6 @@ config BLK_MQ_VIRTIO
|
||||
depends on VIRTIO
|
||||
default y
|
||||
|
||||
config BLK_MQ_RDMA
|
||||
bool
|
||||
depends on INFINIBAND
|
||||
default y
|
||||
|
||||
config BLK_PM
|
||||
def_bool PM
|
||||
|
||||
|
||||
@@ -30,7 +30,6 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
|
||||
obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o
|
||||
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
|
||||
obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o
|
||||
obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o
|
||||
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
|
||||
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
|
||||
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
|
||||
|
||||
@@ -419,6 +419,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
|
||||
bdev->bd_inode = inode;
|
||||
bdev->bd_queue = disk->queue;
|
||||
bdev->bd_stats = alloc_percpu(struct disk_stats);
|
||||
bdev->bd_has_submit_bio = false;
|
||||
if (!bdev->bd_stats) {
|
||||
iput(inode);
|
||||
return NULL;
|
||||
|
||||
@@ -497,17 +497,11 @@ static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
|
||||
bgd = kzalloc(sizeof(*bgd), gfp);
|
||||
if (!bgd)
|
||||
return NULL;
|
||||
|
||||
bgd->weight = CGROUP_WEIGHT_DFL;
|
||||
return &bgd->pd;
|
||||
}
|
||||
|
||||
static void bfq_cpd_init(struct blkcg_policy_data *cpd)
|
||||
{
|
||||
struct bfq_group_data *d = cpd_to_bfqgd(cpd);
|
||||
|
||||
d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
|
||||
CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
|
||||
}
|
||||
|
||||
static void bfq_cpd_free(struct blkcg_policy_data *cpd)
|
||||
{
|
||||
kfree(cpd_to_bfqgd(cpd));
|
||||
@@ -1111,9 +1105,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
|
||||
struct bfq_group *bfqg;
|
||||
u64 v;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
if (sscanf(ctx.body, "%llu", &v) == 1) {
|
||||
/* require "default" on dfl */
|
||||
@@ -1135,7 +1131,7 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
|
||||
ret = 0;
|
||||
}
|
||||
out:
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
@@ -1301,8 +1297,6 @@ struct blkcg_policy blkcg_policy_bfq = {
|
||||
.legacy_cftypes = bfq_blkcg_legacy_files,
|
||||
|
||||
.cpd_alloc_fn = bfq_cpd_alloc,
|
||||
.cpd_init_fn = bfq_cpd_init,
|
||||
.cpd_bind_fn = bfq_cpd_init,
|
||||
.cpd_free_fn = bfq_cpd_free,
|
||||
|
||||
.pd_alloc_fn = bfq_pd_alloc,
|
||||
|
||||
@@ -129,7 +129,6 @@
|
||||
#include "elevator.h"
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "bfq-iosched.h"
|
||||
#include "blk-wbt.h"
|
||||
@@ -649,6 +648,8 @@ retry:
|
||||
sched_data->service_tree[i].wsum;
|
||||
}
|
||||
}
|
||||
if (!wsum)
|
||||
continue;
|
||||
limit = DIV_ROUND_CLOSEST(limit * entity->weight, wsum);
|
||||
if (entity->allocated >= limit) {
|
||||
bfq_log_bfqq(bfqq->bfqd, bfqq,
|
||||
@@ -6232,7 +6233,7 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
|
||||
static struct bfq_queue *bfq_init_rq(struct request *rq);
|
||||
|
||||
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
bool at_head)
|
||||
blk_insert_t flags)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct bfq_data *bfqd = q->elevator->elevator_data;
|
||||
@@ -6255,11 +6256,10 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
|
||||
trace_block_rq_insert(rq);
|
||||
|
||||
if (!bfqq || at_head) {
|
||||
if (at_head)
|
||||
list_add(&rq->queuelist, &bfqd->dispatch);
|
||||
else
|
||||
list_add_tail(&rq->queuelist, &bfqd->dispatch);
|
||||
if (flags & BLK_MQ_INSERT_AT_HEAD) {
|
||||
list_add(&rq->queuelist, &bfqd->dispatch);
|
||||
} else if (!bfqq) {
|
||||
list_add_tail(&rq->queuelist, &bfqd->dispatch);
|
||||
} else {
|
||||
idle_timer_disabled = __bfq_insert_request(bfqd, rq);
|
||||
/*
|
||||
@@ -6289,14 +6289,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
}
|
||||
|
||||
static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *list, bool at_head)
|
||||
struct list_head *list,
|
||||
blk_insert_t flags)
|
||||
{
|
||||
while (!list_empty(list)) {
|
||||
struct request *rq;
|
||||
|
||||
rq = list_first_entry(list, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
bfq_insert_request(hctx, rq, at_head);
|
||||
bfq_insert_request(hctx, rq, flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,6 @@
|
||||
|
||||
#define BFQ_DEFAULT_QUEUE_IOPRIO 4
|
||||
|
||||
#define BFQ_WEIGHT_LEGACY_DFL 100
|
||||
#define BFQ_DEFAULT_GRP_IOPRIO 0
|
||||
#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
|
||||
|
||||
|
||||
@@ -33,7 +33,6 @@
|
||||
#include "blk-cgroup.h"
|
||||
#include "blk-ioprio.h"
|
||||
#include "blk-throttle.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
/*
|
||||
* blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
|
||||
@@ -693,69 +692,93 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
|
||||
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
|
||||
|
||||
/**
|
||||
* blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
|
||||
* @inputp: input string pointer
|
||||
* blkg_conf_init - initialize a blkg_conf_ctx
|
||||
* @ctx: blkg_conf_ctx to initialize
|
||||
* @input: input string
|
||||
*
|
||||
* Parse the device node prefix part, MAJ:MIN, of per-blkg config update
|
||||
* from @input and get and return the matching bdev. *@inputp is
|
||||
* updated to point past the device node prefix. Returns an ERR_PTR()
|
||||
* value on error.
|
||||
*
|
||||
* Use this function iff blkg_conf_prep() can't be used for some reason.
|
||||
* Initialize @ctx which can be used to parse blkg config input string @input.
|
||||
* Once initialized, @ctx can be used with blkg_conf_open_bdev() and
|
||||
* blkg_conf_prep(), and must be cleaned up with blkg_conf_exit().
|
||||
*/
|
||||
struct block_device *blkcg_conf_open_bdev(char **inputp)
|
||||
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input)
|
||||
{
|
||||
char *input = *inputp;
|
||||
*ctx = (struct blkg_conf_ctx){ .input = input };
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_init);
|
||||
|
||||
/**
|
||||
* blkg_conf_open_bdev - parse and open bdev for per-blkg config update
|
||||
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
|
||||
*
|
||||
* Parse the device node prefix part, MAJ:MIN, of per-blkg config update from
|
||||
* @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is
|
||||
* set to point past the device node prefix.
|
||||
*
|
||||
* This function may be called multiple times on @ctx and the extra calls become
|
||||
* NOOPs. blkg_conf_prep() implicitly calls this function. Use this function
|
||||
* explicitly if bdev access is needed without resolving the blkcg / policy part
|
||||
* of @ctx->input. Returns -errno on error.
|
||||
*/
|
||||
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
|
||||
{
|
||||
char *input = ctx->input;
|
||||
unsigned int major, minor;
|
||||
struct block_device *bdev;
|
||||
int key_len;
|
||||
|
||||
if (ctx->bdev)
|
||||
return 0;
|
||||
|
||||
if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
|
||||
return ERR_PTR(-EINVAL);
|
||||
return -EINVAL;
|
||||
|
||||
input += key_len;
|
||||
if (!isspace(*input))
|
||||
return ERR_PTR(-EINVAL);
|
||||
return -EINVAL;
|
||||
input = skip_spaces(input);
|
||||
|
||||
bdev = blkdev_get_no_open(MKDEV(major, minor));
|
||||
if (!bdev)
|
||||
return ERR_PTR(-ENODEV);
|
||||
return -ENODEV;
|
||||
if (bdev_is_partition(bdev)) {
|
||||
blkdev_put_no_open(bdev);
|
||||
return ERR_PTR(-ENODEV);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
*inputp = input;
|
||||
return bdev;
|
||||
ctx->body = input;
|
||||
ctx->bdev = bdev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_conf_prep - parse and prepare for per-blkg config update
|
||||
* @blkcg: target block cgroup
|
||||
* @pol: target policy
|
||||
* @input: input string
|
||||
* @ctx: blkg_conf_ctx to be filled
|
||||
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
|
||||
*
|
||||
* Parse per-blkg config update from @input and initialize @ctx with the
|
||||
* result. @ctx->blkg points to the blkg to be updated and @ctx->body the
|
||||
* part of @input following MAJ:MIN. This function returns with RCU read
|
||||
* lock and queue lock held and must be paired with blkg_conf_finish().
|
||||
* Parse per-blkg config update from @ctx->input and initialize @ctx
|
||||
* accordingly. On success, @ctx->body points to the part of @ctx->input
|
||||
* following MAJ:MIN, @ctx->bdev points to the target block device and
|
||||
* @ctx->blkg to the blkg being configured.
|
||||
*
|
||||
* blkg_conf_open_bdev() may be called on @ctx beforehand. On success, this
|
||||
* function returns with queue lock held and must be followed by
|
||||
* blkg_conf_exit().
|
||||
*/
|
||||
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
char *input, struct blkg_conf_ctx *ctx)
|
||||
__acquires(rcu) __acquires(&bdev->bd_queue->queue_lock)
|
||||
struct blkg_conf_ctx *ctx)
|
||||
__acquires(&bdev->bd_queue->queue_lock)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct gendisk *disk;
|
||||
struct request_queue *q;
|
||||
struct blkcg_gq *blkg;
|
||||
int ret;
|
||||
|
||||
bdev = blkcg_conf_open_bdev(&input);
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
disk = bdev->bd_disk;
|
||||
ret = blkg_conf_open_bdev(ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
disk = ctx->bdev->bd_disk;
|
||||
q = disk->queue;
|
||||
|
||||
/*
|
||||
@@ -766,7 +789,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
rcu_read_lock();
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
|
||||
if (!blkcg_policy_enabled(q, pol)) {
|
||||
@@ -795,7 +817,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
|
||||
/* Drop locks to do new blkg allocation with GFP_KERNEL. */
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
new_blkg = blkg_alloc(pos, disk, GFP_KERNEL);
|
||||
if (unlikely(!new_blkg)) {
|
||||
@@ -809,7 +830,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
goto fail_exit_queue;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
|
||||
if (!blkcg_policy_enabled(q, pol)) {
|
||||
@@ -836,20 +856,16 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
}
|
||||
success:
|
||||
blk_queue_exit(q);
|
||||
ctx->bdev = bdev;
|
||||
ctx->blkg = blkg;
|
||||
ctx->body = input;
|
||||
return 0;
|
||||
|
||||
fail_preloaded:
|
||||
radix_tree_preload_end();
|
||||
fail_unlock:
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
rcu_read_unlock();
|
||||
fail_exit_queue:
|
||||
blk_queue_exit(q);
|
||||
fail:
|
||||
blkdev_put_no_open(bdev);
|
||||
/*
|
||||
* If queue was bypassing, we should retry. Do so after a
|
||||
* short msleep(). It isn't strictly necessary but queue
|
||||
@@ -865,20 +881,27 @@ fail:
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_prep);
|
||||
|
||||
/**
|
||||
* blkg_conf_finish - finish up per-blkg config update
|
||||
* @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
|
||||
* blkg_conf_exit - clean up per-blkg config update
|
||||
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
|
||||
*
|
||||
* Finish up after per-blkg config update. This function must be paired
|
||||
* with blkg_conf_prep().
|
||||
* Clean up after per-blkg config update. This function must be called on all
|
||||
* blkg_conf_ctx's initialized with blkg_conf_init().
|
||||
*/
|
||||
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
|
||||
__releases(&ctx->bdev->bd_queue->queue_lock) __releases(rcu)
|
||||
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
|
||||
__releases(&ctx->bdev->bd_queue->queue_lock)
|
||||
{
|
||||
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
|
||||
rcu_read_unlock();
|
||||
blkdev_put_no_open(ctx->bdev);
|
||||
if (ctx->blkg) {
|
||||
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
|
||||
ctx->blkg = NULL;
|
||||
}
|
||||
|
||||
if (ctx->bdev) {
|
||||
blkdev_put_no_open(ctx->bdev);
|
||||
ctx->body = NULL;
|
||||
ctx->bdev = NULL;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_finish);
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_exit);
|
||||
|
||||
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
@@ -1289,8 +1312,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
blkcg->cpd[i] = cpd;
|
||||
cpd->blkcg = blkcg;
|
||||
cpd->plid = i;
|
||||
if (pol->cpd_init_fn)
|
||||
pol->cpd_init_fn(cpd);
|
||||
}
|
||||
|
||||
spin_lock_init(&blkcg->lock);
|
||||
@@ -1368,14 +1389,8 @@ int blkcg_init_disk(struct gendisk *disk)
|
||||
if (ret)
|
||||
goto err_ioprio_exit;
|
||||
|
||||
ret = blk_iolatency_init(disk);
|
||||
if (ret)
|
||||
goto err_throtl_exit;
|
||||
|
||||
return 0;
|
||||
|
||||
err_throtl_exit:
|
||||
blk_throtl_exit(disk);
|
||||
err_ioprio_exit:
|
||||
blk_ioprio_exit(disk);
|
||||
err_destroy_all:
|
||||
@@ -1391,30 +1406,9 @@ err_unlock:
|
||||
void blkcg_exit_disk(struct gendisk *disk)
|
||||
{
|
||||
blkg_destroy_all(disk);
|
||||
rq_qos_exit(disk->queue);
|
||||
blk_throtl_exit(disk);
|
||||
}
|
||||
|
||||
static void blkcg_bind(struct cgroup_subsys_state *root_css)
|
||||
{
|
||||
int i;
|
||||
|
||||
mutex_lock(&blkcg_pol_mutex);
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
struct blkcg *blkcg;
|
||||
|
||||
if (!pol || !pol->cpd_bind_fn)
|
||||
continue;
|
||||
|
||||
list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node)
|
||||
if (blkcg->cpd[pol->plid])
|
||||
pol->cpd_bind_fn(blkcg->cpd[pol->plid]);
|
||||
}
|
||||
mutex_unlock(&blkcg_pol_mutex);
|
||||
}
|
||||
|
||||
static void blkcg_exit(struct task_struct *tsk)
|
||||
{
|
||||
if (tsk->throttle_disk)
|
||||
@@ -1428,7 +1422,6 @@ struct cgroup_subsys io_cgrp_subsys = {
|
||||
.css_offline = blkcg_css_offline,
|
||||
.css_free = blkcg_css_free,
|
||||
.css_rstat_flush = blkcg_rstat_flush,
|
||||
.bind = blkcg_bind,
|
||||
.dfl_cftypes = blkcg_files,
|
||||
.legacy_cftypes = blkcg_legacy_files,
|
||||
.legacy_name = "blkio",
|
||||
@@ -1666,8 +1659,6 @@ int blkcg_policy_register(struct blkcg_policy *pol)
|
||||
blkcg->cpd[pol->plid] = cpd;
|
||||
cpd->blkcg = blkcg;
|
||||
cpd->plid = pol->plid;
|
||||
if (pol->cpd_init_fn)
|
||||
pol->cpd_init_fn(cpd);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -174,9 +174,7 @@ struct blkcg_policy {
|
||||
|
||||
/* operations */
|
||||
blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
|
||||
blkcg_pol_init_cpd_fn *cpd_init_fn;
|
||||
blkcg_pol_free_cpd_fn *cpd_free_fn;
|
||||
blkcg_pol_bind_cpd_fn *cpd_bind_fn;
|
||||
|
||||
blkcg_pol_alloc_pd_fn *pd_alloc_fn;
|
||||
blkcg_pol_init_pd_fn *pd_init_fn;
|
||||
@@ -209,15 +207,17 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
|
||||
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
|
||||
|
||||
struct blkg_conf_ctx {
|
||||
char *input;
|
||||
char *body;
|
||||
struct block_device *bdev;
|
||||
struct blkcg_gq *blkg;
|
||||
char *body;
|
||||
};
|
||||
|
||||
struct block_device *blkcg_conf_open_bdev(char **inputp);
|
||||
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
|
||||
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
|
||||
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
char *input, struct blkg_conf_ctx *ctx);
|
||||
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
|
||||
struct blkg_conf_ctx *ctx);
|
||||
void blkg_conf_exit(struct blkg_conf_ctx *ctx);
|
||||
|
||||
/**
|
||||
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
|
||||
|
||||
@@ -263,13 +263,7 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
|
||||
|
||||
static void blk_free_queue(struct request_queue *q)
|
||||
{
|
||||
if (q->poll_stat)
|
||||
blk_stat_remove_callback(q, q->poll_cb);
|
||||
blk_stat_free_callback(q->poll_cb);
|
||||
|
||||
blk_free_queue_stats(q->stats);
|
||||
kfree(q->poll_stat);
|
||||
|
||||
if (queue_is_mq(q))
|
||||
blk_mq_release(q);
|
||||
|
||||
@@ -593,14 +587,14 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
|
||||
|
||||
static void __submit_bio(struct bio *bio)
|
||||
{
|
||||
struct gendisk *disk = bio->bi_bdev->bd_disk;
|
||||
|
||||
if (unlikely(!blk_crypto_bio_prep(&bio)))
|
||||
return;
|
||||
|
||||
if (!disk->fops->submit_bio) {
|
||||
if (!bio->bi_bdev->bd_has_submit_bio) {
|
||||
blk_mq_submit_bio(bio);
|
||||
} else if (likely(bio_queue_enter(bio) == 0)) {
|
||||
struct gendisk *disk = bio->bi_bdev->bd_disk;
|
||||
|
||||
disk->fops->submit_bio(bio);
|
||||
blk_queue_exit(disk->queue);
|
||||
}
|
||||
@@ -704,7 +698,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
|
||||
*/
|
||||
if (current->bio_list)
|
||||
bio_list_add(¤t->bio_list[0], bio);
|
||||
else if (!bio->bi_bdev->bd_disk->fops->submit_bio)
|
||||
else if (!bio->bi_bdev->bd_has_submit_bio)
|
||||
__submit_bio_noacct_mq(bio);
|
||||
else
|
||||
__submit_bio_noacct(bio);
|
||||
|
||||
@@ -65,6 +65,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
|
||||
return rq->crypt_ctx;
|
||||
}
|
||||
|
||||
static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
|
||||
{
|
||||
return rq->crypt_keyslot;
|
||||
}
|
||||
|
||||
blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
|
||||
const struct blk_crypto_key *key,
|
||||
struct blk_crypto_keyslot **slot_ptr);
|
||||
@@ -119,6 +124,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_BLK_INLINE_ENCRYPTION */
|
||||
|
||||
void __bio_crypt_advance(struct bio *bio, unsigned int bytes);
|
||||
@@ -153,14 +163,21 @@ static inline bool blk_crypto_bio_prep(struct bio **bio_ptr)
|
||||
return true;
|
||||
}
|
||||
|
||||
blk_status_t __blk_crypto_init_request(struct request *rq);
|
||||
static inline blk_status_t blk_crypto_init_request(struct request *rq)
|
||||
blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq);
|
||||
static inline blk_status_t blk_crypto_rq_get_keyslot(struct request *rq)
|
||||
{
|
||||
if (blk_crypto_rq_is_encrypted(rq))
|
||||
return __blk_crypto_init_request(rq);
|
||||
return __blk_crypto_rq_get_keyslot(rq);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
void __blk_crypto_rq_put_keyslot(struct request *rq);
|
||||
static inline void blk_crypto_rq_put_keyslot(struct request *rq)
|
||||
{
|
||||
if (blk_crypto_rq_has_keyslot(rq))
|
||||
__blk_crypto_rq_put_keyslot(rq);
|
||||
}
|
||||
|
||||
void __blk_crypto_free_request(struct request *rq);
|
||||
static inline void blk_crypto_free_request(struct request *rq)
|
||||
{
|
||||
@@ -188,21 +205,6 @@ static inline int blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_crypto_insert_cloned_request - Prepare a cloned request to be inserted
|
||||
* into a request queue.
|
||||
* @rq: the request being queued
|
||||
*
|
||||
* Return: BLK_STS_OK on success, nonzero on error.
|
||||
*/
|
||||
static inline blk_status_t blk_crypto_insert_cloned_request(struct request *rq)
|
||||
{
|
||||
|
||||
if (blk_crypto_rq_is_encrypted(rq))
|
||||
return blk_crypto_init_request(rq);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK
|
||||
|
||||
int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num);
|
||||
|
||||
@@ -227,14 +227,13 @@ EXPORT_SYMBOL_GPL(blk_crypto_keyslot_index);
|
||||
* @profile: the crypto profile of the device the key will be used on
|
||||
* @key: the key that will be used
|
||||
* @slot_ptr: If a keyslot is allocated, an opaque pointer to the keyslot struct
|
||||
* will be stored here; otherwise NULL will be stored here.
|
||||
* will be stored here. blk_crypto_put_keyslot() must be called
|
||||
* later to release it. Otherwise, NULL will be stored here.
|
||||
*
|
||||
* If the device has keyslots, this gets a keyslot that's been programmed with
|
||||
* the specified key. If the key is already in a slot, this reuses it;
|
||||
* otherwise this waits for a slot to become idle and programs the key into it.
|
||||
*
|
||||
* This must be paired with a call to blk_crypto_put_keyslot().
|
||||
*
|
||||
* Context: Process context. Takes and releases profile->lock.
|
||||
* Return: BLK_STS_OK on success, meaning that either a keyslot was allocated or
|
||||
* one wasn't needed; or a blk_status_t error on failure.
|
||||
@@ -312,20 +311,15 @@ success:
|
||||
|
||||
/**
|
||||
* blk_crypto_put_keyslot() - Release a reference to a keyslot
|
||||
* @slot: The keyslot to release the reference of (may be NULL).
|
||||
* @slot: The keyslot to release the reference of
|
||||
*
|
||||
* Context: Any context.
|
||||
*/
|
||||
void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot)
|
||||
{
|
||||
struct blk_crypto_profile *profile;
|
||||
struct blk_crypto_profile *profile = slot->profile;
|
||||
unsigned long flags;
|
||||
|
||||
if (!slot)
|
||||
return;
|
||||
|
||||
profile = slot->profile;
|
||||
|
||||
if (atomic_dec_and_lock_irqsave(&slot->slot_refs,
|
||||
&profile->idle_slots_lock, flags)) {
|
||||
list_add_tail(&slot->idle_slot_node, &profile->idle_slots);
|
||||
@@ -354,28 +348,16 @@ bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* __blk_crypto_evict_key() - Evict a key from a device.
|
||||
* @profile: the crypto profile of the device
|
||||
* @key: the key to evict. It must not still be used in any I/O.
|
||||
*
|
||||
* If the device has keyslots, this finds the keyslot (if any) that contains the
|
||||
* specified key and calls the driver's keyslot_evict function to evict it.
|
||||
*
|
||||
* Otherwise, this just calls the driver's keyslot_evict function if it is
|
||||
* implemented, passing just the key (without any particular keyslot). This
|
||||
* allows layered devices to evict the key from their underlying devices.
|
||||
*
|
||||
* Context: Process context. Takes and releases profile->lock.
|
||||
* Return: 0 on success or if there's no keyslot with the specified key, -EBUSY
|
||||
* if the keyslot is still in use, or another -errno value on other
|
||||
* error.
|
||||
/*
|
||||
* This is an internal function that evicts a key from an inline encryption
|
||||
* device that can be either a real device or the blk-crypto-fallback "device".
|
||||
* It is used only by blk_crypto_evict_key(); see that function for details.
|
||||
*/
|
||||
int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
|
||||
const struct blk_crypto_key *key)
|
||||
{
|
||||
struct blk_crypto_keyslot *slot;
|
||||
int err = 0;
|
||||
int err;
|
||||
|
||||
if (profile->num_slots == 0) {
|
||||
if (profile->ll_ops.keyslot_evict) {
|
||||
@@ -389,22 +371,30 @@ int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
|
||||
|
||||
blk_crypto_hw_enter(profile);
|
||||
slot = blk_crypto_find_keyslot(profile, key);
|
||||
if (!slot)
|
||||
goto out_unlock;
|
||||
if (!slot) {
|
||||
/*
|
||||
* Not an error, since a key not in use by I/O is not guaranteed
|
||||
* to be in a keyslot. There can be more keys than keyslots.
|
||||
*/
|
||||
err = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) {
|
||||
/* BUG: key is still in use by I/O */
|
||||
err = -EBUSY;
|
||||
goto out_unlock;
|
||||
goto out_remove;
|
||||
}
|
||||
err = profile->ll_ops.keyslot_evict(profile, key,
|
||||
blk_crypto_keyslot_index(slot));
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
|
||||
out_remove:
|
||||
/*
|
||||
* Callers free the key even on error, so unlink the key from the hash
|
||||
* table and clear slot->key even on error.
|
||||
*/
|
||||
hlist_del(&slot->hash_node);
|
||||
slot->key = NULL;
|
||||
err = 0;
|
||||
out_unlock:
|
||||
out:
|
||||
blk_crypto_hw_exit(profile);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-crypto-profile.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "blk-crypto-internal.h"
|
||||
@@ -224,27 +225,27 @@ static bool bio_crypt_check_alignment(struct bio *bio)
|
||||
return true;
|
||||
}
|
||||
|
||||
blk_status_t __blk_crypto_init_request(struct request *rq)
|
||||
blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq)
|
||||
{
|
||||
return blk_crypto_get_keyslot(rq->q->crypto_profile,
|
||||
rq->crypt_ctx->bc_key,
|
||||
&rq->crypt_keyslot);
|
||||
}
|
||||
|
||||
/**
|
||||
* __blk_crypto_free_request - Uninitialize the crypto fields of a request.
|
||||
*
|
||||
* @rq: The request whose crypto fields to uninitialize.
|
||||
*
|
||||
* Completely uninitializes the crypto fields of a request. If a keyslot has
|
||||
* been programmed into some inline encryption hardware, that keyslot is
|
||||
* released. The rq->crypt_ctx is also freed.
|
||||
*/
|
||||
void __blk_crypto_free_request(struct request *rq)
|
||||
void __blk_crypto_rq_put_keyslot(struct request *rq)
|
||||
{
|
||||
blk_crypto_put_keyslot(rq->crypt_keyslot);
|
||||
rq->crypt_keyslot = NULL;
|
||||
}
|
||||
|
||||
void __blk_crypto_free_request(struct request *rq)
|
||||
{
|
||||
/* The keyslot, if one was needed, should have been released earlier. */
|
||||
if (WARN_ON_ONCE(rq->crypt_keyslot))
|
||||
__blk_crypto_rq_put_keyslot(rq);
|
||||
|
||||
mempool_free(rq->crypt_ctx, bio_crypt_ctx_pool);
|
||||
blk_crypto_rq_set_defaults(rq);
|
||||
rq->crypt_ctx = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -399,30 +400,39 @@ int blk_crypto_start_using_key(struct block_device *bdev,
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_crypto_evict_key() - Evict a key from any inline encryption hardware
|
||||
* it may have been programmed into
|
||||
* @bdev: The block_device who's associated inline encryption hardware this key
|
||||
* might have been programmed into
|
||||
* @key: The key to evict
|
||||
* blk_crypto_evict_key() - Evict a blk_crypto_key from a block_device
|
||||
* @bdev: a block_device on which I/O using the key may have been done
|
||||
* @key: the key to evict
|
||||
*
|
||||
* Upper layers (filesystems) must call this function to ensure that a key is
|
||||
* evicted from any hardware that it might have been programmed into. The key
|
||||
* must not be in use by any in-flight IO when this function is called.
|
||||
* For a given block_device, this function removes the given blk_crypto_key from
|
||||
* the keyslot management structures and evicts it from any underlying hardware
|
||||
* keyslot(s) or blk-crypto-fallback keyslot it may have been programmed into.
|
||||
*
|
||||
* Return: 0 on success or if the key wasn't in any keyslot; -errno on error.
|
||||
* Upper layers must call this before freeing the blk_crypto_key. It must be
|
||||
* called for every block_device the key may have been used on. The key must no
|
||||
* longer be in use by any I/O when this function is called.
|
||||
*
|
||||
* Context: May sleep.
|
||||
*/
|
||||
int blk_crypto_evict_key(struct block_device *bdev,
|
||||
const struct blk_crypto_key *key)
|
||||
void blk_crypto_evict_key(struct block_device *bdev,
|
||||
const struct blk_crypto_key *key)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
int err;
|
||||
|
||||
if (blk_crypto_config_supported_natively(bdev, &key->crypto_cfg))
|
||||
return __blk_crypto_evict_key(q->crypto_profile, key);
|
||||
|
||||
err = __blk_crypto_evict_key(q->crypto_profile, key);
|
||||
else
|
||||
err = blk_crypto_fallback_evict_key(key);
|
||||
/*
|
||||
* If the block_device didn't support the key, then blk-crypto-fallback
|
||||
* may have been used, so try to evict the key from blk-crypto-fallback.
|
||||
* An error can only occur here if the key failed to be evicted from a
|
||||
* keyslot (due to a hardware or driver issue) or is allegedly still in
|
||||
* use by I/O (due to a kernel bug). Even in these cases, the key is
|
||||
* still unlinked from the keyslot management structures, and the caller
|
||||
* is allowed and expected to free it right away. There's nothing
|
||||
* callers can do to handle errors, so just log them and return void.
|
||||
*/
|
||||
return blk_crypto_fallback_evict_key(key);
|
||||
if (err)
|
||||
pr_warn_ratelimited("%pg: error %d evicting key\n", bdev, err);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_crypto_evict_key);
|
||||
|
||||
@@ -68,12 +68,10 @@
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/part_stat.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-mq-sched.h"
|
||||
|
||||
/* PREFLUSH/FUA sequences */
|
||||
@@ -138,11 +136,6 @@ static void blk_flush_restore_request(struct request *rq)
|
||||
rq->end_io = rq->flush.saved_end_io;
|
||||
}
|
||||
|
||||
static void blk_flush_queue_rq(struct request *rq, bool add_front)
|
||||
{
|
||||
blk_mq_add_to_requeue_list(rq, add_front, true);
|
||||
}
|
||||
|
||||
static void blk_account_io_flush(struct request *rq)
|
||||
{
|
||||
struct block_device *part = rq->q->disk->part0;
|
||||
@@ -195,7 +188,8 @@ static void blk_flush_complete_seq(struct request *rq,
|
||||
|
||||
case REQ_FSEQ_DATA:
|
||||
list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
|
||||
blk_flush_queue_rq(rq, true);
|
||||
blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD);
|
||||
blk_mq_kick_requeue_list(q);
|
||||
break;
|
||||
|
||||
case REQ_FSEQ_DONE:
|
||||
@@ -352,7 +346,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
|
||||
smp_wmb();
|
||||
req_ref_set(flush_rq, 1);
|
||||
|
||||
blk_flush_queue_rq(flush_rq, false);
|
||||
blk_mq_add_to_requeue_list(flush_rq, 0);
|
||||
blk_mq_kick_requeue_list(q);
|
||||
}
|
||||
|
||||
static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
|
||||
@@ -396,6 +391,7 @@ void blk_insert_flush(struct request *rq)
|
||||
unsigned long fflags = q->queue_flags; /* may change, cache */
|
||||
unsigned int policy = blk_flush_policy(fflags, rq);
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
|
||||
/*
|
||||
* @policy now records what operations need to be done. Adjust
|
||||
@@ -432,7 +428,8 @@ void blk_insert_flush(struct request *rq)
|
||||
*/
|
||||
if ((policy & REQ_FSEQ_DATA) &&
|
||||
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
|
||||
blk_mq_request_bypass_insert(rq, false, true);
|
||||
blk_mq_request_bypass_insert(rq, 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -3106,9 +3106,11 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto err;
|
||||
|
||||
iocg = blkg_to_iocg(ctx.blkg);
|
||||
|
||||
@@ -3127,12 +3129,14 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
|
||||
weight_updated(iocg, &now);
|
||||
spin_unlock(&iocg->ioc->lock);
|
||||
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return nbytes;
|
||||
|
||||
einval:
|
||||
blkg_conf_finish(&ctx);
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
err:
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
@@ -3189,19 +3193,22 @@ static const match_table_t qos_tokens = {
|
||||
static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct blkg_conf_ctx ctx;
|
||||
struct gendisk *disk;
|
||||
struct ioc *ioc;
|
||||
u32 qos[NR_QOS_PARAMS];
|
||||
bool enable, user;
|
||||
char *p;
|
||||
char *body, *p;
|
||||
int ret;
|
||||
|
||||
bdev = blkcg_conf_open_bdev(&input);
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
blkg_conf_init(&ctx, input);
|
||||
|
||||
disk = bdev->bd_disk;
|
||||
ret = blkg_conf_open_bdev(&ctx);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
body = ctx.body;
|
||||
disk = ctx.bdev->bd_disk;
|
||||
if (!queue_is_mq(disk->queue)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto err;
|
||||
@@ -3223,7 +3230,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
||||
enable = ioc->enabled;
|
||||
user = ioc->user_qos_params;
|
||||
|
||||
while ((p = strsep(&input, " \t\n"))) {
|
||||
while ((p = strsep(&body, " \t\n"))) {
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
char buf[32];
|
||||
int tok;
|
||||
@@ -3313,7 +3320,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
||||
blk_mq_unquiesce_queue(disk->queue);
|
||||
blk_mq_unfreeze_queue(disk->queue);
|
||||
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return nbytes;
|
||||
einval:
|
||||
spin_unlock_irq(&ioc->lock);
|
||||
@@ -3323,7 +3330,7 @@ einval:
|
||||
|
||||
ret = -EINVAL;
|
||||
err:
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -3376,19 +3383,22 @@ static const match_table_t i_lcoef_tokens = {
|
||||
static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct blkg_conf_ctx ctx;
|
||||
struct request_queue *q;
|
||||
struct ioc *ioc;
|
||||
u64 u[NR_I_LCOEFS];
|
||||
bool user;
|
||||
char *p;
|
||||
char *body, *p;
|
||||
int ret;
|
||||
|
||||
bdev = blkcg_conf_open_bdev(&input);
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
blkg_conf_init(&ctx, input);
|
||||
|
||||
q = bdev_get_queue(bdev);
|
||||
ret = blkg_conf_open_bdev(&ctx);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
body = ctx.body;
|
||||
q = bdev_get_queue(ctx.bdev);
|
||||
if (!queue_is_mq(q)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto err;
|
||||
@@ -3396,7 +3406,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
||||
|
||||
ioc = q_to_ioc(q);
|
||||
if (!ioc) {
|
||||
ret = blk_iocost_init(bdev->bd_disk);
|
||||
ret = blk_iocost_init(ctx.bdev->bd_disk);
|
||||
if (ret)
|
||||
goto err;
|
||||
ioc = q_to_ioc(q);
|
||||
@@ -3409,7 +3419,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
||||
memcpy(u, ioc->params.i_lcoefs, sizeof(u));
|
||||
user = ioc->user_cost_model;
|
||||
|
||||
while ((p = strsep(&input, " \t\n"))) {
|
||||
while ((p = strsep(&body, " \t\n"))) {
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
char buf[32];
|
||||
int tok;
|
||||
@@ -3456,7 +3466,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
||||
blk_mq_unquiesce_queue(q);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return nbytes;
|
||||
|
||||
einval:
|
||||
@@ -3467,7 +3477,7 @@ einval:
|
||||
|
||||
ret = -EINVAL;
|
||||
err:
|
||||
blkdev_put_no_open(bdev);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -755,7 +755,7 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
int blk_iolatency_init(struct gendisk *disk)
|
||||
static int blk_iolatency_init(struct gendisk *disk)
|
||||
{
|
||||
struct blk_iolatency *blkiolat;
|
||||
int ret;
|
||||
@@ -824,6 +824,29 @@ static void iolatency_clear_scaling(struct blkcg_gq *blkg)
|
||||
}
|
||||
}
|
||||
|
||||
static int blk_iolatency_try_init(struct blkg_conf_ctx *ctx)
|
||||
{
|
||||
static DEFINE_MUTEX(init_mutex);
|
||||
int ret;
|
||||
|
||||
ret = blkg_conf_open_bdev(ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* blk_iolatency_init() may fail after rq_qos_add() succeeds which can
|
||||
* confuse iolat_rq_qos() test. Make the test and init atomic.
|
||||
*/
|
||||
mutex_lock(&init_mutex);
|
||||
|
||||
if (!iolat_rq_qos(ctx->bdev->bd_queue))
|
||||
ret = blk_iolatency_init(ctx->bdev->bd_disk);
|
||||
|
||||
mutex_unlock(&init_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
@@ -836,9 +859,15 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
||||
u64 oldval;
|
||||
int ret;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
|
||||
blkg_conf_init(&ctx, buf);
|
||||
|
||||
ret = blk_iolatency_try_init(&ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, &ctx);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
iolat = blkg_to_lat(ctx.blkg);
|
||||
p = ctx.body;
|
||||
@@ -874,7 +903,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
||||
iolatency_clear_scaling(blkg);
|
||||
ret = 0;
|
||||
out:
|
||||
blkg_conf_finish(&ctx);
|
||||
blkg_conf_exit(&ctx);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
@@ -967,7 +996,7 @@ static void iolatency_pd_init(struct blkg_policy_data *pd)
|
||||
{
|
||||
struct iolatency_grp *iolat = pd_to_lat(pd);
|
||||
struct blkcg_gq *blkg = lat_to_blkg(iolat);
|
||||
struct rq_qos *rqos = blkcg_rq_qos(blkg->q);
|
||||
struct rq_qos *rqos = iolat_rq_qos(blkg->q);
|
||||
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
int cpu;
|
||||
|
||||
@@ -867,6 +867,8 @@ static struct request *attempt_merge(struct request_queue *q,
|
||||
if (!blk_discard_mergable(req))
|
||||
elv_merge_requests(q, req, next);
|
||||
|
||||
blk_crypto_rq_put_keyslot(next);
|
||||
|
||||
/*
|
||||
* 'next' is going away, so update stats accordingly
|
||||
*/
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/group_cpus.h>
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
|
||||
|
||||
@@ -7,41 +7,14 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
|
||||
{
|
||||
if (stat->nr_samples) {
|
||||
seq_printf(m, "samples=%d, mean=%llu, min=%llu, max=%llu",
|
||||
stat->nr_samples, stat->mean, stat->min, stat->max);
|
||||
} else {
|
||||
seq_puts(m, "samples=0");
|
||||
}
|
||||
}
|
||||
|
||||
static int queue_poll_stat_show(void *data, struct seq_file *m)
|
||||
{
|
||||
struct request_queue *q = data;
|
||||
int bucket;
|
||||
|
||||
if (!q->poll_stat)
|
||||
return 0;
|
||||
|
||||
for (bucket = 0; bucket < (BLK_MQ_POLL_STATS_BKTS / 2); bucket++) {
|
||||
seq_printf(m, "read (%d Bytes): ", 1 << (9 + bucket));
|
||||
print_stat(m, &q->poll_stat[2 * bucket]);
|
||||
seq_puts(m, "\n");
|
||||
|
||||
seq_printf(m, "write (%d Bytes): ", 1 << (9 + bucket));
|
||||
print_stat(m, &q->poll_stat[2 * bucket + 1]);
|
||||
seq_puts(m, "\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -282,7 +255,6 @@ static const char *const rqf_name[] = {
|
||||
RQF_NAME(STATS),
|
||||
RQF_NAME(SPECIAL_PAYLOAD),
|
||||
RQF_NAME(ZONE_WRITE_LOCKED),
|
||||
RQF_NAME(MQ_POLL_SLEPT),
|
||||
RQF_NAME(TIMED_OUT),
|
||||
RQF_NAME(ELV),
|
||||
RQF_NAME(RESV),
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
*/
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-mq-pci.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (c) 2017 Sagi Grimberg.
|
||||
*/
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-mq-rdma.h>
|
||||
#include <rdma/ib_verbs.h>
|
||||
|
||||
/**
|
||||
* blk_mq_rdma_map_queues - provide a default queue mapping for rdma device
|
||||
* @map: CPU to hardware queue map.
|
||||
* @dev: rdma device to provide a mapping for.
|
||||
* @first_vec: first interrupt vectors to use for queues (usually 0)
|
||||
*
|
||||
* This function assumes the rdma device @dev has at least as many available
|
||||
* interrupt vetors as @set has queues. It will then query it's affinity mask
|
||||
* and built queue mapping that maps a queue to the CPUs that have irq affinity
|
||||
* for the corresponding vector.
|
||||
*
|
||||
* In case either the driver passed a @dev with less vectors than
|
||||
* @set->nr_hw_queues, or @dev does not provide an affinity mask for a
|
||||
* vector, we fallback to the naive mapping.
|
||||
*/
|
||||
void blk_mq_rdma_map_queues(struct blk_mq_queue_map *map,
|
||||
struct ib_device *dev, int first_vec)
|
||||
{
|
||||
const struct cpumask *mask;
|
||||
unsigned int queue, cpu;
|
||||
|
||||
for (queue = 0; queue < map->nr_queues; queue++) {
|
||||
mask = ib_get_vector_affinity(dev, first_vec + queue);
|
||||
if (!mask)
|
||||
goto fallback;
|
||||
|
||||
for_each_cpu(cpu, mask)
|
||||
map->mq_map[cpu] = map->queue_offset + queue;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
fallback:
|
||||
blk_mq_map_queues(map);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_rdma_map_queues);
|
||||
@@ -6,7 +6,6 @@
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/list_sort.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
@@ -15,7 +14,6 @@
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-wbt.h"
|
||||
|
||||
/*
|
||||
@@ -271,9 +269,7 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
||||
|
||||
static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
const bool has_sched = q->elevator;
|
||||
int ret = 0;
|
||||
bool need_dispatch = false;
|
||||
LIST_HEAD(rq_list);
|
||||
|
||||
/*
|
||||
@@ -302,23 +298,22 @@ static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
*/
|
||||
if (!list_empty(&rq_list)) {
|
||||
blk_mq_sched_mark_restart_hctx(hctx);
|
||||
if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
|
||||
if (has_sched)
|
||||
ret = blk_mq_do_dispatch_sched(hctx);
|
||||
else
|
||||
ret = blk_mq_do_dispatch_ctx(hctx);
|
||||
}
|
||||
} else if (has_sched) {
|
||||
ret = blk_mq_do_dispatch_sched(hctx);
|
||||
} else if (hctx->dispatch_busy) {
|
||||
/* dequeue request one by one from sw queue if queue is busy */
|
||||
ret = blk_mq_do_dispatch_ctx(hctx);
|
||||
if (!blk_mq_dispatch_rq_list(hctx, &rq_list, 0))
|
||||
return 0;
|
||||
need_dispatch = true;
|
||||
} else {
|
||||
blk_mq_flush_busy_ctxs(hctx, &rq_list);
|
||||
blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
|
||||
need_dispatch = hctx->dispatch_busy;
|
||||
}
|
||||
|
||||
return ret;
|
||||
if (hctx->queue->elevator)
|
||||
return blk_mq_do_dispatch_sched(hctx);
|
||||
|
||||
/* dequeue request one by one from sw queue if queue is busy */
|
||||
if (need_dispatch)
|
||||
return blk_mq_do_dispatch_ctx(hctx);
|
||||
blk_mq_flush_busy_ctxs(hctx, &rq_list);
|
||||
blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
@@ -384,116 +379,6 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
|
||||
|
||||
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq)
|
||||
{
|
||||
/*
|
||||
* dispatch flush and passthrough rq directly
|
||||
*
|
||||
* passthrough request has to be added to hctx->dispatch directly.
|
||||
* For some reason, device may be in one situation which can't
|
||||
* handle FS request, so STS_RESOURCE is always returned and the
|
||||
* FS request will be added to hctx->dispatch. However passthrough
|
||||
* request may be required at that time for fixing the problem. If
|
||||
* passthrough request is added to scheduler queue, there isn't any
|
||||
* chance to dispatch it given we prioritize requests in hctx->dispatch.
|
||||
*/
|
||||
if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
|
||||
bool run_queue, bool async)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
|
||||
WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));
|
||||
|
||||
if (blk_mq_sched_bypass_insert(hctx, rq)) {
|
||||
/*
|
||||
* Firstly normal IO request is inserted to scheduler queue or
|
||||
* sw queue, meantime we add flush request to dispatch queue(
|
||||
* hctx->dispatch) directly and there is at most one in-flight
|
||||
* flush request for each hw queue, so it doesn't matter to add
|
||||
* flush request to tail or front of the dispatch queue.
|
||||
*
|
||||
* Secondly in case of NCQ, flush request belongs to non-NCQ
|
||||
* command, and queueing it will fail when there is any
|
||||
* in-flight normal IO request(NCQ command). When adding flush
|
||||
* rq to the front of hctx->dispatch, it is easier to introduce
|
||||
* extra time to flush rq's latency because of S_SCHED_RESTART
|
||||
* compared with adding to the tail of dispatch queue, then
|
||||
* chance of flush merge is increased, and less flush requests
|
||||
* will be issued to controller. It is observed that ~10% time
|
||||
* is saved in blktests block/004 on disk attached to AHCI/NCQ
|
||||
* drive when adding flush rq to the front of hctx->dispatch.
|
||||
*
|
||||
* Simply queue flush rq to the front of hctx->dispatch so that
|
||||
* intensive flush workloads can benefit in case of NCQ HW.
|
||||
*/
|
||||
at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
|
||||
blk_mq_request_bypass_insert(rq, at_head, false);
|
||||
goto run;
|
||||
}
|
||||
|
||||
if (e) {
|
||||
LIST_HEAD(list);
|
||||
|
||||
list_add(&rq->queuelist, &list);
|
||||
e->type->ops.insert_requests(hctx, &list, at_head);
|
||||
} else {
|
||||
spin_lock(&ctx->lock);
|
||||
__blk_mq_insert_request(hctx, rq, at_head);
|
||||
spin_unlock(&ctx->lock);
|
||||
}
|
||||
|
||||
run:
|
||||
if (run_queue)
|
||||
blk_mq_run_hw_queue(hctx, async);
|
||||
}
|
||||
|
||||
void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_ctx *ctx,
|
||||
struct list_head *list, bool run_queue_async)
|
||||
{
|
||||
struct elevator_queue *e;
|
||||
struct request_queue *q = hctx->queue;
|
||||
|
||||
/*
|
||||
* blk_mq_sched_insert_requests() is called from flush plug
|
||||
* context only, and hold one usage counter to prevent queue
|
||||
* from being released.
|
||||
*/
|
||||
percpu_ref_get(&q->q_usage_counter);
|
||||
|
||||
e = hctx->queue->elevator;
|
||||
if (e) {
|
||||
e->type->ops.insert_requests(hctx, list, false);
|
||||
} else {
|
||||
/*
|
||||
* try to issue requests directly if the hw queue isn't
|
||||
* busy in case of 'none' scheduler, and this way may save
|
||||
* us one extra enqueue & dequeue to sw queue.
|
||||
*/
|
||||
if (!hctx->dispatch_busy && !run_queue_async) {
|
||||
blk_mq_run_dispatch_ops(hctx->queue,
|
||||
blk_mq_try_issue_list_directly(hctx, list));
|
||||
if (list_empty(list))
|
||||
goto out;
|
||||
}
|
||||
blk_mq_insert_requests(hctx, ctx, list);
|
||||
}
|
||||
|
||||
blk_mq_run_hw_queue(hctx, run_queue_async);
|
||||
out:
|
||||
percpu_ref_put(&q->q_usage_counter);
|
||||
}
|
||||
|
||||
static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
|
||||
struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int hctx_idx)
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "elevator.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
#define MAX_SCHED_RQ (16 * BLKDEV_DEFAULT_RQ)
|
||||
|
||||
@@ -17,12 +16,6 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
|
||||
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx);
|
||||
void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx);
|
||||
|
||||
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
|
||||
bool run_queue, bool async);
|
||||
void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_ctx *ctx,
|
||||
struct list_head *list, bool run_queue_async);
|
||||
|
||||
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
|
||||
|
||||
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
|
||||
|
||||
@@ -10,10 +10,8 @@
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
static void blk_mq_sysfs_release(struct kobject *kobj)
|
||||
{
|
||||
|
||||
@@ -9,12 +9,10 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/delay.h>
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
/*
|
||||
* Recalculate wakeup batch when tag is shared by hctx.
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef INT_BLK_MQ_TAG_H
|
||||
#define INT_BLK_MQ_TAG_H
|
||||
|
||||
struct blk_mq_alloc_data;
|
||||
|
||||
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
|
||||
unsigned int reserved_tags,
|
||||
int node, int alloc_policy);
|
||||
extern void blk_mq_free_tags(struct blk_mq_tags *tags);
|
||||
extern int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
|
||||
struct sbitmap_queue *breserved_tags,
|
||||
unsigned int queue_depth,
|
||||
unsigned int reserved,
|
||||
int node, int alloc_policy);
|
||||
|
||||
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
|
||||
unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
|
||||
unsigned int *offset);
|
||||
extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
|
||||
unsigned int tag);
|
||||
void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags);
|
||||
extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_tags **tags,
|
||||
unsigned int depth, bool can_grow);
|
||||
extern void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set,
|
||||
unsigned int size);
|
||||
extern void blk_mq_tag_update_sched_shared_tags(struct request_queue *q);
|
||||
|
||||
extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
|
||||
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
|
||||
void *priv);
|
||||
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
|
||||
void *priv);
|
||||
|
||||
static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
|
||||
struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (!hctx)
|
||||
return &bt->ws[0];
|
||||
return sbq_wait_ptr(bt, &hctx->wait_index);
|
||||
}
|
||||
|
||||
enum {
|
||||
BLK_MQ_NO_TAG = -1U,
|
||||
BLK_MQ_TAG_MIN = 1,
|
||||
BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1,
|
||||
};
|
||||
|
||||
extern void __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
|
||||
extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
|
||||
|
||||
static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
|
||||
__blk_mq_tag_busy(hctx);
|
||||
}
|
||||
|
||||
static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
|
||||
return;
|
||||
|
||||
__blk_mq_tag_idle(hctx);
|
||||
}
|
||||
|
||||
static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
|
||||
unsigned int tag)
|
||||
{
|
||||
return tag < tags->nr_reserved_tags;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -3,7 +3,6 @@
|
||||
* Copyright (c) 2016 Christoph Hellwig.
|
||||
*/
|
||||
#include <linux/device.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-mq-virtio.h>
|
||||
#include <linux/virtio_config.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
667
block/blk-mq.c
667
block/blk-mq.c
@@ -32,12 +32,10 @@

#include <trace/events/block.h>

#include <linux/blk-mq.h>
#include <linux/t10-pi.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
#include "blk-pm.h"
#include "blk-stat.h"
#include "blk-mq-sched.h"
@@ -46,51 +44,19 @@

static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);

static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);

static int blk_mq_poll_stats_bkt(const struct request *rq)
{
int ddir, sectors, bucket;

ddir = rq_data_dir(rq);
sectors = blk_rq_stats_sectors(rq);

bucket = ddir + 2 * ilog2(sectors);

if (bucket < 0)
return -1;
else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
return ddir + BLK_MQ_POLL_STATS_BKTS - 2;

return bucket;
}

#define BLK_QC_T_SHIFT 16
#define BLK_QC_T_INTERNAL (1U << 31)
static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list);

static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
blk_qc_t qc)
{
return xa_load(&q->hctx_table,
(qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT);
}

static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx,
blk_qc_t qc)
{
unsigned int tag = qc & ((1U << BLK_QC_T_SHIFT) - 1);

if (qc & BLK_QC_T_INTERNAL)
return blk_mq_tag_to_rq(hctx->sched_tags, tag);
return blk_mq_tag_to_rq(hctx->tags, tag);
return xa_load(&q->hctx_table, qc);
}

static inline blk_qc_t blk_rq_to_qc(struct request *rq)
{
return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) |
(rq->tag != -1 ?
rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL));
return rq->mq_hctx->queue_num;
}

/*
@@ -840,6 +806,12 @@ static void blk_complete_request(struct request *req)
req->q->integrity.profile->complete_fn(req, total_bytes);
#endif

/*
 * Upper layers may call blk_crypto_evict_key() anytime after the last
 * bio_endio(). Therefore, the keyslot must be released before that.
 */
blk_crypto_rq_put_keyslot(req);

blk_account_io_completion(req, total_bytes);

do {
@@ -905,6 +877,13 @@ bool blk_update_request(struct request *req, blk_status_t error,
req->q->integrity.profile->complete_fn(req, nr_bytes);
#endif

/*
 * Upper layers may call blk_crypto_evict_key() anytime after the last
 * bio_endio(). Therefore, the keyslot must be released before that.
 */
if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req))
__blk_crypto_rq_put_keyslot(req);

if (unlikely(error && !blk_rq_is_passthrough(req) &&
!(req->rq_flags & RQF_QUIET)) &&
!test_bit(GD_DEAD, &req->q->disk->state)) {
@@ -976,17 +955,6 @@ bool blk_update_request(struct request *req, blk_status_t error,
}
EXPORT_SYMBOL_GPL(blk_update_request);

static void __blk_account_io_done(struct request *req, u64 now)
{
const int sgrp = op_stat_group(req_op(req));

part_stat_lock();
update_io_ticks(req->part, jiffies, true);
part_stat_inc(req->part, ios[sgrp]);
part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
part_stat_unlock();
}

static inline void blk_account_io_done(struct request *req, u64 now)
{
/*
@@ -995,40 +963,41 @@ static inline void blk_account_io_done(struct request *req, u64 now)
 * containing request is enough.
 */
if (blk_do_io_stat(req) && req->part &&
!(req->rq_flags & RQF_FLUSH_SEQ))
__blk_account_io_done(req, now);
}
!(req->rq_flags & RQF_FLUSH_SEQ)) {
const int sgrp = op_stat_group(req_op(req));

static void __blk_account_io_start(struct request *rq)
{
/*
 * All non-passthrough requests are created from a bio with one
 * exception: when a flush command that is part of a flush sequence
 * generated by the state machine in blk-flush.c is cloned onto the
 * lower device by dm-multipath we can get here without a bio.
 */
if (rq->bio)
rq->part = rq->bio->bi_bdev;
else
rq->part = rq->q->disk->part0;

part_stat_lock();
update_io_ticks(rq->part, jiffies, false);
part_stat_unlock();
part_stat_lock();
update_io_ticks(req->part, jiffies, true);
part_stat_inc(req->part, ios[sgrp]);
part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
part_stat_unlock();
}
}

static inline void blk_account_io_start(struct request *req)
{
if (blk_do_io_stat(req))
__blk_account_io_start(req);
if (blk_do_io_stat(req)) {
/*
 * All non-passthrough requests are created from a bio with one
 * exception: when a flush command that is part of a flush sequence
 * generated by the state machine in blk-flush.c is cloned onto the
 * lower device by dm-multipath we can get here without a bio.
 */
if (req->bio)
req->part = req->bio->bi_bdev;
else
req->part = req->q->disk->part0;

part_stat_lock();
update_io_ticks(req->part, jiffies, false);
part_stat_unlock();
}
}

static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
{
if (rq->rq_flags & RQF_STATS) {
blk_mq_poll_stats_start(rq->q);
if (rq->rq_flags & RQF_STATS)
blk_stat_add(rq, now);
}

blk_mq_sched_completed_request(rq, now);
blk_account_io_done(rq, now);
@@ -1322,6 +1291,8 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 */
void blk_execute_rq_nowait(struct request *rq, bool at_head)
{
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

WARN_ON(irqs_disabled());
WARN_ON(!blk_rq_is_passthrough(rq));

@@ -1332,10 +1303,13 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head)
 * device, directly accessing the plug instead of using blk_mq_plug()
 * should not have any consequences.
 */
if (current->plug)
if (current->plug && !at_head) {
blk_add_rq_to_plug(current->plug, rq);
else
blk_mq_sched_insert_request(rq, at_head, true, false);
return;
}

blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
blk_mq_run_hw_queue(hctx, false);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);

@@ -1383,6 +1357,7 @@ static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
 */
blk_status_t blk_execute_rq(struct request *rq, bool at_head)
{
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
struct blk_rq_wait wait = {
.done = COMPLETION_INITIALIZER_ONSTACK(wait.done),
};
@@ -1394,7 +1369,8 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
rq->end_io = blk_end_sync_rq;

blk_account_io_start(rq);
blk_mq_sched_insert_request(rq, at_head, true, false);
blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
blk_mq_run_hw_queue(hctx, false);

if (blk_rq_is_poll(rq)) {
blk_rq_poll_completion(rq, &wait.done);
@@ -1434,12 +1410,17 @@ static void __blk_mq_requeue_request(struct request *rq)

void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
{
struct request_queue *q = rq->q;

__blk_mq_requeue_request(rq);

/* this request will be re-inserted to io scheduler queue */
blk_mq_sched_requeue_request(rq);

blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD);

if (kick_requeue_list)
blk_mq_kick_requeue_list(q);
}
EXPORT_SYMBOL(blk_mq_requeue_request);

@@ -1455,33 +1436,33 @@ static void blk_mq_requeue_work(struct work_struct *work)
spin_unlock_irq(&q->requeue_lock);

list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP)))
continue;

rq->rq_flags &= ~RQF_SOFTBARRIER;
list_del_init(&rq->queuelist);
/*
 * If RQF_DONTPREP, rq has contained some driver specific
 * data, so insert it to hctx dispatch list to avoid any
 * merge.
 * If RQF_DONTPREP ist set, the request has been started by the
 * driver already and might have driver-specific data allocated
 * already.  Insert it into the hctx dispatch list to avoid
 * block layer merges for the request.
 */
if (rq->rq_flags & RQF_DONTPREP)
blk_mq_request_bypass_insert(rq, false, false);
else
blk_mq_sched_insert_request(rq, true, false, false);
if (rq->rq_flags & RQF_DONTPREP) {
rq->rq_flags &= ~RQF_SOFTBARRIER;
list_del_init(&rq->queuelist);
blk_mq_request_bypass_insert(rq, 0);
} else if (rq->rq_flags & RQF_SOFTBARRIER) {
rq->rq_flags &= ~RQF_SOFTBARRIER;
list_del_init(&rq->queuelist);
blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
}
}

while (!list_empty(&rq_list)) {
rq = list_entry(rq_list.next, struct request, queuelist);
list_del_init(&rq->queuelist);
blk_mq_sched_insert_request(rq, false, false, false);
blk_mq_insert_request(rq, 0);
}

blk_mq_run_hw_queues(q, false);
}

void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
bool kick_requeue_list)
void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags)
{
struct request_queue *q = rq->q;
unsigned long flags;
@@ -1493,16 +1474,13 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);

spin_lock_irqsave(&q->requeue_lock, flags);
if (at_head) {
if (insert_flags & BLK_MQ_INSERT_AT_HEAD) {
rq->rq_flags |= RQF_SOFTBARRIER;
list_add(&rq->queuelist, &q->requeue_list);
} else {
list_add_tail(&rq->queuelist, &q->requeue_list);
}
spin_unlock_irqrestore(&q->requeue_lock, flags);

if (kick_requeue_list)
blk_mq_kick_requeue_list(q);
}

void blk_mq_kick_requeue_list(struct request_queue *q)
@@ -2158,24 +2136,6 @@ out:
return true;
}

/**
 * __blk_mq_run_hw_queue - Run a hardware queue.
 * @hctx: Pointer to the hardware queue to run.
 *
 * Send pending requests to the hardware.
 */
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
{
/*
 * We can't run the queue inline with ints disabled. Ensure that
 * we catch bad users of this early.
 */
WARN_ON_ONCE(in_interrupt());

blk_mq_run_dispatch_ops(hctx->queue,
blk_mq_sched_dispatch_requests(hctx));
}

static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
{
int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
@@ -2231,32 +2191,6 @@ select_cpu:
return next_cpu;
}

/**
 * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue.
 * @hctx: Pointer to the hardware queue to run.
 * @async: If we want to run the queue asynchronously.
 * @msecs: Milliseconds of delay to wait before running the queue.
 *
 * If !@async, try to run the queue now. Else, run the queue asynchronously and
 * with a delay of @msecs.
 */
static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
unsigned long msecs)
{
if (unlikely(blk_mq_hctx_stopped(hctx)))
return;

if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
__blk_mq_run_hw_queue(hctx);
return;
}
}

kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
msecs_to_jiffies(msecs));
}

/**
 * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
 * @hctx: Pointer to the hardware queue to run.
@@ -2266,7 +2200,10 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
 */
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
{
__blk_mq_delay_run_hw_queue(hctx, true, msecs);
if (unlikely(blk_mq_hctx_stopped(hctx)))
return;
kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);

@@ -2283,6 +2220,11 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
bool need_run;

/*
 * We can't run the queue inline with interrupts disabled.
 */
WARN_ON_ONCE(!async && in_interrupt());

/*
 * When queue is quiesced, we may be switching io scheduler, or
 * updating nr_hw_queues, or other things, and we can't run queue
@@ -2295,8 +2237,17 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
need_run = !blk_queue_quiesced(hctx->queue) &&
blk_mq_hctx_has_pending(hctx));

if (need_run)
__blk_mq_delay_run_hw_queue(hctx, async, 0);
if (!need_run)
return;

if (async || (hctx->flags & BLK_MQ_F_BLOCKING) ||
!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
blk_mq_delay_run_hw_queue(hctx, 0);
return;
}

blk_mq_run_dispatch_ops(hctx->queue,
blk_mq_sched_dispatch_requests(hctx));
}
EXPORT_SYMBOL(blk_mq_run_hw_queue);

@@ -2461,79 +2412,51 @@ EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);

static void blk_mq_run_work_fn(struct work_struct *work)
{
struct blk_mq_hw_ctx *hctx;
struct blk_mq_hw_ctx *hctx =
container_of(work, struct blk_mq_hw_ctx, run_work.work);

hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);

/*
 * If we are stopped, don't run the queue.
 */
if (blk_mq_hctx_stopped(hctx))
return;

__blk_mq_run_hw_queue(hctx);
}

static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
struct request *rq,
bool at_head)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
enum hctx_type type = hctx->type;

lockdep_assert_held(&ctx->lock);

trace_block_rq_insert(rq);

if (at_head)
list_add(&rq->queuelist, &ctx->rq_lists[type]);
else
list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
}

void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;

lockdep_assert_held(&ctx->lock);

__blk_mq_insert_req_list(hctx, rq, at_head);
blk_mq_hctx_mark_pending(hctx, ctx);
blk_mq_run_dispatch_ops(hctx->queue,
blk_mq_sched_dispatch_requests(hctx));
}

/**
 * blk_mq_request_bypass_insert - Insert a request at dispatch list.
 * @rq: Pointer to request to be inserted.
 * @at_head: true if the request should be inserted at the head of the list.
 * @run_queue: If we should run the hardware queue after inserting the request.
 * @flags: BLK_MQ_INSERT_*
 *
 * Should only be used carefully, when the caller knows we want to
 * bypass a potential IO scheduler on the target device.
 */
void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
bool run_queue)
void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags)
{
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

spin_lock(&hctx->lock);
if (at_head)
if (flags & BLK_MQ_INSERT_AT_HEAD)
list_add(&rq->queuelist, &hctx->dispatch);
else
list_add_tail(&rq->queuelist, &hctx->dispatch);
spin_unlock(&hctx->lock);

if (run_queue)
blk_mq_run_hw_queue(hctx, false);
}

void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list)

static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx, struct list_head *list,
bool run_queue_async)
{
struct request *rq;
enum hctx_type type = hctx->type;

/*
 * Try to issue requests directly if the hw queue isn't busy to save an
 * extra enqueue & dequeue to the sw queue.
 */
if (!hctx->dispatch_busy && !run_queue_async) {
blk_mq_run_dispatch_ops(hctx->queue,
blk_mq_try_issue_list_directly(hctx, list));
if (list_empty(list))
goto out;
}

/*
 * preemption doesn't flush plug list, so it's possible ctx->cpu is
 * offline now
@@ -2547,6 +2470,70 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
list_splice_tail_init(list, &ctx->rq_lists[type]);
blk_mq_hctx_mark_pending(hctx, ctx);
spin_unlock(&ctx->lock);
out:
blk_mq_run_hw_queue(hctx, run_queue_async);
}

static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
{
struct request_queue *q = rq->q;
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

if (blk_rq_is_passthrough(rq)) {
/*
 * Passthrough request have to be added to hctx->dispatch
 * directly.  The device may be in a situation where it can't
 * handle FS request, and always returns BLK_STS_RESOURCE for
 * them, which gets them added to hctx->dispatch.
 *
 * If a passthrough request is required to unblock the queues,
 * and it is added to the scheduler queue, there is no chance to
 * dispatch it given we prioritize requests in hctx->dispatch.
 */
blk_mq_request_bypass_insert(rq, flags);
} else if (rq->rq_flags & RQF_FLUSH_SEQ) {
/*
 * Firstly normal IO request is inserted to scheduler queue or
 * sw queue, meantime we add flush request to dispatch queue(
 * hctx->dispatch) directly and there is at most one in-flight
 * flush request for each hw queue, so it doesn't matter to add
 * flush request to tail or front of the dispatch queue.
 *
 * Secondly in case of NCQ, flush request belongs to non-NCQ
 * command, and queueing it will fail when there is any
 * in-flight normal IO request(NCQ command). When adding flush
 * rq to the front of hctx->dispatch, it is easier to introduce
 * extra time to flush rq's latency because of S_SCHED_RESTART
 * compared with adding to the tail of dispatch queue, then
 * chance of flush merge is increased, and less flush requests
 * will be issued to controller. It is observed that ~10% time
 * is saved in blktests block/004 on disk attached to AHCI/NCQ
 * drive when adding flush rq to the front of hctx->dispatch.
 *
 * Simply queue flush rq to the front of hctx->dispatch so that
 * intensive flush workloads can benefit in case of NCQ HW.
 */
blk_mq_request_bypass_insert(rq, BLK_MQ_INSERT_AT_HEAD);
} else if (q->elevator) {
LIST_HEAD(list);

WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG);

list_add(&rq->queuelist, &list);
q->elevator->type->ops.insert_requests(hctx, &list, flags);
} else {
trace_block_rq_insert(rq);

spin_lock(&ctx->lock);
if (flags & BLK_MQ_INSERT_AT_HEAD)
list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]);
else
list_add_tail(&rq->queuelist,
&ctx->rq_lists[hctx->type]);
blk_mq_hctx_mark_pending(hctx, ctx);
spin_unlock(&ctx->lock);
}
}

static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
@@ -2600,49 +2587,19 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
return ret;
}

static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq,
bool bypass_insert, bool last)
static bool blk_mq_get_budget_and_tag(struct request *rq)
{
struct request_queue *q = rq->q;
bool run_queue = true;
int budget_token;

/*
 * RCU or SRCU read lock is needed before checking quiesced flag.
 *
 * When queue is stopped or quiesced, ignore 'bypass_insert' from
 * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller,
 * and avoid driver to try to dispatch again.
 */
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
run_queue = false;
bypass_insert = false;
goto insert;
}

if ((rq->rq_flags & RQF_ELV) && !bypass_insert)
goto insert;

budget_token = blk_mq_get_dispatch_budget(q);
budget_token = blk_mq_get_dispatch_budget(rq->q);
if (budget_token < 0)
goto insert;

return false;
blk_mq_set_rq_budget_token(rq, budget_token);

if (!blk_mq_get_driver_tag(rq)) {
blk_mq_put_dispatch_budget(q, budget_token);
goto insert;
blk_mq_put_dispatch_budget(rq->q, budget_token);
return false;
}

return __blk_mq_issue_directly(hctx, rq, last);
insert:
if (bypass_insert)
return BLK_STS_RESOURCE;

blk_mq_sched_insert_request(rq, false, run_queue, false);

return BLK_STS_OK;
return true;
}

/**
@@ -2658,18 +2615,46 @@ insert:
static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
blk_status_t ret =
__blk_mq_try_issue_directly(hctx, rq, false, true);
blk_status_t ret;

if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
blk_mq_request_bypass_insert(rq, false, true);
else if (ret != BLK_STS_OK)
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
blk_mq_insert_request(rq, 0);
return;
}

if ((rq->rq_flags & RQF_ELV) || !blk_mq_get_budget_and_tag(rq)) {
blk_mq_insert_request(rq, 0);
blk_mq_run_hw_queue(hctx, false);
return;
}

ret = __blk_mq_issue_directly(hctx, rq, true);
switch (ret) {
case BLK_STS_OK:
break;
case BLK_STS_RESOURCE:
case BLK_STS_DEV_RESOURCE:
blk_mq_request_bypass_insert(rq, 0);
blk_mq_run_hw_queue(hctx, false);
break;
default:
blk_mq_end_request(rq, ret);
break;
}
}

static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
{
return __blk_mq_try_issue_directly(rq->mq_hctx, rq, true, last);
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
blk_mq_insert_request(rq, 0);
return BLK_STS_OK;
}

if (!blk_mq_get_budget_and_tag(rq))
return BLK_STS_RESOURCE;
return __blk_mq_issue_directly(hctx, rq, last);
}

static void blk_mq_plug_issue_direct(struct blk_plug *plug)
@@ -2697,7 +2682,8 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug)
break;
case BLK_STS_RESOURCE:
case BLK_STS_DEV_RESOURCE:
blk_mq_request_bypass_insert(rq, false, true);
blk_mq_request_bypass_insert(rq, 0);
blk_mq_run_hw_queue(hctx, false);
goto out;
default:
blk_mq_end_request(rq, ret);
@@ -2743,7 +2729,16 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)

plug->mq_list = requeue_list;
trace_block_unplug(this_hctx->queue, depth, !from_sched);
blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, from_sched);

percpu_ref_get(&this_hctx->queue->q_usage_counter);
if (this_hctx->queue->elevator) {
this_hctx->queue->elevator->type->ops.insert_requests(this_hctx,
&list, 0);
blk_mq_run_hw_queue(this_hctx, from_sched);
} else {
blk_mq_insert_requests(this_hctx, this_ctx, &list, from_sched);
}
percpu_ref_put(&this_hctx->queue->q_usage_counter);
}

void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
@@ -2789,7 +2784,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
} while (!rq_list_empty(plug->mq_list));
}

void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list)
{
int queued = 0;
@@ -2807,8 +2802,9 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
break;
case BLK_STS_RESOURCE:
case BLK_STS_DEV_RESOURCE:
blk_mq_request_bypass_insert(rq, false,
list_empty(list));
blk_mq_request_bypass_insert(rq, 0);
if (list_empty(list))
blk_mq_run_hw_queue(hctx, false);
goto out;
default:
blk_mq_end_request(rq, ret);
@@ -2934,6 +2930,7 @@ void blk_mq_submit_bio(struct bio *bio)
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct blk_plug *plug = blk_mq_plug(bio);
const int is_sync = op_is_sync(bio->bi_opf);
struct blk_mq_hw_ctx *hctx;
struct request *rq;
unsigned int nr_segs = 1;
blk_status_t ret;
@@ -2965,7 +2962,7 @@ void blk_mq_submit_bio(struct bio *bio)

blk_mq_bio_to_request(rq, bio, nr_segs);

ret = blk_crypto_init_request(rq);
ret = blk_crypto_rq_get_keyslot(rq);
if (ret != BLK_STS_OK) {
bio->bi_status = ret;
bio_endio(bio);
@@ -2978,15 +2975,19 @@ void blk_mq_submit_bio(struct bio *bio)
return;
}

if (plug)
if (plug) {
blk_add_rq_to_plug(plug, rq);
else if ((rq->rq_flags & RQF_ELV) ||
(rq->mq_hctx->dispatch_busy &&
(q->nr_hw_queues == 1 || !is_sync)))
blk_mq_sched_insert_request(rq, false, true, true);
else
blk_mq_run_dispatch_ops(rq->q,
blk_mq_try_issue_directly(rq->mq_hctx, rq));
return;
}

hctx = rq->mq_hctx;
if ((rq->rq_flags & RQF_ELV) ||
(hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) {
blk_mq_insert_request(rq, 0);
blk_mq_run_hw_queue(hctx, true);
} else {
blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq));
}
}

#ifdef CONFIG_BLK_MQ_STACKING
@@ -3034,8 +3035,9 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq)))
return BLK_STS_IOERR;

if (blk_crypto_insert_cloned_request(rq))
return BLK_STS_IOERR;
ret = blk_crypto_rq_get_keyslot(rq);
if (ret != BLK_STS_OK)
return ret;

blk_account_io_start(rq);

@@ -4206,14 +4208,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/* mark the queue as mq asap */
q->mq_ops = set->ops;

q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
blk_mq_poll_stats_bkt,
BLK_MQ_POLL_STATS_BKTS, q);
if (!q->poll_cb)
goto err_exit;

if (blk_mq_alloc_ctxs(q))
goto err_poll;
goto err_exit;

/* init q->mq_kobj and sw queues' kobjects */
blk_mq_sysfs_init(q);
@@ -4241,11 +4237,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,

q->nr_requests = set->queue_depth;

/*
 * Default to classic polling
 */
q->poll_nsec = BLK_MQ_POLL_CLASSIC;

blk_mq_init_cpu_queues(q, set->nr_hw_queues);
blk_mq_add_queue_tag_set(set, q);
blk_mq_map_swqueue(q);
@@ -4253,9 +4244,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,

err_hctxs:
blk_mq_release(q);
err_poll:
blk_stat_free_callback(q->poll_cb);
q->poll_cb = NULL;
err_exit:
q->mq_ops = NULL;
return -ENOMEM;
@@ -4752,138 +4740,8 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
}
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);

/* Enable polling stats and return whether they were already enabled. */
static bool blk_poll_stats_enable(struct request_queue *q)
{
if (q->poll_stat)
return true;

return blk_stats_alloc_enable(q);
}

static void blk_mq_poll_stats_start(struct request_queue *q)
{
/*
 * We don't arm the callback if polling stats are not enabled or the
 * callback is already active.
 */
if (!q->poll_stat || blk_stat_is_active(q->poll_cb))
return;

blk_stat_activate_msecs(q->poll_cb, 100);
}

static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
{
struct request_queue *q = cb->data;
int bucket;

for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
if (cb->stat[bucket].nr_samples)
q->poll_stat[bucket] = cb->stat[bucket];
}
}

static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
struct request *rq)
{
unsigned long ret = 0;
int bucket;

/*
 * If stats collection isn't on, don't sleep but turn it on for
 * future users
 */
if (!blk_poll_stats_enable(q))
return 0;

/*
 * As an optimistic guess, use half of the mean service time
 * for this type of request. We can (and should) make this smarter.
 * For instance, if the completion latencies are tight, we can
 * get closer than just half the mean. This is especially
 * important on devices where the completion latencies are longer
 * than ~10 usec. We do use the stats for the relevant IO size
 * if available which does lead to better estimates.
 */
bucket = blk_mq_poll_stats_bkt(rq);
if (bucket < 0)
return ret;

if (q->poll_stat[bucket].nr_samples)
ret = (q->poll_stat[bucket].mean + 1) / 2;

return ret;
}

static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc)
{
struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, qc);
struct request *rq = blk_qc_to_rq(hctx, qc);
struct hrtimer_sleeper hs;
enum hrtimer_mode mode;
unsigned int nsecs;
ktime_t kt;

/*
 * If a request has completed on queue that uses an I/O scheduler, we
 * won't get back a request from blk_qc_to_rq.
 */
if (!rq || (rq->rq_flags & RQF_MQ_POLL_SLEPT))
return false;

/*
 * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
 *
 *  0:	use half of prev avg
 * >0:	use this specific value
 */
if (q->poll_nsec > 0)
nsecs = q->poll_nsec;
else
nsecs = blk_mq_poll_nsecs(q, rq);

if (!nsecs)
return false;

rq->rq_flags |= RQF_MQ_POLL_SLEPT;

/*
 * This will be replaced with the stats tracking code, using
 * 'avg_completion_time / 2' as the pre-sleep target.
 */
kt = nsecs;

mode = HRTIMER_MODE_REL;
hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
hrtimer_set_expires(&hs.timer, kt);

do {
if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
break;
set_current_state(TASK_UNINTERRUPTIBLE);
hrtimer_sleeper_start_expires(&hs, mode);
if (hs.task)
io_schedule();
hrtimer_cancel(&hs.timer);
mode = HRTIMER_MODE_ABS;
} while (hs.task && !signal_pending(current));

__set_current_state(TASK_RUNNING);
destroy_hrtimer_on_stack(&hs.timer);

/*
 * If we sleep, have the caller restart the poll loop to reset the
 * state.  Like for the other success return cases, the caller is
 * responsible for checking if the IO completed. If the IO isn't
 * complete, we'll get called again and will go straight to the busy
 * poll loop.
 */
return true;
}

static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
struct io_comp_batch *iob, unsigned int flags)
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
unsigned int flags)
{
struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
long state = get_current_state();
@@ -4910,17 +4768,6 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
return 0;
}

int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
unsigned int flags)
{
if (!(flags & BLK_POLL_NOSLEEP) &&
q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
if (blk_mq_poll_hybrid(q, cookie))
return 1;
}
return blk_mq_poll_classic(q, cookie, iob, flags);
}

unsigned int blk_mq_rq_cpu(struct request *rq)
{
return rq->mq_ctx->cpu;

@@ -2,8 +2,8 @@
#ifndef INT_BLK_MQ_H
#define INT_BLK_MQ_H

#include <linux/blk-mq.h>
#include "blk-stat.h"
#include "blk-mq-tag.h"

struct blk_mq_tag_set;

@@ -30,6 +30,15 @@ struct blk_mq_ctx {
struct kobject kobj;
} ____cacheline_aligned_in_smp;

enum {
BLK_MQ_NO_TAG = -1U,
BLK_MQ_TAG_MIN = 1,
BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1,
};

typedef unsigned int __bitwise blk_insert_t;
#define BLK_MQ_INSERT_AT_HEAD ((__force blk_insert_t)0x01)

void blk_mq_submit_bio(struct bio *bio);
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
unsigned int flags);
@@ -38,8 +47,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *,
unsigned int);
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
bool kick_requeue_list);
void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *start);
@@ -59,14 +67,7 @@ void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
/*
 * Internal helpers for request insertion into sw queues
 */
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head);
void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list);
void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags);

/*
 * CPU -> queue mappings
@@ -164,6 +165,60 @@ struct blk_mq_alloc_data {
struct blk_mq_hw_ctx *hctx;
};

struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
unsigned int reserved_tags, int node, int alloc_policy);
void blk_mq_free_tags(struct blk_mq_tags *tags);
int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
struct sbitmap_queue *breserved_tags, unsigned int queue_depth,
unsigned int reserved, int node, int alloc_policy);

unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
unsigned int *offset);
void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
unsigned int tag);
void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags);
int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
struct blk_mq_tags **tags, unsigned int depth, bool can_grow);
void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set,
unsigned int size);
void blk_mq_tag_update_sched_shared_tags(struct request_queue *q);

void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
void *priv);
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
void *priv);

static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
struct blk_mq_hw_ctx *hctx)
{
if (!hctx)
return &bt->ws[0];
return sbq_wait_ptr(bt, &hctx->wait_index);
}

void __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);

static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
__blk_mq_tag_busy(hctx);
}

static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
__blk_mq_tag_idle(hctx);
}

static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
unsigned int tag)
{
return tag < tags->nr_reserved_tags;
}

static inline bool blk_mq_is_shared_tags(unsigned int flags)
{
return flags & BLK_MQ_F_TAG_HCTX_SHARED;

@@ -1,11 +1,9 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/blk-mq.h>
#include <linux/blk-pm.h>
#include <linux/blkdev.h>
#include <linux/pm_runtime.h>
#include "blk-mq.h"
#include "blk-mq-tag.h"

/**
 * blk_pm_runtime_init - Block layer runtime PM initialization routine

@@ -74,7 +74,7 @@ static inline struct rq_qos *wbt_rq_qos(struct request_queue *q)
return rq_qos_id(q, RQ_QOS_WBT);
}

static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q)
static inline struct rq_qos *iolat_rq_qos(struct request_queue *q)
{
return rq_qos_id(q, RQ_QOS_LATENCY);
}

@@ -6,7 +6,6 @@
 */
#include <linux/kernel.h>
#include <linux/rculist.h>
#include <linux/blk-mq.h>

#include "blk-stat.h"
#include "blk-mq.h"
@@ -190,7 +189,7 @@ void blk_stat_disable_accounting(struct request_queue *q)
unsigned long flags;

spin_lock_irqsave(&q->stats->lock, flags);
if (!--q->stats->accounting)
if (!--q->stats->accounting && list_empty(&q->stats->callbacks))
blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
spin_unlock_irqrestore(&q->stats->lock, flags);
}
@@ -201,7 +200,7 @@ void blk_stat_enable_accounting(struct request_queue *q)
unsigned long flags;

spin_lock_irqsave(&q->stats->lock, flags);
if (!q->stats->accounting++)
if (!q->stats->accounting++ && list_empty(&q->stats->callbacks))
blk_queue_flag_set(QUEUE_FLAG_STATS, q);
spin_unlock_irqrestore(&q->stats->lock, flags);
}
@@ -231,21 +230,3 @@ void blk_free_queue_stats(struct blk_queue_stats *stats)

kfree(stats);
}

bool blk_stats_alloc_enable(struct request_queue *q)
{
struct blk_rq_stat *poll_stat;

poll_stat = kcalloc(BLK_MQ_POLL_STATS_BKTS, sizeof(*poll_stat),
GFP_ATOMIC);
if (!poll_stat)
return false;

if (cmpxchg(&q->poll_stat, NULL, poll_stat) != NULL) {
kfree(poll_stat);
return true;
}

blk_stat_add_callback(q, q->poll_cb);
return false;
}

@@ -9,7 +9,6 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/blktrace_api.h>
#include <linux/blk-mq.h>
#include <linux/debugfs.h>

#include "blk.h"
@@ -408,35 +407,12 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)

static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
{
int val;

if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
val = BLK_MQ_POLL_CLASSIC;
else
val = q->poll_nsec / 1000;

return sprintf(page, "%d\n", val);
return sprintf(page, "%d\n", -1);
}

static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
size_t count)
{
int err, val;

if (!q->mq_ops || !q->mq_ops->poll)
return -EINVAL;

err = kstrtoint(page, 10, &val);
if (err < 0)
return err;

if (val == BLK_MQ_POLL_CLASSIC)
q->poll_nsec = BLK_MQ_POLL_CLASSIC;
else if (val >= 0)
q->poll_nsec = val * 1000;
else
return -EINVAL;

return count;
}

@@ -1368,9 +1368,11 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
int ret;
u64 v;

ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
blkg_conf_init(&ctx, buf);

ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
if (ret)
return ret;
goto out_finish;

ret = -EINVAL;
if (sscanf(ctx.body, "%llu", &v) != 1)
@@ -1389,7 +1391,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
tg_conf_updated(tg, false);
ret = 0;
out_finish:
blkg_conf_finish(&ctx);
blkg_conf_exit(&ctx);
return ret ?: nbytes;
}

@@ -1561,9 +1563,11 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
int ret;
int index = of_cft(of)->private;

ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
blkg_conf_init(&ctx, buf);

ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
if (ret)
return ret;
goto out_finish;

tg = blkg_to_tg(ctx.blkg);
tg_update_carryover(tg);
@@ -1662,7 +1666,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
tg->td->limit_valid[LIMIT_LOW]);
ret = 0;
out_finish:
blkg_conf_finish(&ctx);
blkg_conf_exit(&ctx);
return ret ?: nbytes;
}

@@ -2439,11 +2443,12 @@ void blk_throtl_register(struct gendisk *disk)
#ifndef CONFIG_BLK_DEV_THROTTLING_LOW
/* if no low limit, use previous default */
td->throtl_slice = DFL_THROTL_SLICE_HD;
#endif

#else
td->track_bio_latency = !queue_is_mq(q);
if (!td->track_bio_latency)
blk_stat_enable_accounting(q);
#endif
}

#ifdef CONFIG_BLK_DEV_THROTTLING_LOW

@@ -399,12 +399,6 @@ static inline struct bio *blk_queue_bounce(struct bio *bio,
return bio;
}

#ifdef CONFIG_BLK_CGROUP_IOLATENCY
int blk_iolatency_init(struct gendisk *disk);
#else
static inline int blk_iolatency_init(struct gendisk *disk) { return 0; };
#endif

#ifdef CONFIG_BLK_DEV_ZONED
void disk_free_zone_bitmaps(struct gendisk *disk);
void disk_clear_zone_settings(struct gendisk *disk);

@@ -4,6 +4,7 @@

#include <linux/percpu.h>
#include <linux/hashtable.h>
#include "blk-mq.h"

struct io_cq;
struct elevator_type;
@@ -37,7 +38,8 @@ struct elevator_mq_ops {
void (*limit_depth)(blk_opf_t, struct blk_mq_alloc_data *);
void (*prepare_request)(struct request *);
void (*finish_request)(struct request *);
void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
void (*insert_requests)(struct blk_mq_hw_ctx *hctx, struct list_head *list,
blk_insert_t flags);
struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
bool (*has_work)(struct blk_mq_hw_ctx *);
void (*completed_request)(struct request *, u64);

@@ -426,6 +426,9 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
 */
elevator_init_mq(disk->queue);

/* Mark bdev as having a submit_bio, if needed */
disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;

/*
 * If the driver provides an explicit major number it also must provide
 * the number of minors numbers supported, and those will be used to

@@ -8,7 +8,6 @@

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/module.h>
#include <linux/sbitmap.h>

@@ -19,7 +18,6 @@
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"

#define CREATE_TRACE_POINTS
#include <trace/events/kyber.h>
@@ -590,7 +588,8 @@ static void kyber_prepare_request(struct request *rq)
}

static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
struct list_head *rq_list, bool at_head)
struct list_head *rq_list,
blk_insert_t flags)
{
struct kyber_hctx_data *khd = hctx->sched_data;
struct request *rq, *next;
@@ -602,7 +601,7 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,

spin_lock(&kcq->lock);
trace_block_rq_insert(rq);
if (at_head)
if (flags & BLK_MQ_INSERT_AT_HEAD)
list_move(&rq->queuelist, head);
else
list_move_tail(&rq->queuelist, head);

@@ -8,7 +8,6 @@
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -23,7 +22,6 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"

/*
@@ -768,7 +766,7 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
 * add rq to rbtree and fifo
 */
static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head)
blk_insert_t flags)
{
struct request_queue *q = hctx->queue;
struct deadline_data *dd = q->elevator->elevator_data;
@@ -801,7 +799,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,

trace_block_rq_insert(rq);

if (at_head) {
if (flags & BLK_MQ_INSERT_AT_HEAD) {
list_add(&rq->queuelist, &per_prio->dispatch);
rq->fifo_time = jiffies;
} else {
@@ -822,10 +820,11 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
}

/*
 * Called from blk_mq_sched_insert_request() or blk_mq_sched_insert_requests().
 * Called from blk_mq_insert_request() or blk_mq_dispatch_plug_list().
 */
static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
struct list_head *list, bool at_head)
struct list_head *list,
blk_insert_t flags)
{
struct request_queue *q = hctx->queue;
struct deadline_data *dd = q->elevator->elevator_data;
@@ -836,7 +835,7 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,

rq = list_first_entry(list, struct request, queuelist);
list_del_init(&rq->queuelist);
dd_insert_request(hctx, rq, at_head);
dd_insert_request(hctx, rq, flags);
}
spin_unlock(&dd->lock);
}

@@ -86,6 +86,15 @@ enum opal_response_token {
#define OPAL_MSID_KEYLEN 15
#define OPAL_UID_LENGTH_HALF 4

/*
 * Boolean operators from TCG Core spec 2.01 Section:
 * 5.1.3.11
 * Table 61
 */
#define OPAL_BOOLEAN_AND 0
#define OPAL_BOOLEAN_OR 1
#define OPAL_BOOLEAN_NOT 2

/* Enum to index OPALUID array */
enum opal_uid {
/* users */
@@ -105,6 +114,7 @@ enum opal_uid {
/* tables */
OPAL_TABLE_TABLE,
OPAL_LOCKINGRANGE_GLOBAL,
OPAL_LOCKINGRANGE_ACE_START_TO_KEY,
OPAL_LOCKINGRANGE_ACE_RDLOCKED,
OPAL_LOCKINGRANGE_ACE_WRLOCKED,
OPAL_MBRCONTROL,

block/sed-opal.c
@@ -83,8 +83,10 @@ struct opal_dev {
u16 comid;
u32 hsn;
u32 tsn;
u64 align;
u64 align; /* alignment granularity */
u64 lowest_lba;
u32 logical_block_size;
u8 align_required; /* ALIGN: 0 or 1 */

size_t pos;
u8 *cmd;
@@ -132,6 +134,8 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
{ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01 },
[OPAL_LOCKINGRANGE_GLOBAL] =
{ 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 },
[OPAL_LOCKINGRANGE_ACE_START_TO_KEY] =
{ 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xD0, 0x01 },
[OPAL_LOCKINGRANGE_ACE_RDLOCKED] =
{ 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xE0, 0x01 },
[OPAL_LOCKINGRANGE_ACE_WRLOCKED] =
@@ -407,6 +411,8 @@ static void check_geometry(struct opal_dev *dev, const void *data)

dev->align = be64_to_cpu(geo->alignment_granularity);
dev->lowest_lba = be64_to_cpu(geo->lowest_aligned_lba);
dev->logical_block_size = be32_to_cpu(geo->logical_block_size);
dev->align_required = geo->reserved01 & 1;
}

static int execute_step(struct opal_dev *dev,
@@ -1147,12 +1153,8 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont)
return opal_send_recv(dev, cont);
}

/*
 * request @column from table @table on device @dev. On success, the column
 * data will be available in dev->resp->tok[4]
 */
static int generic_get_column(struct opal_dev *dev, const u8 *table,
u64 column)
static int generic_get_columns(struct opal_dev *dev, const u8 *table,
u64 start_column, u64 end_column)
{
int err;

@@ -1162,12 +1164,12 @@ static int generic_get_column(struct opal_dev *dev, const u8 *table,

add_token_u8(&err, dev, OPAL_STARTNAME);
add_token_u8(&err, dev, OPAL_STARTCOLUMN);
add_token_u64(&err, dev, column);
add_token_u64(&err, dev, start_column);
add_token_u8(&err, dev, OPAL_ENDNAME);

add_token_u8(&err, dev, OPAL_STARTNAME);
add_token_u8(&err, dev, OPAL_ENDCOLUMN);
add_token_u64(&err, dev, column);
add_token_u64(&err, dev, end_column);
add_token_u8(&err, dev, OPAL_ENDNAME);

add_token_u8(&err, dev, OPAL_ENDLIST);
@@ -1178,6 +1180,16 @@ static int generic_get_column(struct opal_dev *dev, const u8 *table,
return finalize_and_send(dev, parse_and_check_status);
}

/*
 * request @column from table @table on device @dev. On success, the column
 * data will be available in dev->resp->tok[4]
 */
static int generic_get_column(struct opal_dev *dev, const u8 *table,
u64 column)
{
return generic_get_columns(dev, table, column, column);
}

/*
 * see TCG SAS 5.3.2.3 for a description of the available columns
 *
@@ -1437,6 +1449,129 @@ static int setup_locking_range(struct opal_dev *dev, void *data)
return finalize_and_send(dev, parse_and_check_status);
}

static int response_get_column(const struct parsed_resp *resp,
int *iter,
u8 column,
u64 *value)
{
const struct opal_resp_tok *tok;
int n = *iter;
u64 val;

tok = response_get_token(resp, n);
if (IS_ERR(tok))
return PTR_ERR(tok);

if (!response_token_matches(tok, OPAL_STARTNAME)) {
pr_debug("Unexpected response token type %d.\n", n);
return OPAL_INVAL_PARAM;
}
n++;

if (response_get_u64(resp, n) != column) {
pr_debug("Token %d does not match expected column %u.\n",
n, column);
return OPAL_INVAL_PARAM;
}
n++;

val = response_get_u64(resp, n);
n++;

tok = response_get_token(resp, n);
if (IS_ERR(tok))
return PTR_ERR(tok);

if (!response_token_matches(tok, OPAL_ENDNAME)) {
pr_debug("Unexpected response token type %d.\n", n);
return OPAL_INVAL_PARAM;
}
n++;

*value = val;
*iter = n;

return 0;
}

static int locking_range_status(struct opal_dev *dev, void *data)
|
||||
{
|
||||
u8 lr_buffer[OPAL_UID_LENGTH];
|
||||
u64 resp;
|
||||
bool rlocked, wlocked;
|
||||
int err, tok_n = 2;
|
||||
struct opal_lr_status *lrst = data;
|
||||
|
||||
err = build_locking_range(lr_buffer, sizeof(lr_buffer),
|
||||
lrst->session.opal_key.lr);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = generic_get_columns(dev, lr_buffer, OPAL_RANGESTART,
|
||||
OPAL_WRITELOCKED);
|
||||
if (err) {
|
||||
pr_debug("Couldn't get lr %u table columns %d to %d.\n",
|
||||
lrst->session.opal_key.lr, OPAL_RANGESTART,
|
||||
OPAL_WRITELOCKED);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* range start */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_RANGESTART,
|
||||
&lrst->range_start);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* range length */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_RANGELENGTH,
|
||||
&lrst->range_length);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* RLE */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_READLOCKENABLED,
|
||||
&resp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
lrst->RLE = !!resp;
|
||||
|
||||
/* WLE */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_WRITELOCKENABLED,
|
||||
&resp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
lrst->WLE = !!resp;
|
||||
|
||||
/* read locked */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_READLOCKED, &resp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
rlocked = !!resp;
|
||||
|
||||
/* write locked */
|
||||
err = response_get_column(&dev->parsed, &tok_n, OPAL_WRITELOCKED, &resp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
wlocked = !!resp;
|
||||
|
||||
/* opal_lock_state can not map 'read locked' only state. */
|
||||
lrst->l_state = OPAL_RW;
|
||||
if (rlocked && wlocked)
|
||||
lrst->l_state = OPAL_LK;
|
||||
else if (wlocked)
|
||||
lrst->l_state = OPAL_RO;
|
||||
else if (rlocked) {
|
||||
pr_debug("Can not report read locked only state.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int start_generic_opal_session(struct opal_dev *dev,
				      enum opal_uid auth,
				      enum opal_uid sp_type,
@@ -1759,25 +1894,43 @@ static int set_sid_cpin_pin(struct opal_dev *dev, void *data)
	return finalize_and_send(dev, parse_and_check_status);
}

static int add_user_to_lr(struct opal_dev *dev, void *data)
static void add_authority_object_ref(int *err,
				     struct opal_dev *dev,
				     const u8 *uid,
				     size_t uid_len)
{
	add_token_u8(err, dev, OPAL_STARTNAME);
	add_token_bytestring(err, dev,
			     opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF],
			     OPAL_UID_LENGTH/2);
	add_token_bytestring(err, dev, uid, uid_len);
	add_token_u8(err, dev, OPAL_ENDNAME);
}

static void add_boolean_object_ref(int *err,
				   struct opal_dev *dev,
				   u8 boolean_op)
{
	add_token_u8(err, dev, OPAL_STARTNAME);
	add_token_bytestring(err, dev, opaluid[OPAL_HALF_UID_BOOLEAN_ACE],
			     OPAL_UID_LENGTH/2);
	add_token_u8(err, dev, boolean_op);
	add_token_u8(err, dev, OPAL_ENDNAME);
}

static int set_lr_boolean_ace(struct opal_dev *dev,
			      unsigned int opal_uid,
			      u8 lr,
			      const u8 *users,
			      size_t users_len)
{
	u8 lr_buffer[OPAL_UID_LENGTH];
	u8 user_uid[OPAL_UID_LENGTH];
	struct opal_lock_unlock *lkul = data;
	u8 u;
	int err;

	memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_RDLOCKED],
	       OPAL_UID_LENGTH);

	if (lkul->l_state == OPAL_RW)
		memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_WRLOCKED],
		       OPAL_UID_LENGTH);

	lr_buffer[7] = lkul->session.opal_key.lr;

	memcpy(user_uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH);

	user_uid[7] = lkul->session.who;
	memcpy(lr_buffer, opaluid[opal_uid], OPAL_UID_LENGTH);
	lr_buffer[7] = lr;

	err = cmd_start(dev, lr_buffer, opalmethod[OPAL_SET]);

@@ -1790,35 +1943,49 @@ static int add_user_to_lr(struct opal_dev *dev, void *data)

	add_token_u8(&err, dev, OPAL_STARTLIST);

	for (u = 0; u < users_len; u++) {
		if (users[u] == OPAL_ADMIN1)
			memcpy(user_uid, opaluid[OPAL_ADMIN1_UID],
			       OPAL_UID_LENGTH);
		else {
			memcpy(user_uid, opaluid[OPAL_USER1_UID],
			       OPAL_UID_LENGTH);
			user_uid[7] = users[u];
		}

	add_token_u8(&err, dev, OPAL_STARTNAME);
	add_token_bytestring(&err, dev,
			     opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF],
			     OPAL_UID_LENGTH/2);
	add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH);
	add_token_u8(&err, dev, OPAL_ENDNAME);

	add_token_u8(&err, dev, OPAL_STARTNAME);
	add_token_bytestring(&err, dev,
			     opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF],
			     OPAL_UID_LENGTH/2);
	add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH);
	add_token_u8(&err, dev, OPAL_ENDNAME);

	add_token_u8(&err, dev, OPAL_STARTNAME);
	add_token_bytestring(&err, dev, opaluid[OPAL_HALF_UID_BOOLEAN_ACE],
			     OPAL_UID_LENGTH/2);
	add_token_u8(&err, dev, 1);
	add_token_u8(&err, dev, OPAL_ENDNAME);
		add_authority_object_ref(&err, dev, user_uid, sizeof(user_uid));

		/*
		 * Add boolean operator in postfix only with
		 * two or more authorities being added in ACE
		 * expression.
		 */
		if (u > 0)
			add_boolean_object_ref(&err, dev, OPAL_BOOLEAN_OR);
	}

	add_token_u8(&err, dev, OPAL_ENDLIST);
	add_token_u8(&err, dev, OPAL_ENDNAME);
	add_token_u8(&err, dev, OPAL_ENDLIST);
	add_token_u8(&err, dev, OPAL_ENDNAME);

	return err;
}

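/*
 * Illustrative note (not part of this patch): for users = { ADMIN1, USERn },
 * the loop in set_lr_boolean_ace() above emits the ACE boolean expression in
 * postfix form, i.e. one half-UID authority object reference per user
 * followed by a single boolean OR term:
 *
 *   { AuthorityObjectRef(Admin1) } { AuthorityObjectRef(UserN) } { BooleanAce(OR) }
 *
 * A single-user call, as made from add_user_to_lr() below, emits just one
 * authority reference and no boolean operator.
 */
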
static int add_user_to_lr(struct opal_dev *dev, void *data)
{
	int err;
	struct opal_lock_unlock *lkul = data;
	const u8 users[] = {
		lkul->session.who
	};

	err = set_lr_boolean_ace(dev,
				 lkul->l_state == OPAL_RW ?
					OPAL_LOCKINGRANGE_ACE_WRLOCKED :
					OPAL_LOCKINGRANGE_ACE_RDLOCKED,
				 lkul->session.opal_key.lr, users,
				 ARRAY_SIZE(users));
	if (err) {
		pr_debug("Error building add user to locking range command.\n");
		return err;
@@ -1827,6 +1994,27 @@ static int add_user_to_lr(struct opal_dev *dev, void *data)
	return finalize_and_send(dev, parse_and_check_status);
}

static int add_user_to_lr_ace(struct opal_dev *dev, void *data)
{
	int err;
	struct opal_lock_unlock *lkul = data;
	const u8 users[] = {
		OPAL_ADMIN1,
		lkul->session.who
	};

	err = set_lr_boolean_ace(dev, OPAL_LOCKINGRANGE_ACE_START_TO_KEY,
				 lkul->session.opal_key.lr, users,
				 ARRAY_SIZE(users));

	if (err) {
		pr_debug("Error building add user to locking ranges ACEs.\n");
		return err;
	}

	return finalize_and_send(dev, parse_and_check_status);
}

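/*
 * Illustrative note (not part of this patch): opal_add_user_to_lr() below now
 * chains add_user_to_lr() and add_user_to_lr_ace(), so a user authority is
 * added both to the read/write unlock ACEs and to the ACE referenced by
 * OPAL_LOCKINGRANGE_ACE_START_TO_KEY (by its name, the one covering the
 * locking range's attribute columns), which is what lets a non-admin
 * authority read locking range attributes via locking_range_status().
 */
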
static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
{
	u8 lr_buffer[OPAL_UID_LENGTH];
@@ -2364,6 +2552,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
	const struct opal_step steps[] = {
		{ start_admin1LSP_opal_session, &lk_unlk->session.opal_key },
		{ add_user_to_lr, lk_unlk },
		{ add_user_to_lr_ace, lk_unlk },
		{ end_opal_session, }
	};
	int ret;
@@ -2580,6 +2769,33 @@ static int opal_setup_locking_range(struct opal_dev *dev,
	return ret;
}

static int opal_locking_range_status(struct opal_dev *dev,
				     struct opal_lr_status *opal_lrst,
				     void __user *data)
{
	const struct opal_step lr_steps[] = {
		{ start_auth_opal_session, &opal_lrst->session },
		{ locking_range_status, opal_lrst },
		{ end_opal_session, }
	};
	int ret;

	mutex_lock(&dev->dev_lock);
	setup_opal_dev(dev);
	ret = execute_steps(dev, lr_steps, ARRAY_SIZE(lr_steps));
	mutex_unlock(&dev->dev_lock);

	/* skip session info when copying back to uspace */
	if (!ret && copy_to_user(data + offsetof(struct opal_lr_status, range_start),
			(void *)opal_lrst + offsetof(struct opal_lr_status, range_start),
			sizeof(*opal_lrst) - offsetof(struct opal_lr_status, range_start))) {
		pr_debug("Error copying status to userspace\n");
		return -EFAULT;
	}

	return ret;
}

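/*
 * Illustrative userspace sketch (not part of this patch): querying locking
 * range attributes through the new IOC_OPAL_GET_LR_STATUS ioctl. The session
 * fields used below (who, opal_key.lr/key/key_len) follow the existing
 * sed-opal uapi; the helper name and password handling are assumptions for
 * the example, and the password must fit the opal_key buffer.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/sed-opal.h>

static int query_lr_status(int fd, __u8 lr, __u32 who,
			   const char *pw, __u8 pw_len,
			   struct opal_lr_status *lrst)
{
	memset(lrst, 0, sizeof(*lrst));
	lrst->session.who = who;		/* e.g. OPAL_ADMIN1 or OPAL_USER1 */
	lrst->session.opal_key.lr = lr;
	lrst->session.opal_key.key_len = pw_len;
	memcpy(lrst->session.opal_key.key, pw, pw_len);

	/* On success, range_start/range_length/RLE/WLE/l_state are filled in. */
	return ioctl(fd, IOC_OPAL_GET_LR_STATUS, lrst);
}
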
static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
{
	const struct opal_step pw_steps[] = {
@@ -2744,6 +2960,26 @@ static int opal_get_status(struct opal_dev *dev, void __user *data)
	return 0;
}

static int opal_get_geometry(struct opal_dev *dev, void __user *data)
{
	struct opal_geometry geo = {0};

	if (check_opal_support(dev))
		return -EINVAL;

	geo.align = dev->align_required;
	geo.logical_block_size = dev->logical_block_size;
	geo.alignment_granularity = dev->align;
	geo.lowest_aligned_lba = dev->lowest_lba;

	if (copy_to_user(data, &geo, sizeof(geo))) {
		pr_debug("Error copying geometry data to userspace\n");
		return -EFAULT;
	}

	return 0;
}

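/*
 * Illustrative userspace sketch (not part of this patch): reading the
 * geometry reported by the new IOC_OPAL_GET_GEOMETRY ioctl. The helper name
 * and error handling are assumptions for the example; the struct fields match
 * the ones filled in by opal_get_geometry() above.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sed-opal.h>

static int print_opal_geometry(int fd)
{
	struct opal_geometry geo;

	if (ioctl(fd, IOC_OPAL_GET_GEOMETRY, &geo))
		return -1;

	printf("align=%u lbs=%u granularity=%llu lowest_aligned_lba=%llu\n",
	       (unsigned int)geo.align, geo.logical_block_size,
	       (unsigned long long)geo.alignment_granularity,
	       (unsigned long long)geo.lowest_aligned_lba);

	return 0;
}
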
int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
{
	void *p;
@@ -2814,6 +3050,12 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
	case IOC_OPAL_GET_STATUS:
		ret = opal_get_status(dev, arg);
		break;
	case IOC_OPAL_GET_LR_STATUS:
		ret = opal_locking_range_status(dev, p, arg);
		break;
	case IOC_OPAL_GET_GEOMETRY:
		ret = opal_get_geometry(dev, arg);
		break;
	default:
		break;
	}