Merge tag 'for-6.6/block-2023-08-28' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:
 "Pretty quiet round for this release. This contains:

   - Add support for zoned storage to ublk (Andreas, Ming)

   - Series improving performance for drivers that mark themselves as
     needing a blocking context for issue (Bart)

   - Cleanup the flush logic (Chengming)

   - sed opal keyring support (Greg)

   - Fixes and improvements to the integrity support (Jinyoung)

   - Add some exports for bcachefs that we can hopefully delete again in
     the future (Kent)

   - deadline throttling fix (Zhiguo)

   - Series allowing building the kernel without buffer_head support
     (Christoph)

   - Sanitize the bio page adding flow (Christoph)

   - Write back cache fixes (Christoph)

   - MD updates via Song:
      - Fix perf regression for raid0 large sequential writes (Jan)
      - Fix split bio iostat for raid0 (David)
      - Various raid1 fixes (Heinz, Xueshi)
      - raid6test build fixes (WANG)
      - Deprecate bitmap file support (Christoph)
      - Fix deadlock with md sync thread (Yu)
      - Refactor md io accounting (Yu)
      - Various non-urgent fixes (Li, Yu, Jack)

   - Various fixes and cleanups (Arnd, Azeem, Chengming, Damien, Li,
     Ming, Nitesh, Ruan, Tejun, Thomas, Xu)"

* tag 'for-6.6/block-2023-08-28' of git://git.kernel.dk/linux: (113 commits)
  block: use strscpy() to instead of strncpy()
  block: sed-opal: keyring support for SED keys
  block: sed-opal: Implement IOC_OPAL_REVERT_LSP
  block: sed-opal: Implement IOC_OPAL_DISCOVERY
  blk-mq: prealloc tags when increase tagset nr_hw_queues
  blk-mq: delete redundant tagset map update when fallback
  blk-mq: fix tags leak when shrink nr_hw_queues
  ublk: zoned: support REQ_OP_ZONE_RESET_ALL
  md: raid0: account for split bio in iostat accounting
  md/raid0: Fix performance regression for large sequential writes
  md/raid0: Factor out helper for mapping and submitting a bio
  md raid1: allow writebehind to work on any leg device set WriteMostly
  md/raid1: hold the barrier until handle_read_error() finishes
  md/raid1: free the r1bio before waiting for blocked rdev
  md/raid1: call free_r1bio() before allow_barrier() in raid_end_bio_io()
  blk-cgroup: Fix NULL deref caused by blkg_policy_data being installed before init
  drivers/rnbd: restore sysfs interface to rnbd-client
  md/raid5-cache: fix null-ptr-deref for r5l_flush_stripe_to_raid()
  raid6: test: only check for Altivec if building on powerpc hosts
  raid6: test: make sure all intermediate and artifact files are .gitignored
  ...
This commit is contained in:
Linus Torvalds
2023-08-29 20:21:42 -07:00
101 changed files with 1685 additions and 846 deletions

View File

@@ -5,6 +5,7 @@
menuconfig BLOCK
bool "Enable the block layer" if EXPERT
default y
select FS_IOMAP
select SBITMAP
help
Provide block layer support for the kernel.
@@ -183,6 +184,8 @@ config BLK_DEBUG_FS_ZONED
config BLK_SED_OPAL
bool "Logic for interfacing with Opal enabled SEDs"
depends on KEYS
select PSERIES_PLPKS if PPC_PSERIES
help
Builds Logic for interfacing with Opal enabled controllers.
Enabling this option enables users to setup/unlock/lock

View File

@@ -123,20 +123,38 @@ void bio_integrity_free(struct bio *bio)
int bio_integrity_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct bio_integrity_payload *bip = bio_integrity(bio);
if (bip->bip_vcnt >= bip->bip_max_vcnt) {
printk(KERN_ERR "%s: bip_vec full\n", __func__);
if (((bip->bip_iter.bi_size + len) >> SECTOR_SHIFT) >
queue_max_hw_sectors(q))
return 0;
}
if (bip->bip_vcnt &&
bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits,
&bip->bip_vec[bip->bip_vcnt - 1], offset))
return 0;
if (bip->bip_vcnt > 0) {
struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
bool same_page = false;
if (bvec_try_merge_hw_page(q, bv, page, len, offset,
&same_page)) {
bip->bip_iter.bi_size += len;
return len;
}
if (bip->bip_vcnt >=
min(bip->bip_max_vcnt, queue_max_integrity_segments(q)))
return 0;
/*
* If the queue doesn't support SG gaps and adding this segment
* would create a gap, disallow it.
*/
if (bvec_gap_to_prev(&q->limits, bv, offset))
return 0;
}
bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
bip->bip_vcnt++;
bip->bip_iter.bi_size += len;
return len;
}
@@ -199,8 +217,6 @@ bool bio_integrity_prep(struct bio *bio)
unsigned long start, end;
unsigned int len, nr_pages;
unsigned int bytes, offset, i;
unsigned int intervals;
blk_status_t status;
if (!bi)
return true;
@@ -224,12 +240,10 @@ bool bio_integrity_prep(struct bio *bio)
!(bi->flags & BLK_INTEGRITY_GENERATE))
return true;
}
intervals = bio_integrity_intervals(bi, bio_sectors(bio));
/* Allocate kernel buffer for protection data */
len = intervals * bi->tuple_size;
len = bio_integrity_bytes(bi, bio_sectors(bio));
buf = kmalloc(len, GFP_NOIO);
status = BLK_STS_RESOURCE;
if (unlikely(buf == NULL)) {
printk(KERN_ERR "could not allocate integrity buffer\n");
goto err_end_io;
@@ -244,12 +258,10 @@ bool bio_integrity_prep(struct bio *bio)
if (IS_ERR(bip)) {
printk(KERN_ERR "could not allocate data integrity bioset\n");
kfree(buf);
status = BLK_STS_RESOURCE;
goto err_end_io;
}
bip->bip_flags |= BIP_BLOCK_INTEGRITY;
bip->bip_iter.bi_size = len;
bip_set_seed(bip, bio->bi_iter.bi_sector);
if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
@@ -257,28 +269,18 @@ bool bio_integrity_prep(struct bio *bio)
/* Map it */
offset = offset_in_page(buf);
for (i = 0 ; i < nr_pages ; i++) {
int ret;
for (i = 0; i < nr_pages && len > 0; i++) {
bytes = PAGE_SIZE - offset;
if (len <= 0)
break;
if (bytes > len)
bytes = len;
ret = bio_integrity_add_page(bio, virt_to_page(buf),
bytes, offset);
if (ret == 0) {
if (bio_integrity_add_page(bio, virt_to_page(buf),
bytes, offset) < bytes) {
printk(KERN_ERR "could not attach integrity payload\n");
status = BLK_STS_RESOURCE;
goto err_end_io;
}
if (ret < bytes)
break;
buf += bytes;
len -= bytes;
offset = 0;
@@ -294,10 +296,9 @@ bool bio_integrity_prep(struct bio *bio)
return true;
err_end_io:
bio->bi_status = status;
bio->bi_status = BLK_STS_RESOURCE;
bio_endio(bio);
return false;
}
EXPORT_SYMBOL(bio_integrity_prep);

View File

@@ -606,15 +606,15 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
}
EXPORT_SYMBOL(bio_kmalloc);
void zero_fill_bio(struct bio *bio)
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
{
struct bio_vec bv;
struct bvec_iter iter;
bio_for_each_segment(bv, bio, iter)
__bio_for_each_segment(bv, bio, iter, start)
memzero_bvec(&bv);
}
EXPORT_SYMBOL(zero_fill_bio);
EXPORT_SYMBOL(zero_fill_bio_iter);
/**
* bio_truncate - truncate the bio to small size of @new_size
@@ -903,9 +903,8 @@ static inline bool bio_full(struct bio *bio, unsigned len)
return false;
}
static inline bool page_is_mergeable(const struct bio_vec *bv,
struct page *page, unsigned int len, unsigned int off,
bool *same_page)
static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
unsigned int len, unsigned int off, bool *same_page)
{
size_t bv_end = bv->bv_offset + bv->bv_len;
phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
@@ -919,49 +918,15 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
return false;
*same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
if (*same_page)
return true;
else if (IS_ENABLED(CONFIG_KMSAN))
return false;
return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
}
/**
* __bio_try_merge_page - try appending data to an existing bvec.
* @bio: destination bio
* @page: start page to add
* @len: length of the data to add
* @off: offset of the data relative to @page
* @same_page: return if the segment has been merged inside the same page
*
* Try to add the data at @page + @off to the last bvec of @bio. This is a
* useful optimisation for file systems with a block size smaller than the
* page size.
*
* Warn if (@len, @off) crosses pages in case that @same_page is true.
*
* Return %true on success or %false on failure.
*/
static bool __bio_try_merge_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off, bool *same_page)
{
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
if (bio->bi_vcnt > 0) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (page_is_mergeable(bv, page, len, off, same_page)) {
if (bio->bi_iter.bi_size > UINT_MAX - len) {
*same_page = false;
return false;
}
bv->bv_len += len;
bio->bi_iter.bi_size += len;
return true;
}
if (!*same_page) {
if (IS_ENABLED(CONFIG_KMSAN))
return false;
if (bv->bv_page + bv_end / PAGE_SIZE != page + off / PAGE_SIZE)
return false;
}
return false;
bv->bv_len += len;
return true;
}
/*
@@ -969,11 +934,10 @@ static bool __bio_try_merge_page(struct bio *bio, struct page *page,
* size limit. This is not for normal read/write bios, but for passthrough
* or Zone Append operations that we can't split.
*/
static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
struct page *page, unsigned len,
unsigned offset, bool *same_page)
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
struct page *page, unsigned len, unsigned offset,
bool *same_page)
{
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned long mask = queue_segment_boundary(q);
phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
@@ -982,7 +946,7 @@ static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
return false;
if (bv->bv_len + len > queue_max_segment_size(q))
return false;
return __bio_try_merge_page(bio, page, len, offset, same_page);
return bvec_try_merge_page(bv, page, len, offset, same_page);
}
/**
@@ -1002,33 +966,33 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page)
{
struct bio_vec *bvec;
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return 0;
if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
if (((bio->bi_iter.bi_size + len) >> SECTOR_SHIFT) > max_sectors)
return 0;
if (bio->bi_vcnt > 0) {
if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (bvec_try_merge_hw_page(q, bv, page, len, offset,
same_page)) {
bio->bi_iter.bi_size += len;
return len;
}
if (bio->bi_vcnt >=
min(bio->bi_max_vecs, queue_max_segments(q)))
return 0;
/*
* If the queue doesn't support SG gaps and adding this segment
* would create a gap, disallow it.
*/
bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (bvec_gap_to_prev(&q->limits, bvec, offset))
if (bvec_gap_to_prev(&q->limits, bv, offset))
return 0;
}
if (bio_full(bio, len))
return 0;
if (bio->bi_vcnt >= queue_max_segments(q))
return 0;
bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset);
bio->bi_vcnt++;
bio->bi_iter.bi_size += len;
@@ -1129,11 +1093,21 @@ int bio_add_page(struct bio *bio, struct page *page,
{
bool same_page = false;
if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
if (bio_full(bio, len))
return 0;
__bio_add_page(bio, page, len, offset);
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return 0;
if (bio->bi_iter.bi_size > UINT_MAX - len)
return 0;
if (bio->bi_vcnt > 0 &&
bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
page, len, offset, &same_page)) {
bio->bi_iter.bi_size += len;
return len;
}
if (bio->bi_vcnt >= bio->bi_max_vecs)
return 0;
__bio_add_page(bio, page, len, offset);
return len;
}
EXPORT_SYMBOL(bio_add_page);
@@ -1207,13 +1181,18 @@ static int bio_iov_add_page(struct bio *bio, struct page *page,
{
bool same_page = false;
if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
__bio_add_page(bio, page, len, offset);
if (WARN_ON_ONCE(bio->bi_iter.bi_size > UINT_MAX - len))
return -EIO;
if (bio->bi_vcnt > 0 &&
bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
page, len, offset, &same_page)) {
bio->bi_iter.bi_size += len;
if (same_page)
bio_release_page(bio, page);
return 0;
}
if (same_page)
bio_release_page(bio, page);
__bio_add_page(bio, page, len, offset);
return 0;
}
@@ -1252,7 +1231,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
struct page **pages = (struct page **)bv;
ssize_t size, left;
unsigned len, i = 0;
size_t offset, trim;
size_t offset;
int ret = 0;
/*
@@ -1281,10 +1260,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
iov_iter_revert(iter, trim);
if (bio->bi_bdev) {
size_t trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
iov_iter_revert(iter, trim);
size -= trim;
}
size -= trim;
if (unlikely(!size)) {
ret = -EFAULT;
goto out;
@@ -1337,6 +1318,9 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
int ret = 0;
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return -EIO;
if (iov_iter_is_bvec(iter)) {
bio_iov_bvec_set(bio, iter);
iov_iter_advance(iter, bio->bi_iter.bi_size);
@@ -1490,6 +1474,7 @@ void bio_set_pages_dirty(struct bio *bio)
set_page_dirty_lock(bvec->bv_page);
}
}
EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
/*
* bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
@@ -1549,6 +1534,7 @@ defer:
spin_unlock_irqrestore(&bio_dirty_lock, flags);
schedule_work(&bio_dirty_work);
}
EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
static inline bool bio_remaining_done(struct bio *bio)
{

View File

@@ -1511,7 +1511,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
retry:
spin_lock_irq(&q->queue_lock);
/* blkg_list is pushed at the head, reverse walk to allocate parents first */
/* blkg_list is pushed at the head, reverse walk to initialize parents first */
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
struct blkg_policy_data *pd;
@@ -1549,21 +1549,20 @@ retry:
goto enomem;
}
blkg->pd[pol->plid] = pd;
spin_lock(&blkg->blkcg->lock);
pd->blkg = blkg;
pd->plid = pol->plid;
pd->online = false;
}
blkg->pd[pol->plid] = pd;
/* all allocated, init in the same order */
if (pol->pd_init_fn)
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
pol->pd_init_fn(blkg->pd[pol->plid]);
if (pol->pd_init_fn)
pol->pd_init_fn(pd);
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
if (pol->pd_online_fn)
pol->pd_online_fn(blkg->pd[pol->plid]);
blkg->pd[pol->plid]->online = true;
pol->pd_online_fn(pd);
pd->online = true;
spin_unlock(&blkg->blkcg->lock);
}
__set_bit(pol->plid, q->blkcg_pols);
@@ -1580,14 +1579,19 @@ out:
return ret;
enomem:
/* alloc failed, nothing's initialized yet, free everything */
/* alloc failed, take down everything */
spin_lock_irq(&q->queue_lock);
list_for_each_entry(blkg, &q->blkg_list, q_node) {
struct blkcg *blkcg = blkg->blkcg;
struct blkg_policy_data *pd;
spin_lock(&blkcg->lock);
if (blkg->pd[pol->plid]) {
pol->pd_free_fn(blkg->pd[pol->plid]);
pd = blkg->pd[pol->plid];
if (pd) {
if (pd->online && pol->pd_offline_fn)
pol->pd_offline_fn(pd);
pd->online = false;
pol->pd_free_fn(pd);
blkg->pd[pol->plid] = NULL;
}
spin_unlock(&blkcg->lock);

View File

@@ -208,6 +208,7 @@ const char *blk_status_to_str(blk_status_t status)
return "<null>";
return blk_errors[idx].name;
}
EXPORT_SYMBOL_GPL(blk_status_to_str);
/**
* blk_sync_queue - cancel any pending callbacks on a queue

View File

@@ -183,13 +183,13 @@ static void blk_flush_complete_seq(struct request *rq,
/* queue for flush */
if (list_empty(pending))
fq->flush_pending_since = jiffies;
list_move_tail(&rq->flush.list, pending);
list_move_tail(&rq->queuelist, pending);
break;
case REQ_FSEQ_DATA:
list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
fq->flush_data_in_flight++;
spin_lock(&q->requeue_lock);
list_add(&rq->queuelist, &q->requeue_list);
list_move(&rq->queuelist, &q->requeue_list);
spin_unlock(&q->requeue_lock);
blk_mq_kick_requeue_list(q);
break;
@@ -201,7 +201,7 @@ static void blk_flush_complete_seq(struct request *rq,
* flush data request completion path. Restore @rq for
* normal completion and end it.
*/
list_del_init(&rq->flush.list);
list_del_init(&rq->queuelist);
blk_flush_restore_request(rq);
blk_mq_end_request(rq, error);
break;
@@ -257,7 +257,7 @@ static enum rq_end_io_ret flush_end_io(struct request *flush_rq,
fq->flush_running_idx ^= 1;
/* and push the waiting requests to the next stage */
list_for_each_entry_safe(rq, n, running, flush.list) {
list_for_each_entry_safe(rq, n, running, queuelist) {
unsigned int seq = blk_flush_cur_seq(rq);
BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
@@ -291,7 +291,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
{
struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
struct request *first_rq =
list_first_entry(pending, struct request, flush.list);
list_first_entry(pending, struct request, queuelist);
struct request *flush_rq = fq->flush_rq;
/* C1 described at the top of this file */
@@ -299,7 +299,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
return;
/* C2 and C3 */
if (!list_empty(&fq->flush_data_in_flight) &&
if (fq->flush_data_in_flight &&
time_before(jiffies,
fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
return;
@@ -374,6 +374,12 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
* the comment in flush_end_io().
*/
spin_lock_irqsave(&fq->mq_flush_lock, flags);
fq->flush_data_in_flight--;
/*
* May have been corrupted by rq->rq_next reuse, we need to
* re-initialize rq->queuelist before reusing it here.
*/
INIT_LIST_HEAD(&rq->queuelist);
blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
@@ -384,7 +390,6 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
static void blk_rq_init_flush(struct request *rq)
{
rq->flush.seq = 0;
INIT_LIST_HEAD(&rq->flush.list);
rq->rq_flags |= RQF_FLUSH_SEQ;
rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
rq->end_io = mq_flush_data_end_io;
@@ -443,9 +448,9 @@ bool blk_insert_flush(struct request *rq)
* the post flush, and then just pass the command on.
*/
blk_rq_init_flush(rq);
rq->flush.seq |= REQ_FSEQ_POSTFLUSH;
rq->flush.seq |= REQ_FSEQ_PREFLUSH;
spin_lock_irq(&fq->mq_flush_lock);
list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
fq->flush_data_in_flight++;
spin_unlock_irq(&fq->mq_flush_lock);
return false;
default:
@@ -496,7 +501,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
INIT_LIST_HEAD(&fq->flush_queue[0]);
INIT_LIST_HEAD(&fq->flush_queue[1]);
INIT_LIST_HEAD(&fq->flush_data_in_flight);
return fq;

View File

@@ -824,29 +824,6 @@ static void iolatency_clear_scaling(struct blkcg_gq *blkg)
}
}
static int blk_iolatency_try_init(struct blkg_conf_ctx *ctx)
{
static DEFINE_MUTEX(init_mutex);
int ret;
ret = blkg_conf_open_bdev(ctx);
if (ret)
return ret;
/*
* blk_iolatency_init() may fail after rq_qos_add() succeeds which can
* confuse iolat_rq_qos() test. Make the test and init atomic.
*/
mutex_lock(&init_mutex);
if (!iolat_rq_qos(ctx->bdev->bd_queue))
ret = blk_iolatency_init(ctx->bdev->bd_disk);
mutex_unlock(&init_mutex);
return ret;
}
static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
@@ -861,7 +838,17 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
blkg_conf_init(&ctx, buf);
ret = blk_iolatency_try_init(&ctx);
ret = blkg_conf_open_bdev(&ctx);
if (ret)
goto out;
/*
* blk_iolatency_init() may fail after rq_qos_add() succeeds which can
* confuse iolat_rq_qos() test. Make the test and init atomic.
*/
lockdep_assert_held(&ctx.bdev->bd_queue->rq_qos_mutex);
if (!iolat_rq_qos(ctx.bdev->bd_queue))
ret = blk_iolatency_init(ctx.bdev->bd_disk);
if (ret)
goto out;

View File

@@ -43,6 +43,7 @@
#include "blk-ioprio.h"
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd);
static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
static void blk_mq_request_bypass_insert(struct request *rq,
@@ -1174,15 +1175,11 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
static void blk_mq_complete_send_ipi(struct request *rq)
{
struct llist_head *list;
unsigned int cpu;
cpu = rq->mq_ctx->cpu;
list = &per_cpu(blk_cpu_done, cpu);
if (llist_add(&rq->ipi_list, list)) {
INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq);
smp_call_function_single_async(cpu, &rq->csd);
}
if (llist_add(&rq->ipi_list, &per_cpu(blk_cpu_done, cpu)))
smp_call_function_single_async(cpu, &per_cpu(blk_cpu_csd, cpu));
}
static void blk_mq_raise_softirq(struct request *rq)
@@ -1343,7 +1340,7 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head)
}
blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
blk_mq_run_hw_queue(hctx, false);
blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
@@ -2242,6 +2239,8 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
*/
WARN_ON_ONCE(!async && in_interrupt());
might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING);
/*
* When queue is quiesced, we may be switching io scheduler, or
* updating nr_hw_queues, or other things, and we can't run queue
@@ -2257,8 +2256,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
if (!need_run)
return;
if (async || (hctx->flags & BLK_MQ_F_BLOCKING) ||
!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
blk_mq_delay_run_hw_queue(hctx, 0);
return;
}
@@ -2393,7 +2391,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
{
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
blk_mq_run_hw_queue(hctx, false);
blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING);
}
EXPORT_SYMBOL(blk_mq_start_hw_queue);
@@ -2423,7 +2421,8 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
unsigned long i;
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_start_stopped_hw_queue(hctx, async);
blk_mq_start_stopped_hw_queue(hctx, async ||
(hctx->flags & BLK_MQ_F_BLOCKING));
}
EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
@@ -2481,6 +2480,8 @@ static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
list_for_each_entry(rq, list, queuelist) {
BUG_ON(rq->mq_ctx != ctx);
trace_block_rq_insert(rq);
if (rq->cmd_flags & REQ_NOWAIT)
run_queue_async = true;
}
spin_lock(&ctx->lock);
@@ -2641,7 +2642,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) {
blk_mq_insert_request(rq, 0);
blk_mq_run_hw_queue(hctx, false);
blk_mq_run_hw_queue(hctx, rq->cmd_flags & REQ_NOWAIT);
return;
}
@@ -4402,9 +4403,13 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
int new_nr_hw_queues)
{
struct blk_mq_tags **new_tags;
int i;
if (set->nr_hw_queues >= new_nr_hw_queues)
if (set->nr_hw_queues >= new_nr_hw_queues) {
for (i = new_nr_hw_queues; i < set->nr_hw_queues; i++)
__blk_mq_free_map_and_rqs(set, i);
goto done;
}
new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
@@ -4416,6 +4421,16 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
sizeof(*set->tags));
kfree(set->tags);
set->tags = new_tags;
for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) {
if (!__blk_mq_alloc_map_and_rqs(set, i)) {
while (--i >= set->nr_hw_queues)
__blk_mq_free_map_and_rqs(set, i);
return -ENOMEM;
}
cond_resched();
}
done:
set->nr_hw_queues = new_nr_hw_queues;
return 0;
@@ -4749,7 +4764,6 @@ fallback:
__blk_mq_free_map_and_rqs(set, i);
set->nr_hw_queues = prev_nr_hw_queues;
blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
goto fallback;
}
blk_mq_map_swqueue(q);
@@ -4853,6 +4867,9 @@ static int __init blk_mq_init(void)
for_each_possible_cpu(i)
init_llist_head(&per_cpu(blk_cpu_done, i));
for_each_possible_cpu(i)
INIT_CSD(&per_cpu(blk_cpu_csd, i),
__blk_mq_complete_request_remote, NULL);
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,

View File

@@ -830,10 +830,13 @@ EXPORT_SYMBOL(blk_set_queue_depth);
*/
void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
{
if (wc)
if (wc) {
blk_queue_flag_set(QUEUE_FLAG_HW_WC, q);
blk_queue_flag_set(QUEUE_FLAG_WC, q);
else
} else {
blk_queue_flag_clear(QUEUE_FLAG_HW_WC, q);
blk_queue_flag_clear(QUEUE_FLAG_WC, q);
}
if (fua)
blk_queue_flag_set(QUEUE_FLAG_FUA, q);
else

View File

@@ -449,21 +449,16 @@ static ssize_t queue_wc_show(struct request_queue *q, char *page)
static ssize_t queue_wc_store(struct request_queue *q, const char *page,
size_t count)
{
int set = -1;
if (!strncmp(page, "write back", 10))
set = 1;
else if (!strncmp(page, "write through", 13) ||
!strncmp(page, "none", 4))
set = 0;
if (set == -1)
return -EINVAL;
if (set)
if (!strncmp(page, "write back", 10)) {
if (!test_bit(QUEUE_FLAG_HW_WC, &q->queue_flags))
return -EINVAL;
blk_queue_flag_set(QUEUE_FLAG_WC, q);
else
} else if (!strncmp(page, "write through", 13) ||
!strncmp(page, "none", 4)) {
blk_queue_flag_clear(QUEUE_FLAG_WC, q);
} else {
return -EINVAL;
}
return count;
}

View File

@@ -15,15 +15,14 @@ struct elevator_type;
extern struct dentry *blk_debugfs_root;
struct blk_flush_queue {
spinlock_t mq_flush_lock;
unsigned int flush_pending_idx:1;
unsigned int flush_running_idx:1;
blk_status_t rq_status;
unsigned long flush_pending_since;
struct list_head flush_queue[2];
struct list_head flush_data_in_flight;
unsigned long flush_data_in_flight;
struct request *flush_rq;
spinlock_t mq_flush_lock;
};
bool is_flush_rq(struct request *req);
@@ -76,6 +75,10 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
gfp_t gfp_mask);
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs);
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
struct page *page, unsigned len, unsigned offset,
bool *same_page);
static inline bool biovec_phys_mergeable(struct request_queue *q,
struct bio_vec *vec1, struct bio_vec *vec2)
{
@@ -251,7 +254,6 @@ static inline void bio_integrity_free(struct bio *bio)
unsigned long blk_rq_timeout(unsigned long timeout);
void blk_add_timer(struct request *req);
const char *blk_status_to_str(blk_status_t status);
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs);

View File

@@ -15,6 +15,7 @@
#include <linux/falloc.h>
#include <linux/suspend.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/module.h>
#include "blk.h"
@@ -23,15 +24,6 @@ static inline struct inode *bdev_file_inode(struct file *file)
return file->f_mapping->host;
}
static int blkdev_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
bh->b_bdev = I_BDEV(inode);
bh->b_blocknr = iblock;
set_buffer_mapped(bh);
return 0;
}
static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
{
blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
@@ -387,6 +379,37 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
}
static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
struct block_device *bdev = I_BDEV(inode);
loff_t isize = i_size_read(inode);
iomap->bdev = bdev;
iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
if (iomap->offset >= isize)
return -EIO;
iomap->type = IOMAP_MAPPED;
iomap->addr = iomap->offset;
iomap->length = isize - iomap->offset;
iomap->flags |= IOMAP_F_BUFFER_HEAD; /* noop for !CONFIG_BUFFER_HEAD */
return 0;
}
static const struct iomap_ops blkdev_iomap_ops = {
.iomap_begin = blkdev_iomap_begin,
};
#ifdef CONFIG_BUFFER_HEAD
static int blkdev_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
bh->b_bdev = I_BDEV(inode);
bh->b_blocknr = iblock;
set_buffer_mapped(bh);
return 0;
}
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
return block_write_full_page(page, blkdev_get_block, wbc);
@@ -429,10 +452,58 @@ const struct address_space_operations def_blk_aops = {
.writepage = blkdev_writepage,
.write_begin = blkdev_write_begin,
.write_end = blkdev_write_end,
.direct_IO = blkdev_direct_IO,
.migrate_folio = buffer_migrate_folio_norefs,
.is_dirty_writeback = buffer_check_dirty_writeback,
};
#else /* CONFIG_BUFFER_HEAD */
static int blkdev_read_folio(struct file *file, struct folio *folio)
{
return iomap_read_folio(folio, &blkdev_iomap_ops);
}
static void blkdev_readahead(struct readahead_control *rac)
{
iomap_readahead(rac, &blkdev_iomap_ops);
}
static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
struct inode *inode, loff_t offset)
{
loff_t isize = i_size_read(inode);
if (WARN_ON_ONCE(offset >= isize))
return -EIO;
if (offset >= wpc->iomap.offset &&
offset < wpc->iomap.offset + wpc->iomap.length)
return 0;
return blkdev_iomap_begin(inode, offset, isize - offset,
IOMAP_WRITE, &wpc->iomap, NULL);
}
static const struct iomap_writeback_ops blkdev_writeback_ops = {
.map_blocks = blkdev_map_blocks,
};
static int blkdev_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct iomap_writepage_ctx wpc = { };
return iomap_writepages(mapping, wbc, &wpc, &blkdev_writeback_ops);
}
const struct address_space_operations def_blk_aops = {
.dirty_folio = filemap_dirty_folio,
.release_folio = iomap_release_folio,
.invalidate_folio = iomap_invalidate_folio,
.read_folio = blkdev_read_folio,
.readahead = blkdev_readahead,
.writepages = blkdev_writepages,
.is_partially_uptodate = iomap_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
.migrate_folio = filemap_migrate_folio,
};
#endif /* CONFIG_BUFFER_HEAD */
/*
* for a block special file file_inode(file)->i_size is zero
@@ -506,7 +577,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
* during an unstable branch.
*/
filp->f_flags |= O_LARGEFILE;
filp->f_mode |= FMODE_BUF_RASYNC;
filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
/*
* Use the file private data to store the holder for exclusive openes.
@@ -534,6 +605,35 @@ static int blkdev_release(struct inode *inode, struct file *filp)
return 0;
}
static ssize_t
blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
size_t count = iov_iter_count(from);
ssize_t written;
written = kiocb_invalidate_pages(iocb, count);
if (written) {
if (written == -EBUSY)
return 0;
return written;
}
written = blkdev_direct_IO(iocb, from);
if (written > 0) {
kiocb_invalidate_post_direct_write(iocb, count);
iocb->ki_pos += written;
count -= written;
}
if (written != -EIOCBQUEUED)
iov_iter_revert(from, count - iov_iter_count(from));
return written;
}
static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from)
{
return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops);
}
/*
* Write data to the block device. Only intended for the block device itself
* and the raw driver which basically is a fake block device.
@@ -543,7 +643,8 @@ static int blkdev_release(struct inode *inode, struct file *filp)
*/
static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
struct file *file = iocb->ki_filp;
struct block_device *bdev = I_BDEV(file->f_mapping->host);
struct inode *bd_inode = bdev->bd_inode;
loff_t size = bdev_nr_bytes(bdev);
size_t shorted = 0;
@@ -570,7 +671,23 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
iov_iter_truncate(from, size);
}
ret = __generic_file_write_iter(iocb, from);
ret = file_remove_privs(file);
if (ret)
return ret;
ret = file_update_time(file);
if (ret)
return ret;
if (iocb->ki_flags & IOCB_DIRECT) {
ret = blkdev_direct_write(iocb, from);
if (ret >= 0 && iov_iter_count(from))
ret = direct_write_fallback(iocb, from, ret,
blkdev_buffered_write(iocb, from));
} else {
ret = blkdev_buffered_write(iocb, from);
}
if (ret > 0)
ret = generic_write_sync(iocb, ret);
iov_iter_reexpand(from, iov_iter_count(from) + shorted);

View File

@@ -646,8 +646,9 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
struct request_queue *q = hctx->queue;
struct deadline_data *dd = q->elevator->elevator_data;
struct blk_mq_tags *tags = hctx->sched_tags;
unsigned int shift = tags->bitmap_tags.sb.shift;
dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
dd->async_depth = max(1U, 3 * (1U << shift) / 4);
sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
}

View File

@@ -225,6 +225,10 @@ enum opal_parameter {
OPAL_SUM_SET_LIST = 0x060000,
};
enum opal_revertlsp {
OPAL_KEEP_GLOBAL_RANGE_KEY = 0x060000,
};
/* Packets derived from:
* TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
* Secion: 3.2.3 ComPackets, Packets & Subpackets

View File

@@ -81,8 +81,7 @@ static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
length = min_t(int, next - partdef,
sizeof(new_subpart->name) - 1);
strncpy(new_subpart->name, partdef, length);
new_subpart->name[length] = '\0';
strscpy(new_subpart->name, partdef, length);
partdef = ++next;
} else
@@ -140,8 +139,7 @@ static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
}
length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1);
strncpy(newparts->name, bdevdef, length);
newparts->name[length] = '\0';
strscpy(newparts->name, bdevdef, length);
newparts->nr_subparts = 0;
next_subpart = &newparts->subpart;
@@ -153,8 +151,7 @@ static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
length = (!next) ? (sizeof(buf) - 1) :
min_t(int, next - bdevdef, sizeof(buf) - 1);
strncpy(buf, bdevdef, length);
buf[length] = '\0';
strscpy(buf, bdevdef, length);
ret = parse_subpart(next_subpart, buf);
if (ret)
@@ -267,8 +264,7 @@ static int add_part(int slot, struct cmdline_subpart *subpart,
label_min = min_t(int, sizeof(info->volname) - 1,
sizeof(subpart->name));
strncpy(info->volname, subpart->name, label_min);
info->volname[label_min] = '\0';
strscpy(info->volname, subpart->name, label_min);
snprintf(tmp, sizeof(tmp), "(%s)", info->volname);
strlcat(state->pp_buf, tmp, PAGE_SIZE);

View File

@@ -20,6 +20,9 @@
#include <linux/sed-opal.h>
#include <linux/string.h>
#include <linux/kdev_t.h>
#include <linux/key.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
#include "opal_proto.h"
@@ -29,6 +32,8 @@
/* Number of bytes needed by cmd_finalize. */
#define CMD_FINALIZE_BYTES_NEEDED 7
static struct key *sed_opal_keyring;
struct opal_step {
int (*fn)(struct opal_dev *dev, void *data);
void *data;
@@ -269,6 +274,101 @@ static void print_buffer(const u8 *ptr, u32 length)
#endif
}
/*
* Allocate/update a SED Opal key and add it to the SED Opal keyring.
*/
static int update_sed_opal_key(const char *desc, u_char *key_data, int keylen)
{
key_ref_t kr;
if (!sed_opal_keyring)
return -ENOKEY;
kr = key_create_or_update(make_key_ref(sed_opal_keyring, true), "user",
desc, (const void *)key_data, keylen,
KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_WRITE,
KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN |
KEY_ALLOC_BYPASS_RESTRICTION);
if (IS_ERR(kr)) {
pr_err("Error adding SED key (%ld)\n", PTR_ERR(kr));
return PTR_ERR(kr);
}
return 0;
}
/*
* Read a SED Opal key from the SED Opal keyring.
*/
static int read_sed_opal_key(const char *key_name, u_char *buffer, int buflen)
{
int ret;
key_ref_t kref;
struct key *key;
if (!sed_opal_keyring)
return -ENOKEY;
kref = keyring_search(make_key_ref(sed_opal_keyring, true),
&key_type_user, key_name, true);
if (IS_ERR(kref))
ret = PTR_ERR(kref);
key = key_ref_to_ptr(kref);
down_read(&key->sem);
ret = key_validate(key);
if (ret == 0) {
if (buflen > key->datalen)
buflen = key->datalen;
ret = key->type->read(key, (char *)buffer, buflen);
}
up_read(&key->sem);
key_ref_put(kref);
return ret;
}
static int opal_get_key(struct opal_dev *dev, struct opal_key *key)
{
int ret = 0;
switch (key->key_type) {
case OPAL_INCLUDED:
/* the key is ready to use */
break;
case OPAL_KEYRING:
/* the key is in the keyring */
ret = read_sed_opal_key(OPAL_AUTH_KEY, key->key, OPAL_KEY_MAX);
if (ret > 0) {
if (ret > U8_MAX) {
ret = -ENOSPC;
goto error;
}
key->key_len = ret;
key->key_type = OPAL_INCLUDED;
}
break;
default:
ret = -EINVAL;
break;
}
if (ret < 0)
goto error;
/* must have a PEK by now or it's an error */
if (key->key_type != OPAL_INCLUDED || key->key_len == 0) {
ret = -EINVAL;
goto error;
}
return 0;
error:
pr_debug("Error getting password: %d\n", ret);
return ret;
}
static bool check_tper(const void *data)
{
const struct d0_tper_features *tper = data;
@@ -463,8 +563,11 @@ out_error:
return error;
}
static int opal_discovery0_end(struct opal_dev *dev)
static int opal_discovery0_end(struct opal_dev *dev, void *data)
{
struct opal_discovery *discv_out = data; /* may be NULL */
u8 __user *buf_out;
u64 len_out;
bool found_com_id = false, supported = true, single_user = false;
const struct d0_header *hdr = (struct d0_header *)dev->resp;
const u8 *epos = dev->resp, *cpos = dev->resp;
@@ -480,6 +583,15 @@ static int opal_discovery0_end(struct opal_dev *dev)
return -EFAULT;
}
if (discv_out) {
buf_out = (u8 __user *)(uintptr_t)discv_out->data;
len_out = min_t(u64, discv_out->size, hlen);
if (buf_out && copy_to_user(buf_out, dev->resp, len_out))
return -EFAULT;
discv_out->size = hlen; /* actual size of data */
}
epos += hlen; /* end of buffer */
cpos += sizeof(*hdr); /* current position on buffer */
@@ -565,13 +677,13 @@ static int opal_discovery0(struct opal_dev *dev, void *data)
if (ret)
return ret;
return opal_discovery0_end(dev);
return opal_discovery0_end(dev, data);
}
static int opal_discovery0_step(struct opal_dev *dev)
{
const struct opal_step discovery0_step = {
opal_discovery0,
opal_discovery0, NULL
};
return execute_step(dev, &discovery0_step, 0);
@@ -1757,6 +1869,26 @@ static int internal_activate_user(struct opal_dev *dev, void *data)
return finalize_and_send(dev, parse_and_check_status);
}
static int revert_lsp(struct opal_dev *dev, void *data)
{
struct opal_revert_lsp *rev = data;
int err;
err = cmd_start(dev, opaluid[OPAL_THISSP_UID],
opalmethod[OPAL_REVERTSP]);
add_token_u8(&err, dev, OPAL_STARTNAME);
add_token_u64(&err, dev, OPAL_KEEP_GLOBAL_RANGE_KEY);
add_token_u8(&err, dev, (rev->options & OPAL_PRESERVE) ?
OPAL_TRUE : OPAL_FALSE);
add_token_u8(&err, dev, OPAL_ENDNAME);
if (err) {
pr_debug("Error building REVERT SP command.\n");
return err;
}
return finalize_and_send(dev, parse_and_check_status);
}
static int erase_locking_range(struct opal_dev *dev, void *data)
{
struct opal_session_info *session = data;
@@ -2427,6 +2559,9 @@ static int opal_secure_erase_locking_range(struct opal_dev *dev,
};
int ret;
ret = opal_get_key(dev, &opal_session->opal_key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, erase_steps, ARRAY_SIZE(erase_steps));
@@ -2435,6 +2570,42 @@ static int opal_secure_erase_locking_range(struct opal_dev *dev,
return ret;
}
static int opal_get_discv(struct opal_dev *dev, struct opal_discovery *discv)
{
const struct opal_step discovery0_step = {
opal_discovery0, discv
};
int ret = 0;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_step(dev, &discovery0_step, 0);
mutex_unlock(&dev->dev_lock);
if (ret)
return ret;
return discv->size; /* modified to actual length of data */
}
static int opal_revertlsp(struct opal_dev *dev, struct opal_revert_lsp *rev)
{
/* controller will terminate session */
const struct opal_step steps[] = {
{ start_admin1LSP_opal_session, &rev->key },
{ revert_lsp, rev }
};
int ret;
ret = opal_get_key(dev, &rev->key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, steps, ARRAY_SIZE(steps));
mutex_unlock(&dev->dev_lock);
return ret;
}
static int opal_erase_locking_range(struct opal_dev *dev,
struct opal_session_info *opal_session)
{
@@ -2445,6 +2616,9 @@ static int opal_erase_locking_range(struct opal_dev *dev,
};
int ret;
ret = opal_get_key(dev, &opal_session->opal_key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, erase_steps, ARRAY_SIZE(erase_steps));
@@ -2473,6 +2647,9 @@ static int opal_enable_disable_shadow_mbr(struct opal_dev *dev,
opal_mbr->enable_disable != OPAL_MBR_DISABLE)
return -EINVAL;
ret = opal_get_key(dev, &opal_mbr->key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps));
@@ -2498,6 +2675,9 @@ static int opal_set_mbr_done(struct opal_dev *dev,
mbr_done->done_flag != OPAL_MBR_NOT_DONE)
return -EINVAL;
ret = opal_get_key(dev, &mbr_done->key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps));
@@ -2519,6 +2699,9 @@ static int opal_write_shadow_mbr(struct opal_dev *dev,
if (info->size == 0)
return 0;
ret = opal_get_key(dev, &info->key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps));
@@ -2576,6 +2759,9 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
return -EINVAL;
}
ret = opal_get_key(dev, &lk_unlk->session.opal_key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, steps, ARRAY_SIZE(steps));
@@ -2598,6 +2784,10 @@ static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal, bool psi
int ret;
ret = opal_get_key(dev, opal);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
if (psid)
@@ -2698,6 +2888,9 @@ static int opal_lock_unlock(struct opal_dev *dev,
if (lk_unlk->session.who > OPAL_USER9)
return -EINVAL;
ret = opal_get_key(dev, &lk_unlk->session.opal_key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
opal_lock_check_for_saved_key(dev, lk_unlk);
ret = __opal_lock_unlock(dev, lk_unlk);
@@ -2721,6 +2914,9 @@ static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal)
if (!dev)
return -ENODEV;
ret = opal_get_key(dev, opal);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, owner_steps, ARRAY_SIZE(owner_steps));
@@ -2743,6 +2939,9 @@ static int opal_activate_lsp(struct opal_dev *dev,
if (!opal_lr_act->num_lrs || opal_lr_act->num_lrs > OPAL_MAX_LRS)
return -EINVAL;
ret = opal_get_key(dev, &opal_lr_act->key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, active_steps, ARRAY_SIZE(active_steps));
@@ -2761,6 +2960,9 @@ static int opal_setup_locking_range(struct opal_dev *dev,
};
int ret;
ret = opal_get_key(dev, &opal_lrs->session.opal_key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, lr_steps, ARRAY_SIZE(lr_steps));
@@ -2814,6 +3016,14 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
ret = execute_steps(dev, pw_steps, ARRAY_SIZE(pw_steps));
mutex_unlock(&dev->dev_lock);
if (ret)
return ret;
/* update keyring with new password */
ret = update_sed_opal_key(OPAL_AUTH_KEY,
opal_pw->new_user_pw.opal_key.key,
opal_pw->new_user_pw.opal_key.key_len);
return ret;
}
@@ -2834,6 +3044,9 @@ static int opal_activate_user(struct opal_dev *dev,
return -EINVAL;
}
ret = opal_get_key(dev, &opal_session->opal_key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
ret = execute_steps(dev, act_steps, ARRAY_SIZE(act_steps));
@@ -2920,6 +3133,9 @@ static int opal_generic_read_write_table(struct opal_dev *dev,
{
int ret, bit_set;
ret = opal_get_key(dev, &rw_tbl->key);
if (ret)
return ret;
mutex_lock(&dev->dev_lock);
setup_opal_dev(dev);
@@ -2988,9 +3204,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (!dev)
return -ENOTSUPP;
return -EOPNOTSUPP;
if (!(dev->flags & OPAL_FL_SUPPORTED))
return -ENOTSUPP;
return -EOPNOTSUPP;
if (cmd & IOC_IN) {
p = memdup_user(arg, _IOC_SIZE(cmd));
@@ -3056,6 +3272,13 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
case IOC_OPAL_GET_GEOMETRY:
ret = opal_get_geometry(dev, arg);
break;
case IOC_OPAL_REVERT_LSP:
ret = opal_revertlsp(dev, p);
break;
case IOC_OPAL_DISCOVERY:
ret = opal_get_discv(dev, p);
break;
default:
break;
}
@@ -3065,3 +3288,22 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
return ret;
}
EXPORT_SYMBOL_GPL(sed_ioctl);
static int __init sed_opal_init(void)
{
struct key *kr;
kr = keyring_alloc(".sed_opal",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
(KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW |
KEY_USR_READ | KEY_USR_SEARCH | KEY_USR_WRITE,
KEY_ALLOC_NOT_IN_QUOTA,
NULL, NULL);
if (IS_ERR(kr))
return PTR_ERR(kr);
sed_opal_keyring = kr;
return 0;
}
late_initcall(sed_opal_init);