torvalds-linux/include/linux/backing-dev-defs.h
Linus Torvalds cc25df3e2e for-6.19/block-20251201

Merge tag 'for-6.19/block-20251201' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull block updates from Jens Axboe:

 - Fix head insertion for mq-deadline, a regression from when priority
   support was added

 - Series simplifying and improving the ublk user copy code

 - Various ublk related cleanups

 - Fix up REQ_NOWAIT handling in loop/zloop, clearing NOWAIT when the
   request is punted to a thread for handling

 - Merge and then later revert loop dio nowait support, as it ended up
   causing excessive stack usage when the inline issue code needs to
   dip back into the full file system code

 - Improve auto integrity code, making it less deadlock prone

 - Speed up polled IO handling by manually managing the hctx lookups

 - Fixes for blk-throttle for SSD devices

 - Small series with fixes for the S390 dasd driver

 - Add support for caching zones, avoiding unnecessary report zone
   queries

 - MD pull requests via Yu:
      - fix null-ptr-dereference regression for dm-raid0
      - fix IO hang for raid5 when array is broken with IO inflight
      - remove legacy 1s delay to speed up system shutdown
      - change maintainer's email address
      - fix potential data loss when an array is created from devices with
        different logical block sizes (lbs), and record the array's lbs in
        the metadata
      - fix rcu protection for md_thread
      - fix mddev kobject lifetime regression
      - enable atomic writes for md-linear
      - some cleanups

 - bcache updates via Coly
      - remove useless discard and cache device code
      - improve usage of per-cpu workqueues

 - Reorganize the IO scheduler switching code, fixing some lockdep
   reports as well

 - Improve the block layer P2P DMA support

 - Add support to the block tracing code for zoned devices

 - Improvements to segment calculation and memory alignment flexibility

 - Set of prep and cleanup patches for ublk batching support. The
   actual batching hasn't been added yet, but this helps shrink the
   workload of getting that patchset ready for 6.20

 - Fix for how the ps3 block driver handles segment offsets

 - Improve how block plugging handles batch tag allocations

 - nbd fixes for use-after-free of the configuration on device clear/put

 - Set of improvements and fixes for zloop

 - Add Damien as maintainer of the block zoned device code handling

 - Various other fixes and cleanups

* tag 'for-6.19/block-20251201' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (162 commits)
  block/rnbd: correct all kernel-doc complaints
  blk-mq: use queue_hctx in blk_mq_map_queue_type
  md: remove legacy 1s delay in md_notify_reboot
  md/raid5: fix IO hang when array is broken with IO inflight
  md: warn about updating super block failure
  md/raid0: fix NULL pointer dereference in create_strip_zones() for dm-raid
  sbitmap: fix all kernel-doc warnings
  ublk: add helper of __ublk_fetch()
  ublk: pass const pointer to ublk_queue_is_zoned()
  ublk: refactor auto buffer register in ublk_dispatch_req()
  ublk: add `union ublk_io_buf` with improved naming
  ublk: add parameter `struct io_uring_cmd *` to ublk_prep_auto_buf_reg()
  kfifo: add kfifo_alloc_node() helper for NUMA awareness
  blk-mq: fix potential uaf for 'queue_hw_ctx'
  blk-mq: use array manage hctx map instead of xarray
  ublk: prevent invalid access with DEBUG
  s390/dasd: Use scnprintf() instead of sprintf()
  s390/dasd: Move device name formatting into separate function
  s390/dasd: Remove unnecessary debugfs_create() return checks
  s390/dasd: Fix gendisk parent after copy pair swap
  ...
2025-12-03 19:26:18 -08:00


/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_BACKING_DEV_DEFS_H
#define __LINUX_BACKING_DEV_DEFS_H

#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/percpu_counter.h>
#include <linux/percpu-refcount.h>
#include <linux/flex_proportions.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
#include <linux/refcount.h>

struct page;
struct device;
struct dentry;

/*
 * Bits in bdi_writeback.state
 */
enum wb_state {
	WB_registered,		/* bdi_register() was done */
	WB_writeback_running,	/* Writeback is in progress */
	WB_has_dirty_io,	/* Dirty inodes on ->b_{dirty|io|more_io} */
	WB_start_all,		/* nr_pages == 0 (all) work pending */
};

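/*
 * Example (illustrative sketch, not part of the original header): the
 * state word above is only ever manipulated with atomic bitops, e.g.
 *
 *	if (!test_bit(WB_registered, &wb->state))
 *		return;
 *	set_bit(WB_has_dirty_io, &wb->state);
 */
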
enum wb_stat_item {
	WB_RECLAIMABLE,
	WB_WRITEBACK,
	WB_DIRTIED,
	WB_WRITTEN,
	NR_WB_STAT_ITEMS
};

#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))

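/*
 * Example (illustrative sketch, not part of the original header): the
 * per-wb counters indexed by the enum above are updated through the
 * wb_stat helpers declared in <linux/backing-dev.h>, e.g. while a page
 * is under writeback:
 *
 *	inc_wb_stat(wb, WB_WRITEBACK);
 *	...
 *	dec_wb_stat(wb, WB_WRITEBACK);
 */
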
/*
 * Why some writeback work was initiated
 */
enum wb_reason {
	WB_REASON_BACKGROUND,
	WB_REASON_VMSCAN,
	WB_REASON_SYNC,
	WB_REASON_PERIODIC,
	WB_REASON_LAPTOP_TIMER,
	WB_REASON_FS_FREE_SPACE,
	/*
	 * There is no bdi forker thread any more and works are done by
	 * an emergency worker.  However, this reason is visible to
	 * userland via tracepoints and we keep exposing exactly the same
	 * information, so the name no longer matches the implementation.
	 */
	WB_REASON_FORKER_THREAD,
	WB_REASON_FOREIGN_FLUSH,

	WB_REASON_MAX,
};

struct wb_completion {
	atomic_t cnt;
	wait_queue_head_t *waitq;
	unsigned long progress_stamp;	/* The jiffies when slow progress is detected */
	unsigned long wait_start;	/* The jiffies when waiting for the writeback work to finish */
};

#define __WB_COMPLETION_INIT(_waitq)	\
	(struct wb_completion){ .cnt = ATOMIC_INIT(1), .waitq = (_waitq) }

/*
 * If one wants to wait for one or more wb_writeback_works, each work's
 * ->done should be set to a wb_completion defined using the following
 * macro.  Once all work items are issued with wb_queue_work(), the caller
 * can wait for the completion of all using wb_wait_for_completion().  Work
 * items which are waited upon aren't freed automatically on completion.
 */
#define WB_COMPLETION_INIT(bdi)		__WB_COMPLETION_INIT(&(bdi)->wb_waitq)

#define DEFINE_WB_COMPLETION(cmpl, bdi)	\
	struct wb_completion cmpl = WB_COMPLETION_INIT(bdi)

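/*
 * Example (illustrative sketch, not part of the original header): waiting
 * for a queued writeback work item as described above.  Here "work" stands
 * in for a fully initialised struct wb_writeback_work.
 *
 *	DEFINE_WB_COMPLETION(done, bdi);
 *
 *	work->done = &done;
 *	wb_queue_work(&bdi->wb, work);
 *	wb_wait_for_completion(&done);
 */
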
/*
 * Each wb (bdi_writeback) can perform writeback operations, is measured
 * and throttled, independently.  Without cgroup writeback, each bdi
 * (bdi_writeback) is served by its embedded bdi->wb.
 *
 * On the default hierarchy, blkcg implicitly enables memcg.  This allows
 * using memcg's page ownership for attributing writeback IOs, and every
 * memcg - blkcg combination can be served by its own wb by assigning a
 * dedicated wb to each memcg, which enables isolation across different
 * cgroups and propagation of IO back pressure down from the IO layer up
 * to the tasks which are generating the dirty pages to be written back.
 *
 * A cgroup wb is indexed on its bdi by the ID of the associated memcg,
 * refcounted with the number of inodes attached to it, and pins the memcg
 * and the corresponding blkcg.  As the corresponding blkcg for a memcg may
 * change as blkcg is disabled and enabled higher up in the hierarchy, a wb
 * is tested for blkcg after lookup and removed from index on mismatch so
 * that a new wb for the combination can be created.
 *
 * Each bdi_writeback that is not embedded into the backing_dev_info must hold
 * a reference to the parent backing_dev_info.  See cgwb_create() for details.
 */
struct bdi_writeback {
	struct backing_dev_info *bdi;	/* our parent bdi */

	unsigned long state;		/* Always use atomic bitops on this */
	unsigned long last_old_flush;	/* last old data flush */

	struct list_head b_dirty;	/* dirty inodes */
	struct list_head b_io;		/* parked for writeback */
	struct list_head b_more_io;	/* parked for more writeback */
	struct list_head b_dirty_time;	/* time stamps are dirty */
	spinlock_t list_lock;		/* protects the b_* lists */

	atomic_t writeback_inodes;	/* number of inodes under writeback */
	struct percpu_counter stat[NR_WB_STAT_ITEMS];

	unsigned long bw_time_stamp;	/* last time write bw is updated */
	unsigned long dirtied_stamp;
	unsigned long written_stamp;	/* pages written at bw_time_stamp */
	unsigned long write_bandwidth;	/* the estimated write bandwidth */
	unsigned long avg_write_bandwidth; /* further smoothed write bw, > 0 */

	/*
	 * The base dirty throttle rate, re-calculated every 200ms.
	 * All the bdi tasks' dirty rate will be curbed under it.
	 * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit
	 * in small steps and is much more smooth/stable than the latter.
	 */
	unsigned long dirty_ratelimit;
	unsigned long balanced_dirty_ratelimit;

	struct fprop_local_percpu completions;
	int dirty_exceeded;
	enum wb_reason start_all_reason;

	spinlock_t work_lock;		/* protects work_list & dwork scheduling */
	struct list_head work_list;
	struct delayed_work dwork;	/* work item used for writeback */
	struct delayed_work bw_dwork;	/* work item used for bandwidth estimate */

	struct list_head bdi_node;	/* anchored at bdi->wb_list */

#ifdef CONFIG_CGROUP_WRITEBACK
	struct percpu_ref refcnt;	/* used only for !root wb's */
	struct fprop_local_percpu memcg_completions;

	struct cgroup_subsys_state *memcg_css;	/* the associated memcg */
	struct cgroup_subsys_state *blkcg_css;	/* and blkcg */
	struct list_head memcg_node;	/* anchored at memcg->cgwb_list */
	struct list_head blkcg_node;	/* anchored at blkcg->cgwb_list */
	struct list_head b_attached;	/* attached inodes, protected by list_lock */
	struct list_head offline_node;	/* anchored at offline_cgwbs */

	struct work_struct switch_work;	/* work used to perform inode switching
					 * to this wb */
	struct llist_head switch_wbs_ctxs; /* queued contexts for
					    * writeback switching */

	union {
		struct work_struct release_work;
		struct rcu_head rcu;
	};
#endif
};

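/*
 * Example (illustrative sketch, not part of the original header): a bdi
 * always embeds one root wb (bdi->wb); every other wb on the bdi belongs
 * to a memcg/blkcg combination as described above.  The refcount helpers
 * further down tell the two cases apart with exactly this comparison:
 *
 *	bool is_root_wb = (wb == &wb->bdi->wb);
 */
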
struct backing_dev_info {
	u64 id;
	struct rb_node rb_node;		/* keyed by ->id */
	struct list_head bdi_list;
	/* max readahead in PAGE_SIZE units */
	unsigned long __data_racy ra_pages;
	unsigned long io_pages;		/* max allowed IO size */

	struct kref refcnt;		/* Reference counter for the structure */
	unsigned int capabilities;	/* Device capabilities */
	unsigned int min_ratio;
	unsigned int max_ratio, max_prop_frac;

	/*
	 * Sum of avg_write_bw of wbs with dirty inodes.  > 0 if there are
	 * any dirty wbs, which is depended upon by bdi_has_dirty().
	 */
	atomic_long_t tot_write_bandwidth;
	/*
	 * Jiffies when last process was dirty throttled on this bdi.  Used by
	 * blk-wbt.
	 */
	unsigned long last_bdp_sleep;

	struct bdi_writeback wb;	/* the root writeback info for this bdi */
	struct list_head wb_list;	/* list of all wbs */
#ifdef CONFIG_CGROUP_WRITEBACK
	struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
	struct mutex cgwb_release_mutex;  /* protect shutdown of wb structs */
	struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
#endif
	wait_queue_head_t wb_waitq;

	struct device *dev;
	char dev_name[64];
	struct device *owner;

	struct timer_list laptop_mode_wb_timer;

#ifdef CONFIG_DEBUG_FS
	struct dentry *debug_dir;
#endif
};

struct wb_lock_cookie {
	bool locked;
	unsigned long flags;
};

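/*
 * Example (illustrative sketch, not part of the original header):
 * wb_lock_cookie is the caller-provided state used by the
 * unlocked_inode_to_wb_begin()/unlocked_inode_to_wb_end() helpers in
 * <linux/backing-dev.h> when dereferencing an inode's wb without taking
 * a wb reference:
 *
 *	struct wb_lock_cookie cookie = {};
 *	struct bdi_writeback *wb;
 *
 *	wb = unlocked_inode_to_wb_begin(inode, &cookie);
 *	... access fields of wb ...
 *	unlocked_inode_to_wb_end(inode, &cookie);
 */
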
#ifdef CONFIG_CGROUP_WRITEBACK

/**
 * wb_tryget - try to increment a wb's refcount
 * @wb: bdi_writeback to get
 */
static inline bool wb_tryget(struct bdi_writeback *wb)
{
	if (wb != &wb->bdi->wb)
		return percpu_ref_tryget(&wb->refcnt);
	return true;
}

/**
 * wb_get - increment a wb's refcount
 * @wb: bdi_writeback to get
 */
static inline void wb_get(struct bdi_writeback *wb)
{
	if (wb != &wb->bdi->wb)
		percpu_ref_get(&wb->refcnt);
}

/**
 * wb_put_many - decrement a wb's refcount
 * @wb: bdi_writeback to put
 * @nr: number of references to put
 */
static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
{
	if (WARN_ON_ONCE(!wb->bdi)) {
		/*
		 * A driver bug might cause a file to be removed before bdi was
		 * initialized.
		 */
		return;
	}

	if (wb != &wb->bdi->wb)
		percpu_ref_put_many(&wb->refcnt, nr);
}

/**
 * wb_put - decrement a wb's refcount
 * @wb: bdi_writeback to put
 */
static inline void wb_put(struct bdi_writeback *wb)
{
	wb_put_many(wb, 1);
}

/**
 * wb_dying - is a wb dying?
 * @wb: bdi_writeback of interest
 *
 * Returns whether @wb is unlinked and being drained.
 */
static inline bool wb_dying(struct bdi_writeback *wb)
{
	return percpu_ref_is_dying(&wb->refcnt);
}

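/*
 * Example (illustrative sketch, not part of the original header): pinning
 * a cgroup wb across a section that may sleep.  wb_tryget() can fail once
 * the wb's percpu ref has started dying.
 *
 *	if (!wb_tryget(wb))
 *		return;
 *	... use wb, possibly sleeping ...
 *	wb_put(wb);
 */
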
#else	/* CONFIG_CGROUP_WRITEBACK */

static inline bool wb_tryget(struct bdi_writeback *wb)
{
	return true;
}

static inline void wb_get(struct bdi_writeback *wb)
{
}

static inline void wb_put(struct bdi_writeback *wb)
{
}

static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
{
}

static inline bool wb_dying(struct bdi_writeback *wb)
{
	return false;
}

#endif	/* CONFIG_CGROUP_WRITEBACK */

#endif	/* __LINUX_BACKING_DEV_DEFS_H */