mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-01-12 09:32:12 +00:00
This patch introduces dm-pcache, a new DM target that places a DAX-
capable persistent-memory device in front of any slower block device and
uses it as a high-throughput, low-latency cache.
Design highlights
-----------------
- DAX data path – data is copied directly between DRAM and the pmem
mapping, bypassing the block layer’s overhead.
- Segmented, crash-consistent layout
- all layout metadata is dual-replicated and CRC-protected.
- atomic kset flushes; key replay on mount guarantees cache integrity
even after power loss.
- Striped multi-tree index
- Multi‑tree indexing for high parallelism.
- overlap-resolution logic ensures non-intersecting cached extents.
- Background services
- write-back worker flushes dirty keys in order, preserving backing-device
crash consistency. This is important for checkpointing in cloud storage.
- garbage collector reclaims clean segments when utilisation exceeds a
tunable threshold.
- Data integrity – optional CRC32 on cached payload; metadata always protected.
Comparison with existing block-level caches
---------------------------------------------------------------------------------------------------------------------------------
| Feature | pcache (this patch) | bcache | dm-writecache |
|----------------------------------|---------------------------------|------------------------------|---------------------------|
| pmem access method | DAX | bio (block I/O) | DAX |
| Write latency (4 K rand-write) | ~5 µs | ~20 µs | ~5 µs |
| Concurrency | multi subtree index | global index tree | single tree + wc_lock |
| IOPS (4K randwrite, 32 numjobs) | 2.1 M | 352 K | 283 K |
| Read-cache support | YES | YES | NO |
| Deployment | no re-format of backend | backend devices must be | no re-format of backend |
| | | reformatted | |
| Write-back ordering | log-structured; | no ordering guarantee | no ordering guarantee |
| | preserves app-IO-order | | |
| Data integrity checks | metadata + data CRC(optional) | metadata CRC only | none |
---------------------------------------------------------------------------------------------------------------------------------
Signed-off-by: Dongsheng Yang <dongsheng.yang@linux.dev>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
71 lines
2.2 KiB
C
71 lines
2.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
#ifndef _PCACHE_CACHE_DEV_H
|
|
#define _PCACHE_CACHE_DEV_H
|
|
|
|
#include <linux/device.h>
|
|
#include <linux/device-mapper.h>
|
|
|
|
#include "pcache_internal.h"
|
|
|
|
/*
 * Magic constant for a pcache cache device.
 * NOTE(review): presumably compared against pcache_sb.magic; the check itself
 * is not visible in this header.
 */
#define PCACHE_MAGIC			0x65B05EFA96C596EFULL

/*
 * Byte layout at the start of the cache device mapping. Every region starts
 * where the previous one ends:
 *
 *   PCACHE_SB_OFF          struct pcache_sb area, PCACHE_SB_SIZE bytes
 *   PCACHE_CACHE_INFO_OFF  PCACHE_META_INDEX_MAX slots of
 *                          PCACHE_CACHE_INFO_SIZE bytes each
 *   PCACHE_CACHE_CTRL_OFF  cache ctrl area, PCACHE_CACHE_CTRL_SIZE bytes
 *   PCACHE_SEGMENTS_OFF    first segment
 *
 * PCACHE_KB, PCACHE_MB and PCACHE_META_INDEX_MAX come from pcache_internal.h.
 */
#define PCACHE_SB_OFF			(4 * PCACHE_KB)
#define PCACHE_SB_SIZE			(4 * PCACHE_KB)

#define PCACHE_CACHE_INFO_OFF		(PCACHE_SB_OFF + PCACHE_SB_SIZE)
#define PCACHE_CACHE_INFO_SIZE		(4 * PCACHE_KB)

#define PCACHE_CACHE_CTRL_OFF \
	(PCACHE_CACHE_INFO_OFF + (PCACHE_CACHE_INFO_SIZE * PCACHE_META_INDEX_MAX))
#define PCACHE_CACHE_CTRL_SIZE		(4 * PCACHE_KB)

#define PCACHE_SEGMENTS_OFF		(PCACHE_CACHE_CTRL_OFF + PCACHE_CACHE_CTRL_SIZE)

/*
 * Size of the per-segment info area.
 * NOTE(review): where this area sits inside a segment is not visible here.
 */
#define PCACHE_SEG_INFO_SIZE		(4 * PCACHE_KB)

/* Minimum cache device size: 512 MB */
#define PCACHE_CACHE_DEV_SIZE_MIN	(512 * PCACHE_MB)

/* Size of each PCACHE segment: 16 MB */
#define PCACHE_SEG_SIZE			(16 * PCACHE_MB)
|
|
|
|
/*
 * Accessors for the fixed regions of a mapped cache device.
 *
 * @cache_dev is a pointer to an object with a ->mapping member (see
 * struct pcache_cache_dev); each macro yields an address at a fixed byte
 * offset from that mapping. The offsets are added to a void pointer, which
 * relies on the GNU C extension treating void-pointer arithmetic as
 * byte-granular.
 *
 * Every macro argument is parenthesized so that expression arguments such as
 * "&dev" or "base + i" expand with the intended precedence.
 */
#define CACHE_DEV_SB(cache_dev) \
	((struct pcache_sb *)((cache_dev)->mapping + PCACHE_SB_OFF))
#define CACHE_DEV_CACHE_INFO(cache_dev) \
	((void *)(cache_dev)->mapping + PCACHE_CACHE_INFO_OFF)
#define CACHE_DEV_CACHE_CTRL(cache_dev) \
	((void *)(cache_dev)->mapping + PCACHE_CACHE_CTRL_OFF)
#define CACHE_DEV_SEGMENTS(cache_dev) \
	((void *)(cache_dev)->mapping + PCACHE_SEGMENTS_OFF)

/*
 * Address of segment @id: segments are laid out back to back, PCACHE_SEG_SIZE
 * bytes apart, starting at CACHE_DEV_SEGMENTS(). The u64 cast keeps the
 * offset multiplication in 64-bit arithmetic.
 */
#define CACHE_DEV_SEGMENT(cache_dev, id) \
	((void *)CACHE_DEV_SEGMENTS(cache_dev) + (u64)(id) * PCACHE_SEG_SIZE)
|
|
|
|
/*
 * PCACHE SB flags configured during formatting
 *
 * The PCACHE_SB_F_xxx flags define registration requirements based on cache_dev
 * formatting. For a machine to register a cache_dev:
 * - PCACHE_SB_F_BIGENDIAN: Requires a big-endian machine.
 */
|
|
#define PCACHE_SB_F_BIGENDIAN BIT(0) /* SB flag bit 0: cache_dev requires a big-endian machine to register */
|
|
|
|
struct pcache_sb {
|
|
__le32 crc;
|
|
__le32 flags;
|
|
__le64 magic;
|
|
|
|
__le32 seg_num;
|
|
};
|
|
|
|
struct pcache_cache_dev {
|
|
u32 sb_flags;
|
|
u32 seg_num;
|
|
void *mapping;
|
|
bool use_vmap;
|
|
|
|
struct dm_dev *dm_dev;
|
|
|
|
struct mutex seg_lock;
|
|
unsigned long *seg_bitmap;
|
|
};
|
|
|
|
struct dm_pcache;
|
|
int cache_dev_start(struct dm_pcache *pcache);
|
|
void cache_dev_stop(struct dm_pcache *pcache);
|
|
|
|
void cache_dev_zero_range(struct pcache_cache_dev *cache_dev, void *pos, u32 size);
|
|
|
|
int cache_dev_get_empty_segment_id(struct pcache_cache_dev *cache_dev, u32 *seg_id);
|
|
|
|
#endif /* _PCACHE_CACHE_DEV_H */
|