
Merge tag 'f2fs-for-6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "This focuses on two primary updates for Android devices.

  First, it sets hash-based file name lookup as the default method to
  improve performance, while retaining an option to fall back to a
  linear lookup.

  Second, it resolves a persistent issue with the 'checkpoint=enable'
  feature.

  The update further boosts performance by prefetching node blocks,
  merging FUA writes more efficiently, and optimizing block allocation
  policies.

  The release is rounded out by a comprehensive set of bug fixes that
  address memory safety, data integrity, and potential system hangs,
  along with minor documentation and code clean-ups.

  Enhancements:
   - add mount option and sysfs entry to tune the lookup mode
   - dump more information and add a timeout when enabling/disabling
     checkpoints
   - readahead node blocks in F2FS_GET_BLOCK_PRECACHE mode
   - merge FUA command with the existing writes
   - allocate HOT_DATA for IPU writes
   - Use allocate_section_policy to control write priority in
     multi-devices setups
   - add reserved nodes for privileged users
   - Add bggc_io_aware to adjust the priority of BG_GC when issuing IO
   - show the list of donation files

  Bug fixes:
   - add missing dput() when printing the donation list
   - fix UAF issue in f2fs_merge_page_bio()
   - add sanity check on ei.len in __update_extent_tree_range()
   - fix infinite loop in __insert_extent_tree()
   - fix zero-sized extent for precache extents
   - fix to mitigate overhead of f2fs_zero_post_eof_page()
   - fix to avoid migrating empty section
   - fix to truncate first page in error path of f2fs_truncate()
   - fix to update map->m_next_extent correctly in f2fs_map_blocks()
   - fix wrong layout information on 16KB page
   - fix to do sanity check on node footer for non inode dnode
   - fix to avoid NULL pointer dereference in
     f2fs_check_quota_consistency()
   - fix to detect potential corrupted nid in free_nid_list
   - fix to clear unusable_cap for checkpoint=enable
   - fix to zero data after EOF for compressed file correctly
   - fix to avoid overflow while left shift operation
   - fix condition in __allow_reserved_blocks()"

* tag 'f2fs-for-6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (43 commits)
  f2fs: add missing dput() when printing the donation list
  f2fs: fix UAF issue in f2fs_merge_page_bio()
  f2fs: readahead node blocks in F2FS_GET_BLOCK_PRECACHE mode
  f2fs: add sanity check on ei.len in __update_extent_tree_range()
  f2fs: fix infinite loop in __insert_extent_tree()
  f2fs: fix zero-sized extent for precache extents
  f2fs: fix to mitigate overhead of f2fs_zero_post_eof_page()
  f2fs: fix to avoid migrating empty section
  f2fs: fix to truncate first page in error path of f2fs_truncate()
  f2fs: fix to update map->m_next_extent correctly in f2fs_map_blocks()
  f2fs: fix wrong layout information on 16KB page
  f2fs: clean up error handing of f2fs_submit_page_read()
  f2fs: avoid unnecessary folio_clear_uptodate() for cleanup
  f2fs: merge FUA command with the existing writes
  f2fs: allocate HOT_DATA for IPU writes
  f2fs: Use allocate_section_policy to control write priority in multi-devices setups
  Documentation: f2fs: Reword title
  Documentation: f2fs: Indent compression_mode option list
  Documentation: f2fs: Wrap snippets in literal code blocks
  Documentation: f2fs: Span write hint table section rows
  ...
Linus Torvalds 2025-10-03 14:05:12 -07:00
commit 86d563ac5f
18 changed files with 697 additions and 199 deletions

View File

@ -822,8 +822,8 @@ What: /sys/fs/f2fs/<disk>/gc_valid_thresh_ratio
Date: September 2024
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: It controls the valid block ratio threshold not to trigger excessive GC
for zoned deivces. The initial value of it is 95(%). F2FS will stop the
background GC thread from intiating GC for sections having valid blocks
for zoned devices. The initial value of it is 95(%). F2FS will stop the
background GC thread from initiating GC for sections having valid blocks
exceeding the ratio.
What: /sys/fs/f2fs/<disk>/max_read_extent_count
@ -847,7 +847,7 @@ Description: For several zoned storage devices, vendors will provide extra space
filesystem level GC. To do that, we can reserve the space using
reserved_blocks. However, it is not enough, since this extra space should
not be shown to users. So, with this new sysfs node, we can hide the space
by substracting reserved_blocks from total bytes.
by subtracting reserved_blocks from total bytes.
What: /sys/fs/f2fs/<disk>/encoding_flags
Date: April 2025
@ -883,3 +883,53 @@ Date: June 2025
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: Control GC algorithm for boost GC. 0: cost benefit, 1: greedy
Default: 1
What: /sys/fs/f2fs/<disk>/effective_lookup_mode
Date: August 2025
Contact: "Daniel Lee" <chullee@google.com>
Description:
This is a read-only entry to show the effective directory lookup mode
F2FS is currently using for casefolded directories.
This considers both the "lookup_mode" mount option and the on-disk
encoding flag, SB_ENC_NO_COMPAT_FALLBACK_FL.
Possible values are:
- "perf": Hash-only lookup.
- "compat": Hash-based lookup with a linear search fallback enabled
- "auto:perf": lookup_mode is auto and fallback is disabled on-disk
- "auto:compat": lookup_mode is auto and fallback is enabled on-disk
What: /sys/fs/f2fs/<disk>/bggc_io_aware
Date: August 2025
Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
Description: Used to adjust the BG_GC priority when pending IO, with a default value
of 0. Specifically, for ZUFS, the default value is 1.
================== ======================================================
value description
bggc_io_aware = 0 skip background GC if there is any kind of pending IO
bggc_io_aware = 1 skip background GC if there is pending read IO
bggc_io_aware = 2 don't aware IO for background GC
================== ======================================================
What: /sys/fs/f2fs/<disk>/allocate_section_hint
Date: August 2025
Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
Description: Indicates the hint section between the first device and others in multi-devices
setup. It defaults to the end of the first device in sections. For a single storage
device, it defaults to the total number of sections. It can be manually set to match
scenarios where multi-devices are mapped to the same dm device.
What: /sys/fs/f2fs/<disk>/allocate_section_policy
Date: August 2025
Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
Description: Controls write priority in multi-devices setups. A value of 0 means normal writing.
A value of 1 prioritizes writing to devices before the allocate_section_hint. A value of 2
prioritizes writing to devices after the allocate_section_hint. The default is 0.
=========================== ==========================================================
value description
allocate_section_policy = 0 Normal writing
allocate_section_policy = 1 Prioritize writing to section before allocate_section_hint
allocate_section_policy = 2 Prioritize writing to section after allocate_section_hint
=========================== ==========================================================
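
For illustration, the new tunables above could be exercised as follows. This is
a hedged sketch: "vdb" stands in for the actual <disk> name, and the values are
taken from the tables above.

	# echo 1 > /sys/fs/f2fs/vdb/bggc_io_aware
	# cat /sys/fs/f2fs/vdb/bggc_io_aware
	1
	# echo 1 > /sys/fs/f2fs/vdb/allocate_section_policy
	# cat /sys/fs/f2fs/vdb/allocate_section_hint

After this, background GC is skipped only while read IO is pending, and block
allocation prefers sections before allocate_section_hint (i.e. on the first
device of a multi-device setup).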

View File

@ -1,8 +1,11 @@
.. SPDX-License-Identifier: GPL-2.0
==========================================
WHAT IS Flash-Friendly File System (F2FS)?
==========================================
=================================
Flash-Friendly File System (F2FS)
=================================
Overview
========
NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have
been equipped on a variety systems ranging from mobile to server systems. Since
@ -173,9 +176,12 @@ data_flush Enable data flushing before checkpoint in order to
persist data of regular and symlink.
reserve_root=%d Support configuring reserved space which is used for
allocation from a privileged user with specified uid or
gid, unit: 4KB, the default limit is 0.2% of user blocks.
resuid=%d The user ID which may use the reserved blocks.
resgid=%d The group ID which may use the reserved blocks.
gid, unit: 4KB, the default limit is 12.5% of user blocks.
reserve_node=%d Support configuring reserved nodes which are used for
allocation from a privileged user with specified uid or
gid, the default limit is 12.5% of all nodes.
resuid=%d The user ID which may use the reserved blocks and nodes.
resgid=%d The group ID which may use the reserved blocks and nodes.
fault_injection=%d Enable fault injection in all supported types with
specified injection rate.
fault_type=%d Support configuring fault injection type, should be
@ -291,9 +297,13 @@ compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo"
"lz4", "zstd" and "lzo-rle" algorithm.
compress_algorithm=%s:%d Control compress algorithm and its compress level, now, only
"lz4" and "zstd" support compress level config.
========= ===========
algorithm level range
========= ===========
lz4 3 - 16
zstd 1 - 22
========= ===========
compress_log_size=%u Support configuring compress cluster size. The size will
be 4KB * (1 << %u). The default and minimum sizes are 16KB.
compress_extension=%s Support adding specified extension, so that f2fs can enable
@ -357,6 +367,7 @@ errors=%s Specify f2fs behavior on critical errors. This supports modes:
panic immediately, continue without doing anything, and remount
the partition in read-only mode. By default it uses "continue"
mode.
====================== =============== =============== ========
mode continue remount-ro panic
====================== =============== =============== ========
@ -370,6 +381,25 @@ errors=%s Specify f2fs behavior on critical errors. This supports modes:
====================== =============== =============== ========
nat_bits Enable nat_bits feature to enhance full/empty nat blocks access,
by default it's disabled.
lookup_mode=%s Control the directory lookup behavior for casefolded
directories. This option has no effect on directories
that do not have the casefold feature enabled.
================== ========================================
Value Description
================== ========================================
perf (Default) Enforces a hash-only lookup.
The linear search fallback is always
disabled, ignoring the on-disk flag.
compat Enables the linear search fallback for
compatibility with directory entries
created by older kernel that used a
different case-folding algorithm.
This mode ignores the on-disk flag.
auto F2FS determines the mode based on the
on-disk `SB_ENC_NO_COMPAT_FALLBACK_FL`
flag.
================== ========================================
======================== ============================================================
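
A hedged usage sketch for the new mount options (the device, mountpoint, and
reserve_node count are hypothetical; the lookup_mode values are from the table
above):

	# mount -t f2fs -o lookup_mode=compat,reserve_node=1024 /dev/vdb /mnt/f2fs
	# cat /sys/fs/f2fs/vdb/effective_lookup_mode
	compat

With lookup_mode=compat, casefolded directories keep the linear-search
fallback regardless of the on-disk SB_ENC_NO_COMPAT_FALLBACK_FL flag.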
Debugfs Entries
@ -795,11 +825,13 @@ ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
extension list " "
-- buffered io
------------------------------------------------------------------
N/A COLD_DATA WRITE_LIFE_EXTREME
N/A HOT_DATA WRITE_LIFE_SHORT
N/A WARM_DATA WRITE_LIFE_NOT_SET
-- direct io
------------------------------------------------------------------
WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
@ -915,24 +947,26 @@ compression enabled files (refer to "Compression implementation" section for how
enable compression on a regular inode).
1) compress_mode=fs
This is the default option. f2fs does automatic compression in the writeback of the
compression enabled files.
This is the default option. f2fs does automatic compression in the writeback of the
compression enabled files.
2) compress_mode=user
This disables the automatic compression and gives the user discretion of choosing the
target file and the timing. The user can do manual compression/decompression on the
compression enabled files using F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE
ioctls like the below.
To decompress a file,
This disables the automatic compression and gives the user discretion of choosing the
target file and the timing. The user can do manual compression/decompression on the
compression enabled files using F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE
ioctls like the below.
fd = open(filename, O_WRONLY, 0);
ret = ioctl(fd, F2FS_IOC_DECOMPRESS_FILE);
To decompress a file::
To compress a file,
fd = open(filename, O_WRONLY, 0);
ret = ioctl(fd, F2FS_IOC_DECOMPRESS_FILE);
fd = open(filename, O_WRONLY, 0);
ret = ioctl(fd, F2FS_IOC_COMPRESS_FILE);
To compress a file::
fd = open(filename, O_WRONLY, 0);
ret = ioctl(fd, F2FS_IOC_COMPRESS_FILE);
NVMe Zoned Namespace devices
----------------------------
@ -962,32 +996,32 @@ reserved and used by another filesystem or for different purposes. Once that
external usage is complete, the device aliasing file can be deleted, releasing
the reserved space back to F2FS for its own use.
<use-case>
.. code-block::
# ls /dev/vd*
/dev/vdb (32GB) /dev/vdc (32GB)
# mkfs.ext4 /dev/vdc
# mkfs.f2fs -c /dev/vdc@vdc.file /dev/vdb
# mount /dev/vdb /mnt/f2fs
# ls -l /mnt/f2fs
vdc.file
# df -h
/dev/vdb 64G 33G 32G 52% /mnt/f2fs
# ls /dev/vd*
/dev/vdb (32GB) /dev/vdc (32GB)
# mkfs.ext4 /dev/vdc
# mkfs.f2fs -c /dev/vdc@vdc.file /dev/vdb
# mount /dev/vdb /mnt/f2fs
# ls -l /mnt/f2fs
vdc.file
# df -h
/dev/vdb 64G 33G 32G 52% /mnt/f2fs
# mount -o loop /dev/vdc /mnt/ext4
# df -h
/dev/vdb 64G 33G 32G 52% /mnt/f2fs
/dev/loop7 32G 24K 30G 1% /mnt/ext4
# umount /mnt/ext4
# mount -o loop /dev/vdc /mnt/ext4
# df -h
/dev/vdb 64G 33G 32G 52% /mnt/f2fs
/dev/loop7 32G 24K 30G 1% /mnt/ext4
# umount /mnt/ext4
# f2fs_io getflags /mnt/f2fs/vdc.file
get a flag on /mnt/f2fs/vdc.file ret=0, flags=nocow(pinned),immutable
# f2fs_io setflags noimmutable /mnt/f2fs/vdc.file
get a flag on noimmutable ret=0, flags=800010
set a flag on /mnt/f2fs/vdc.file ret=0, flags=noimmutable
# rm /mnt/f2fs/vdc.file
# df -h
/dev/vdb 64G 753M 64G 2% /mnt/f2fs
# f2fs_io getflags /mnt/f2fs/vdc.file
get a flag on /mnt/f2fs/vdc.file ret=0, flags=nocow(pinned),immutable
# f2fs_io setflags noimmutable /mnt/f2fs/vdc.file
get a flag on noimmutable ret=0, flags=800010
set a flag on /mnt/f2fs/vdc.file ret=0, flags=noimmutable
# rm /mnt/f2fs/vdc.file
# df -h
/dev/vdb 64G 753M 64G 2% /mnt/f2fs
So, the key idea is, user can do any file operations on /dev/vdc, and
reclaim the space after the use, while the space is counted as /data.

View File

@ -1442,6 +1442,34 @@ u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi)
return get_sectors_written(sbi->sb->s_bdev);
}
static inline void stat_cp_time(struct cp_control *cpc, enum cp_time type)
{
cpc->stats.times[type] = ktime_get();
}
static inline void check_cp_time(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
unsigned long long sb_diff, cur_diff;
enum cp_time ct;
sb_diff = (u64)ktime_ms_delta(sbi->cp_stats.times[CP_TIME_END],
sbi->cp_stats.times[CP_TIME_START]);
cur_diff = (u64)ktime_ms_delta(cpc->stats.times[CP_TIME_END],
cpc->stats.times[CP_TIME_START]);
if (cur_diff > sb_diff) {
sbi->cp_stats = cpc->stats;
if (cur_diff < CP_LONG_LATENCY_THRESHOLD)
return;
f2fs_warn(sbi, "checkpoint was blocked for %llu ms", cur_diff);
for (ct = CP_TIME_START; ct < CP_TIME_MAX - 1; ct++)
f2fs_warn(sbi, "Step#%d: %llu ms", ct,
(u64)ktime_ms_delta(cpc->stats.times[ct + 1],
cpc->stats.times[ct]));
}
}
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@ -1459,6 +1487,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Flush all the NAT/SIT pages */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
stat_cp_time(cpc, CP_TIME_SYNC_META);
/* start to update checkpoint, cp ver is already updated previously */
ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true));
ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
@ -1555,20 +1585,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
stat_cp_time(cpc, CP_TIME_SYNC_CP_META);
/* Wait for all dirty meta pages to be submitted for IO */
f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META);
stat_cp_time(cpc, CP_TIME_WAIT_DIRTY_META);
/* wait for previous submitted meta pages writeback */
f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
stat_cp_time(cpc, CP_TIME_WAIT_CP_DATA);
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
if (err)
return err;
stat_cp_time(cpc, CP_TIME_FLUSH_DEVICE);
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
stat_cp_time(cpc, CP_TIME_WAIT_LAST_CP);
/*
* invalidate intermediate page cache borrowed from meta inode which are
@ -1613,6 +1649,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unsigned long long ckpt_ver;
int err = 0;
stat_cp_time(cpc, CP_TIME_START);
if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi))
return -EROFS;
@ -1624,6 +1662,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (cpc->reason != CP_RESIZE)
f2fs_down_write(&sbi->cp_global_sem);
stat_cp_time(cpc, CP_TIME_LOCK);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
@ -1639,6 +1679,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (err)
goto out;
stat_cp_time(cpc, CP_TIME_OP_LOCK);
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
f2fs_flush_merged_writes(sbi);
@ -1678,6 +1720,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_sit_entries(sbi, cpc);
stat_cp_time(cpc, CP_TIME_FLUSH_META);
/* save inmem log status */
f2fs_save_inmem_curseg(sbi);
@ -1695,6 +1739,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
stat_inc_cp_count(sbi);
stop:
unblock_operations(sbi);
stat_cp_time(cpc, CP_TIME_END);
check_cp_time(sbi, cpc);
if (cpc->reason & CP_RECOVERY)
f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver);
@ -1778,6 +1824,7 @@ static void __checkpoint_and_complete_reqs(struct f2fs_sb_info *sbi)
llist_for_each_entry_safe(req, next, dispatch_list, llnode) {
diff = (u64)ktime_ms_delta(ktime_get(), req->queue_time);
req->ret = ret;
req->delta_time = diff;
complete(&req->wait);
sum_diff += diff;
@ -1873,6 +1920,12 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
else
flush_remained_ckpt_reqs(sbi, &req);
if (unlikely(req.delta_time >= CP_LONG_LATENCY_THRESHOLD)) {
f2fs_warn_ratelimited(sbi,
"blocked on checkpoint for %u ms", cprc->peak_time);
dump_stack();
}
return req.ret;
}
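
With CP_LONG_LATENCY_THRESHOLD set to 5000 ms (see the f2fs.h hunk below), a
stalled checkpoint now leaves per-step timings in the kernel log. A hedged
sketch of what that might look like, following the format strings above, with
hypothetical timings and "vdb" as the device:

	# dmesg | grep F2FS
	F2FS-fs (vdb): checkpoint was blocked for 6214 ms
	F2FS-fs (vdb): Step#0: 3 ms
	F2FS-fs (vdb): Step#1: 4820 ms
	...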

View File

@ -1215,9 +1215,11 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock)
{
void *fsdata = NULL;
struct page *pagep;
struct page **rpages;
int log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
pgoff_t start_idx = from >> (PAGE_SHIFT + log_cluster_size) <<
log_cluster_size;
int i;
int err;
err = f2fs_is_compressed_cluster(inode, start_idx);
@ -1238,27 +1240,30 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock)
if (err <= 0)
return err;
if (err > 0) {
struct page **rpages = fsdata;
int cluster_size = F2FS_I(inode)->i_cluster_size;
int i;
rpages = fsdata;
for (i = cluster_size - 1; i >= 0; i--) {
struct folio *folio = page_folio(rpages[i]);
loff_t start = folio->index << PAGE_SHIFT;
for (i = (1 << log_cluster_size) - 1; i >= 0; i--) {
struct folio *folio = page_folio(rpages[i]);
loff_t start = (loff_t)folio->index << PAGE_SHIFT;
loff_t offset = from > start ? from - start : 0;
if (from <= start) {
folio_zero_segment(folio, 0, folio_size(folio));
} else {
folio_zero_segment(folio, from - start,
folio_size(folio));
break;
}
}
folio_zero_segment(folio, offset, folio_size(folio));
f2fs_compress_write_end(inode, fsdata, start_idx, true);
if (from >= start)
break;
}
return 0;
f2fs_compress_write_end(inode, fsdata, start_idx, true);
err = filemap_write_and_wait_range(inode->i_mapping,
round_down(from, 1 << log_cluster_size << PAGE_SHIFT),
LLONG_MAX);
if (err)
return err;
truncate_pagecache(inode, from);
return f2fs_do_truncate_blocks(inode, round_up(from, PAGE_SIZE), lock);
}
static int f2fs_write_compressed_pages(struct compress_ctx *cc,

View File

@ -733,9 +733,11 @@ static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
static bool io_type_is_mergeable(struct f2fs_bio_info *io,
struct f2fs_io_info *fio)
{
blk_opf_t mask = ~(REQ_PREFLUSH | REQ_FUA);
if (io->fio.op != fio->op)
return false;
return io->fio.op_flags == fio->op_flags;
return (io->fio.op_flags & mask) == (fio->op_flags & mask);
}
static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
@ -911,7 +913,7 @@ alloc_new:
if (fio->io_wbc)
wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio));
inc_page_count(fio->sbi, WB_DATA_TYPE(data_folio, false));
inc_page_count(fio->sbi, WB_DATA_TYPE(folio, false));
*fio->last_block = fio->new_blkaddr;
*fio->bio = bio;
@ -1083,7 +1085,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
}
/* This can handle encryption stuffs */
static int f2fs_submit_page_read(struct inode *inode, struct folio *folio,
static void f2fs_submit_page_read(struct inode *inode, struct folio *folio,
block_t blkaddr, blk_opf_t op_flags,
bool for_write)
{
@ -1092,23 +1094,16 @@ static int f2fs_submit_page_read(struct inode *inode, struct folio *folio,
bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
folio->index, for_write);
if (IS_ERR(bio))
return PTR_ERR(bio);
/* wait for GCed page writeback via META_MAPPING */
f2fs_wait_on_block_writeback(inode, blkaddr);
if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) {
iostat_update_and_unbind_ctx(bio);
if (bio->bi_private)
mempool_free(bio->bi_private, bio_post_read_ctx_pool);
bio_put(bio);
return -EFAULT;
}
if (!bio_add_folio(bio, folio, PAGE_SIZE, 0))
f2fs_bug_on(sbi, 1);
inc_page_count(sbi, F2FS_RD_DATA);
f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
f2fs_submit_read_bio(sbi, bio, DATA);
return 0;
}
static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
@ -1265,10 +1260,8 @@ got_it:
return folio;
}
err = f2fs_submit_page_read(inode, folio, dn.data_blkaddr,
f2fs_submit_page_read(inode, folio, dn.data_blkaddr,
op_flags, for_write);
if (err)
goto put_err;
return folio;
put_err:
@ -1572,6 +1565,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
pgofs = (pgoff_t)map->m_lblk;
end = pgofs + maxblocks;
if (flag == F2FS_GET_BLOCK_PRECACHE)
mode = LOOKUP_NODE_RA;
next_dnode:
if (map->m_may_create) {
if (f2fs_lfs_mode(sbi))
@ -1778,12 +1774,13 @@ sync_out:
if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk;
f2fs_update_read_extent_cache_range(&dn,
start_pgofs, map->m_pblk + ofs,
map->m_len - ofs);
if (map->m_len > ofs)
f2fs_update_read_extent_cache_range(&dn,
start_pgofs, map->m_pblk + ofs,
map->m_len - ofs);
}
if (map->m_next_extent)
*map->m_next_extent = pgofs + 1;
*map->m_next_extent = is_hole ? pgofs + 1 : pgofs;
}
f2fs_put_dnode(&dn);
unlock_out:
@ -2145,16 +2142,10 @@ submit_and_realloc:
f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
bio = NULL;
}
if (bio == NULL) {
if (bio == NULL)
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
f2fs_ra_op_flags(rac), index,
false);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
bio = NULL;
goto out;
}
}
/*
* If the page is under writeback, we need to wait for
@ -2303,18 +2294,10 @@ submit_and_realloc:
bio = NULL;
}
if (!bio) {
if (!bio)
bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages - i,
f2fs_ra_op_flags(rac),
folio->index, for_write);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
f2fs_decompress_end_io(dic, ret, true);
f2fs_put_dnode(&dn);
*bio_ret = NULL;
return ret;
}
}
if (!bio_add_folio(bio, folio, blocksize, 0))
goto submit_and_realloc;
@ -3639,11 +3622,9 @@ repeat:
err = -EFSCORRUPTED;
goto put_folio;
}
err = f2fs_submit_page_read(use_cow ?
f2fs_submit_page_read(use_cow ?
F2FS_I(inode)->cow_inode : inode,
folio, blkaddr, 0, true);
if (err)
goto put_folio;
folio_lock(folio);
if (unlikely(folio->mapping != mapping)) {

View File

@ -16,6 +16,21 @@
#include "xattr.h"
#include <trace/events/f2fs.h>
static inline bool f2fs_should_fallback_to_linear(struct inode *dir)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
switch (F2FS_OPTION(sbi).lookup_mode) {
case LOOKUP_PERF:
return false;
case LOOKUP_COMPAT:
return true;
case LOOKUP_AUTO:
return !sb_no_casefold_compat_fallback(sbi->sb);
}
return false;
}
#if IS_ENABLED(CONFIG_UNICODE)
extern struct kmem_cache *f2fs_cf_name_slab;
#endif
@ -366,7 +381,7 @@ start_find_entry:
out:
#if IS_ENABLED(CONFIG_UNICODE)
if (!sb_no_casefold_compat_fallback(dir->i_sb) &&
if (f2fs_should_fallback_to_linear(dir) &&
IS_CASEFOLDED(dir) && !de && use_hash) {
use_hash = false;
goto start_find_entry;

View File

@ -604,7 +604,13 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
p = &(*p)->rb_right;
leftmost = false;
} else {
f2fs_err_ratelimited(sbi, "%s: corrupted extent, type: %d, "
"extent node in rb tree [%u, %u, %u], age [%llu, %llu], "
"extent node to insert [%u, %u, %u], age [%llu, %llu]",
__func__, et->type, en->ei.fofs, en->ei.blk, en->ei.len, en->ei.age,
en->ei.last_blocks, ei->fofs, ei->blk, ei->len, ei->age, ei->last_blocks);
f2fs_bug_on(sbi, 1);
return NULL;
}
}
@ -664,6 +670,15 @@ static void __update_extent_tree_range(struct inode *inode,
if (!et)
return;
if (unlikely(len == 0)) {
f2fs_err_ratelimited(sbi, "%s: extent len is zero, type: %d, "
"extent [%u, %u, %u], age [%llu, %llu]",
__func__, type, tei->fofs, tei->blk, tei->len,
tei->age, tei->last_blocks);
f2fs_bug_on(sbi, 1);
return;
}
if (type == EX_READ)
trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
tei->blk, 0);

View File

@ -131,6 +131,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
* string rather than using the MS_LAZYTIME flag, so this must remain.
*/
#define F2FS_MOUNT_LAZYTIME 0x40000000
#define F2FS_MOUNT_RESERVE_NODE 0x80000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@ -155,6 +156,18 @@ enum blkzone_allocation_policy {
BLKZONE_ALLOC_PRIOR_CONV, /* Prioritize writing to conventional zones */
};
enum bggc_io_aware_policy {
AWARE_ALL_IO, /* skip background GC if there is any kind of pending IO */
AWARE_READ_IO, /* skip background GC if there is pending read IO */
AWARE_NONE, /* don't aware IO for background GC */
};
enum device_allocation_policy {
ALLOCATE_FORWARD_NOHINT,
ALLOCATE_FORWARD_WITHIN_HINT,
ALLOCATE_FORWARD_FROM_HINT,
};
/*
* An implementation of an rwsem that is explicitly unfair to readers. This
* prevents priority inversion when a low-priority reader acquires the read lock
@ -172,6 +185,7 @@ struct f2fs_rwsem {
struct f2fs_mount_info {
unsigned int opt;
block_t root_reserved_blocks; /* root reserved blocks */
block_t root_reserved_nodes; /* root reserved nodes */
kuid_t s_resuid; /* reserved blocks for uid */
kgid_t s_resgid; /* reserved blocks for gid */
int active_logs; /* # of active logs */
@ -212,6 +226,7 @@ struct f2fs_mount_info {
int compress_mode; /* compression mode */
unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
unsigned int lookup_mode;
};
#define F2FS_FEATURE_ENCRYPT 0x00000001
@ -266,14 +281,36 @@ enum {
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
#define DEF_DISABLE_INTERVAL 5 /* 5 secs */
#define DEF_ENABLE_INTERVAL 16 /* 16 secs */
#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */
#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */
enum cp_time {
CP_TIME_START, /* begin */
CP_TIME_LOCK, /* after cp_global_sem */
CP_TIME_OP_LOCK, /* after block_operation */
CP_TIME_FLUSH_META, /* after flush sit/nat */
CP_TIME_SYNC_META, /* after sync_meta_pages */
CP_TIME_SYNC_CP_META, /* after sync cp meta pages */
CP_TIME_WAIT_DIRTY_META,/* after wait on dirty meta */
CP_TIME_WAIT_CP_DATA, /* after wait on cp data */
CP_TIME_FLUSH_DEVICE, /* after flush device cache */
CP_TIME_WAIT_LAST_CP, /* after wait on last cp pack */
CP_TIME_END, /* after unblock_operation */
CP_TIME_MAX,
};
/* time cost stats of checkpoint */
struct cp_stats {
ktime_t times[CP_TIME_MAX];
};
struct cp_control {
int reason;
__u64 trim_start;
__u64 trim_end;
__u64 trim_minlen;
struct cp_stats stats;
};
/*
@ -334,7 +371,10 @@ struct ckpt_req {
struct completion wait; /* completion for checkpoint done */
struct llist_node llnode; /* llist_node to be linked in wait queue */
int ret; /* return code of checkpoint */
ktime_t queue_time; /* request queued time */
union {
ktime_t queue_time; /* request queued time */
ktime_t delta_time; /* time in queue */
};
};
struct ckpt_req_control {
@ -350,6 +390,9 @@ struct ckpt_req_control {
unsigned int peak_time; /* peak wait time in msec until now */
};
/* a time threshold that checkpoint was blocked for, unit: ms */
#define CP_LONG_LATENCY_THRESHOLD 5000
/* for the bitmap indicate blocks to be discarded */
struct discard_entry {
struct list_head list; /* list head */
@ -1375,6 +1418,7 @@ enum {
DISCARD_TIME,
GC_TIME,
DISABLE_TIME,
ENABLE_TIME,
UMOUNT_DISCARD_TIMEOUT,
MAX_TIME,
};
@ -1454,6 +1498,12 @@ enum {
TOTAL_CALL = FOREGROUND,
};
enum f2fs_lookup_mode {
LOOKUP_PERF,
LOOKUP_COMPAT,
LOOKUP_AUTO,
};
static inline int f2fs_test_bit(unsigned int nr, char *addr);
static inline void f2fs_set_bit(unsigned int nr, char *addr);
static inline void f2fs_clear_bit(unsigned int nr, char *addr);
@ -1643,6 +1693,7 @@ struct f2fs_sb_info {
unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
long interval_time[MAX_TIME]; /* to store thresholds */
struct ckpt_req_control cprc_info; /* for checkpoint request control */
struct cp_stats cp_stats; /* for time stat of checkpoint */
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
@ -1810,6 +1861,9 @@ struct f2fs_sb_info {
spinlock_t dev_lock; /* protect dirty_device */
bool aligned_blksize; /* all devices has the same logical blksize */
unsigned int first_seq_zone_segno; /* first segno in sequential zone */
unsigned int bggc_io_aware; /* For adjust the BG_GC priority when pending IO */
unsigned int allocate_section_hint; /* the boundary position between devices */
unsigned int allocate_section_policy; /* determine the section writing priority */
/* For write statistics */
u64 sectors_written_start;
@ -2362,13 +2416,11 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
return ofs == XATTR_NODE_OFFSET;
}
static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
static inline bool __allow_reserved_root(struct f2fs_sb_info *sbi,
struct inode *inode, bool cap)
{
if (!inode)
return true;
if (!test_opt(sbi, RESERVE_ROOT))
return false;
if (IS_NOQUOTA(inode))
return true;
if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid()))
@ -2389,7 +2441,7 @@ static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi,
avail_user_block_count = sbi->user_block_count -
sbi->current_reserved_blocks;
if (!__allow_reserved_blocks(sbi, inode, cap))
if (test_opt(sbi, RESERVE_ROOT) && !__allow_reserved_root(sbi, inode, cap))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
@ -2747,7 +2799,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
struct inode *inode, bool is_inode)
{
block_t valid_block_count;
unsigned int valid_node_count;
unsigned int valid_node_count, avail_user_node_count;
unsigned int avail_user_block_count;
int err;
@ -2769,15 +2821,20 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
valid_block_count = sbi->total_valid_block_count + 1;
avail_user_block_count = get_available_block_count(sbi, inode, false);
avail_user_block_count = get_available_block_count(sbi, inode,
test_opt(sbi, RESERVE_NODE));
if (unlikely(valid_block_count > avail_user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
}
avail_user_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
if (test_opt(sbi, RESERVE_NODE) &&
!__allow_reserved_root(sbi, inode, true))
avail_user_node_count -= F2FS_OPTION(sbi).root_reserved_nodes;
valid_node_count = sbi->total_valid_node_count + 1;
if (unlikely(valid_node_count > sbi->total_node_count)) {
if (unlikely(valid_node_count > avail_user_node_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
}
@ -3004,13 +3061,10 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
if (sbi->gc_mode == GC_URGENT_HIGH)
return true;
if (zoned_gc) {
if (is_inflight_read_io(sbi))
return false;
} else {
if (is_inflight_io(sbi, type))
return false;
}
if (sbi->bggc_io_aware == AWARE_READ_IO && is_inflight_read_io(sbi))
return false;
if (sbi->bggc_io_aware == AWARE_ALL_IO && is_inflight_io(sbi, type))
return false;
if (sbi->gc_mode == GC_URGENT_MID)
return true;
@ -3770,6 +3824,7 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname);
* node.c
*/
struct node_info;
enum node_type;
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
@ -3792,7 +3847,8 @@ int f2fs_remove_inode_page(struct inode *inode);
struct folio *f2fs_new_inode_folio(struct inode *inode);
struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs);
void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid);
struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid,
enum node_type node_type);
struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino);
struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid);
int f2fs_move_node_folio(struct folio *node_folio, int gc_type);

View File

@ -35,15 +35,23 @@
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>
static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size)
static void f2fs_zero_post_eof_page(struct inode *inode,
loff_t new_size, bool lock)
{
loff_t old_size = i_size_read(inode);
if (old_size >= new_size)
return;
if (mapping_empty(inode->i_mapping))
return;
if (lock)
filemap_invalidate_lock(inode->i_mapping);
/* zero or drop pages only in range of [old_size, new_size] */
truncate_pagecache(inode, old_size);
truncate_inode_pages_range(inode->i_mapping, old_size, new_size);
if (lock)
filemap_invalidate_unlock(inode->i_mapping);
}
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
@ -114,9 +122,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
filemap_invalidate_lock(inode->i_mapping);
f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT);
filemap_invalidate_unlock(inode->i_mapping);
f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true);
file_update_time(vmf->vma->vm_file);
filemap_invalidate_lock_shared(inode->i_mapping);
@ -904,8 +910,16 @@ int f2fs_truncate(struct inode *inode)
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
if (err) {
/*
* Always truncate page #0 to avoid page cache
* leak in evict() path.
*/
truncate_inode_pages_range(inode->i_mapping,
F2FS_BLK_TO_BYTES(0),
F2FS_BLK_END_BYTES(0));
return err;
}
}
err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
@ -1141,7 +1155,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
filemap_invalidate_lock(inode->i_mapping);
if (attr->ia_size > old_size)
f2fs_zero_post_eof_page(inode, attr->ia_size);
f2fs_zero_post_eof_page(inode, attr->ia_size, false);
truncate_setsize(inode, attr->ia_size);
if (attr->ia_size <= old_size)
@ -1260,9 +1274,7 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
filemap_invalidate_lock(inode->i_mapping);
f2fs_zero_post_eof_page(inode, offset + len);
filemap_invalidate_unlock(inode->i_mapping);
f2fs_zero_post_eof_page(inode, offset + len, true);
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@ -1547,7 +1559,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
f2fs_zero_post_eof_page(inode, offset + len);
f2fs_zero_post_eof_page(inode, offset + len, false);
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
@ -1670,9 +1682,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
filemap_invalidate_lock(mapping);
f2fs_zero_post_eof_page(inode, offset + len);
filemap_invalidate_unlock(mapping);
f2fs_zero_post_eof_page(inode, offset + len, true);
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@ -1806,7 +1816,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
f2fs_zero_post_eof_page(inode, offset + len);
f2fs_zero_post_eof_page(inode, offset + len, false);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
@ -1864,9 +1874,7 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
if (err)
return err;
filemap_invalidate_lock(inode->i_mapping);
f2fs_zero_post_eof_page(inode, offset + len);
filemap_invalidate_unlock(inode->i_mapping);
f2fs_zero_post_eof_page(inode, offset + len, true);
f2fs_balance_fs(sbi, true);
@ -4914,9 +4922,8 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
if (err)
return err;
filemap_invalidate_lock(inode->i_mapping);
f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from));
filemap_invalidate_unlock(inode->i_mapping);
f2fs_zero_post_eof_page(inode,
iocb->ki_pos + iov_iter_count(from), true);
return count;
}

View File

@ -1071,7 +1071,7 @@ next_step:
}
/* phase == 2 */
node_folio = f2fs_get_node_folio(sbi, nid);
node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR);
if (IS_ERR(node_folio))
continue;
@ -1145,7 +1145,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
nid = le32_to_cpu(sum->nid);
ofs_in_node = le16_to_cpu(sum->ofs_in_node);
node_folio = f2fs_get_node_folio(sbi, nid);
node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR);
if (IS_ERR(node_folio))
return false;
@ -1794,6 +1794,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
struct folio *sum_folio = filemap_get_folio(META_MAPPING(sbi),
GET_SUM_BLOCK(sbi, segno));
if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno))) {
f2fs_err(sbi, "%s: segment %u is used by log",
__func__, segno);
f2fs_bug_on(sbi, 1);
goto skip;
}
if (get_valid_blocks(sbi, segno, false) == 0)
goto freed;
if (gc_type == BG_GC && __is_large_section(sbi) &&
@ -1805,7 +1812,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
sum = folio_address(sum_folio);
if (type != GET_SUM_TYPE((&sum->footer))) {
f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SIT and SSA",
segno, type, GET_SUM_TYPE((&sum->footer)));
f2fs_stop_checkpoint(sbi, false,
STOP_CP_REASON_CORRUPTED_SUMMARY);
@ -2068,6 +2075,13 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
/*
* avoid migrating empty section, as it can be allocated by
* log in parallel.
*/
if (!get_valid_blocks(sbi, segno, true))
continue;
if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno)))
continue;
@ -2182,6 +2196,8 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
MAIN_SECS(sbi) += secs;
if (sbi->allocate_section_hint > MAIN_SECS(sbi))
sbi->allocate_section_hint = MAIN_SECS(sbi);
FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks);
@ -2189,6 +2205,9 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
if (f2fs_is_multi_device(sbi)) {
int last_dev = sbi->s_ndevs - 1;
sbi->allocate_section_hint = FDEV(0).total_segments /
SEGS_PER_SEC(sbi);
FDEV(last_dev).total_segments =
(int)FDEV(last_dev).total_segments + segs;
FDEV(last_dev).end_blk =

View File

@ -27,12 +27,17 @@ static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;
static struct kmem_cache *fsync_node_entry_slab;
static inline bool is_invalid_nid(struct f2fs_sb_info *sbi, nid_t nid)
{
return nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid;
}
/*
* Check whether the given nid is within node id range.
*/
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
if (unlikely(is_invalid_nid(sbi, nid))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.",
__func__, nid);
@ -871,7 +876,8 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
}
if (!done) {
nfolio[i] = f2fs_get_node_folio(sbi, nids[i]);
nfolio[i] = f2fs_get_node_folio(sbi, nids[i],
NODE_TYPE_NON_INODE);
if (IS_ERR(nfolio[i])) {
err = PTR_ERR(nfolio[i]);
f2fs_folio_put(nfolio[0], false);
@ -989,7 +995,7 @@ static int truncate_dnode(struct dnode_of_data *dn)
return 1;
/* get direct node */
folio = f2fs_get_node_folio(sbi, dn->nid);
folio = f2fs_get_node_folio(sbi, dn->nid, NODE_TYPE_NON_INODE);
if (PTR_ERR(folio) == -ENOENT)
return 1;
else if (IS_ERR(folio))
@ -1033,7 +1039,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
folio = f2fs_get_node_folio(F2FS_I_SB(dn->inode), dn->nid);
folio = f2fs_get_node_folio(F2FS_I_SB(dn->inode), dn->nid,
NODE_TYPE_NON_INODE);
if (IS_ERR(folio)) {
trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(folio));
return PTR_ERR(folio);
@ -1111,7 +1118,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
/* get indirect nodes in the path */
for (i = 0; i < idx + 1; i++) {
/* reference count'll be increased */
folios[i] = f2fs_get_node_folio(F2FS_I_SB(dn->inode), nid[i]);
folios[i] = f2fs_get_node_folio(F2FS_I_SB(dn->inode), nid[i],
NODE_TYPE_NON_INODE);
if (IS_ERR(folios[i])) {
err = PTR_ERR(folios[i]);
idx = i - 1;
@ -1496,21 +1504,37 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi,
struct folio *folio, pgoff_t nid,
enum node_type ntype)
{
if (unlikely(nid != nid_of_node(folio) ||
(ntype == NODE_TYPE_INODE && !IS_INODE(folio)) ||
(ntype == NODE_TYPE_XATTR &&
!f2fs_has_xattr_block(ofs_of_node(folio))) ||
time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) {
f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, "
"node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
ntype, nid, nid_of_node(folio), ino_of_node(folio),
ofs_of_node(folio), cpver_of_node(folio),
next_blkaddr_of_node(folio));
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
return -EFSCORRUPTED;
if (unlikely(nid != nid_of_node(folio)))
goto out_err;
switch (ntype) {
case NODE_TYPE_INODE:
if (!IS_INODE(folio))
goto out_err;
break;
case NODE_TYPE_XATTR:
if (!f2fs_has_xattr_block(ofs_of_node(folio)))
goto out_err;
break;
case NODE_TYPE_NON_INODE:
if (IS_INODE(folio))
goto out_err;
break;
default:
break;
}
if (time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))
goto out_err;
return 0;
out_err:
f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, "
"node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
ntype, nid, nid_of_node(folio), ino_of_node(folio),
ofs_of_node(folio), cpver_of_node(folio),
next_blkaddr_of_node(folio));
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
return -EFSCORRUPTED;
}
static struct folio *__get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid,
@ -1546,7 +1570,7 @@ repeat:
if (unlikely(!folio_test_uptodate(folio))) {
err = -EIO;
goto out_err;
goto out_put_err;
}
if (!f2fs_inode_chksum_verify(sbi, folio)) {
@ -1567,9 +1591,10 @@ out_put_err:
return ERR_PTR(err);
}
struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid)
struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid,
enum node_type node_type)
{
return __get_node_folio(sbi, nid, NULL, 0, NODE_TYPE_REGULAR);
return __get_node_folio(sbi, nid, NULL, 0, node_type);
}
struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino)
@ -2634,6 +2659,16 @@ retry:
f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
i = list_first_entry(&nm_i->free_nid_list,
struct free_nid, list);
if (unlikely(is_invalid_nid(sbi, i->nid))) {
spin_unlock(&nm_i->nid_list_lock);
f2fs_err(sbi, "Corrupted nid %u in free_nid_list",
i->nid);
f2fs_stop_checkpoint(sbi, false,
STOP_CP_REASON_CORRUPTED_NID);
return false;
}
*nid = i->nid;
__move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);

View File

@ -57,6 +57,7 @@ enum node_type {
NODE_TYPE_REGULAR,
NODE_TYPE_INODE,
NODE_TYPE_XATTR,
NODE_TYPE_NON_INODE,
};
/*

View File

@ -548,7 +548,7 @@ got_it:
}
/* Get the node page */
node_folio = f2fs_get_node_folio(sbi, nid);
node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR);
if (IS_ERR(node_folio))
return PTR_ERR(node_folio);

View File

@ -2774,6 +2774,8 @@ static int get_new_segment(struct f2fs_sb_info *sbi,
unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
unsigned int alloc_policy = sbi->allocate_section_policy;
unsigned int alloc_hint = sbi->allocate_section_hint;
bool init = true;
int i;
int ret = 0;
@ -2807,6 +2809,21 @@ static int get_new_segment(struct f2fs_sb_info *sbi,
}
#endif
/*
* Prevent allocate_section_hint from exceeding MAIN_SECS()
* due to desynchronization.
*/
if (alloc_policy != ALLOCATE_FORWARD_NOHINT &&
alloc_hint > MAIN_SECS(sbi))
alloc_hint = MAIN_SECS(sbi);
if (alloc_policy == ALLOCATE_FORWARD_FROM_HINT &&
hint < alloc_hint)
hint = alloc_hint;
else if (alloc_policy == ALLOCATE_FORWARD_WITHIN_HINT &&
hint >= alloc_hint)
hint = 0;
find_other_zone:
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
@ -3672,7 +3689,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) ||
f2fs_is_cow_file(inode))
f2fs_is_cow_file(inode) ||
is_inode_flag_set(inode, FI_NEED_IPU))
return CURSEG_HOT_DATA;
return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
inode->i_write_hint);
@ -3936,12 +3954,18 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
int seg_type = log_type_to_seg_type(type);
bool keep_order = (f2fs_lfs_mode(fio->sbi) &&
seg_type == CURSEG_COLD_DATA);
int err;
if (keep_order)
f2fs_down_read(&fio->sbi->io_order_lock);
if (f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio)) {
err = f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio);
if (unlikely(err)) {
f2fs_err_ratelimited(fio->sbi,
"%s Failed to allocate data block, ino:%u, index:%lu, type:%d, old_blkaddr:0x%x, new_blkaddr:0x%x, err:%d",
__func__, fio->ino, folio->index, type,
fio->old_blkaddr, fio->new_blkaddr, err);
if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host))
fscrypt_finalize_bounce_page(&fio->encrypted_page);
folio_end_writeback(folio);

View File

@ -600,6 +600,16 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi));
}
static inline unsigned int get_left_section_blocks(struct f2fs_sb_info *sbi,
enum log_type type, unsigned int segno)
{
if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
return CAP_BLKS_PER_SEC(sbi) - SEGS_TO_BLKS(sbi,
(segno - GET_START_SEG_FROM_SEC(sbi, segno))) -
CURSEG_I(sbi, type)->next_blkoff;
return CAP_BLKS_PER_SEC(sbi) - get_ckpt_valid_blocks(sbi, segno, true);
}
static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
unsigned int node_blocks, unsigned int data_blocks,
unsigned int dent_blocks)
@ -614,14 +624,7 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
if (unlikely(segno == NULL_SEGNO))
return false;
if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) {
left_blocks = CAP_BLKS_PER_SEC(sbi) -
SEGS_TO_BLKS(sbi, (segno - GET_START_SEG_FROM_SEC(sbi, segno))) -
CURSEG_I(sbi, i)->next_blkoff;
} else {
left_blocks = CAP_BLKS_PER_SEC(sbi) -
get_ckpt_valid_blocks(sbi, segno, true);
}
left_blocks = get_left_section_blocks(sbi, i, segno);
blocks = i <= CURSEG_COLD_DATA ? data_blocks : node_blocks;
if (blocks > left_blocks)
@ -634,14 +637,7 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
if (unlikely(segno == NULL_SEGNO))
return false;
if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) {
left_blocks = CAP_BLKS_PER_SEC(sbi) -
SEGS_TO_BLKS(sbi, (segno - GET_START_SEG_FROM_SEC(sbi, segno))) -
CURSEG_I(sbi, CURSEG_HOT_DATA)->next_blkoff;
} else {
left_blocks = CAP_BLKS_PER_SEC(sbi) -
get_ckpt_valid_blocks(sbi, segno, true);
}
left_blocks = get_left_section_blocks(sbi, CURSEG_HOT_DATA, segno);
if (dent_blocks > left_blocks)
return false;

View File

@ -143,6 +143,7 @@ enum {
Opt_extent_cache,
Opt_data_flush,
Opt_reserve_root,
Opt_reserve_node,
Opt_resgid,
Opt_resuid,
Opt_mode,
@ -181,6 +182,7 @@ enum {
Opt_nat_bits,
Opt_jqfmt,
Opt_checkpoint,
Opt_lookup_mode,
Opt_err,
};
@ -244,6 +246,13 @@ static const struct constant_table f2fs_param_errors[] = {
{}
};
static const struct constant_table f2fs_param_lookup_mode[] = {
{"perf", LOOKUP_PERF},
{"compat", LOOKUP_COMPAT},
{"auto", LOOKUP_AUTO},
{}
};
static const struct fs_parameter_spec f2fs_param_specs[] = {
fsparam_enum("background_gc", Opt_gc_background, f2fs_param_background_gc),
fsparam_flag("disable_roll_forward", Opt_disable_roll_forward),
@ -265,6 +274,7 @@ static const struct fs_parameter_spec f2fs_param_specs[] = {
fsparam_flag_no("extent_cache", Opt_extent_cache),
fsparam_flag("data_flush", Opt_data_flush),
fsparam_u32("reserve_root", Opt_reserve_root),
fsparam_u32("reserve_node", Opt_reserve_node),
fsparam_gid("resgid", Opt_resgid),
fsparam_uid("resuid", Opt_resuid),
fsparam_enum("mode", Opt_mode, f2fs_param_mode),
@ -300,6 +310,7 @@ static const struct fs_parameter_spec f2fs_param_specs[] = {
fsparam_enum("memory", Opt_memory_mode, f2fs_param_memory_mode),
fsparam_flag("age_extent_cache", Opt_age_extent_cache),
fsparam_enum("errors", Opt_errors, f2fs_param_errors),
fsparam_enum("lookup_mode", Opt_lookup_mode, f2fs_param_lookup_mode),
{}
};
@ -336,6 +347,8 @@ static match_table_t f2fs_checkpoint_tokens = {
#define F2FS_SPEC_discard_unit (1 << 21)
#define F2FS_SPEC_memory_mode (1 << 22)
#define F2FS_SPEC_errors (1 << 23)
#define F2FS_SPEC_lookup_mode (1 << 24)
#define F2FS_SPEC_reserve_node (1 << 25)
struct f2fs_fs_context {
struct f2fs_mount_info info;
@ -437,22 +450,30 @@ static void f2fs_destroy_casefold_cache(void) { }
static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
{
block_t limit = min((sbi->user_block_count >> 3),
block_t block_limit = min((sbi->user_block_count >> 3),
sbi->user_block_count - sbi->reserved_blocks);
block_t node_limit = sbi->total_node_count >> 3;
/* limit is 12.5% */
if (test_opt(sbi, RESERVE_ROOT) &&
F2FS_OPTION(sbi).root_reserved_blocks > limit) {
F2FS_OPTION(sbi).root_reserved_blocks = limit;
F2FS_OPTION(sbi).root_reserved_blocks > block_limit) {
F2FS_OPTION(sbi).root_reserved_blocks = block_limit;
f2fs_info(sbi, "Reduce reserved blocks for root = %u",
F2FS_OPTION(sbi).root_reserved_blocks);
}
if (!test_opt(sbi, RESERVE_ROOT) &&
if (test_opt(sbi, RESERVE_NODE) &&
F2FS_OPTION(sbi).root_reserved_nodes > node_limit) {
F2FS_OPTION(sbi).root_reserved_nodes = node_limit;
f2fs_info(sbi, "Reduce reserved nodes for root = %u",
F2FS_OPTION(sbi).root_reserved_nodes);
}
if (!test_opt(sbi, RESERVE_ROOT) && !test_opt(sbi, RESERVE_NODE) &&
(!uid_eq(F2FS_OPTION(sbi).s_resuid,
make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
!gid_eq(F2FS_OPTION(sbi).s_resgid,
make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root"
" and reserve_node",
from_kuid_munged(&init_user_ns,
F2FS_OPTION(sbi).s_resuid),
from_kgid_munged(&init_user_ns,
@ -847,6 +868,11 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32;
ctx->spec_mask |= F2FS_SPEC_reserve_root;
break;
case Opt_reserve_node:
ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_NODE);
F2FS_CTX_INFO(ctx).root_reserved_nodes = result.uint_32;
ctx->spec_mask |= F2FS_SPEC_reserve_node;
break;
case Opt_resuid:
F2FS_CTX_INFO(ctx).s_resuid = result.uid;
ctx->spec_mask |= F2FS_SPEC_resuid;
@ -994,6 +1020,10 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
case Opt_checkpoint_enable:
F2FS_CTX_INFO(ctx).unusable_cap_perc = 0;
ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap_perc;
F2FS_CTX_INFO(ctx).unusable_cap = 0;
ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap;
ctx_clear_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
default:
@ -1149,6 +1179,10 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_nat_bits:
ctx_set_opt(ctx, F2FS_MOUNT_NAT_BITS);
break;
case Opt_lookup_mode:
F2FS_CTX_INFO(ctx).lookup_mode = result.uint_32;
ctx->spec_mask |= F2FS_SPEC_lookup_mode;
break;
}
return 0;
}
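
A hypothetical userspace sketch showing how the new knobs would be passed at mount time; the device node, mount point, and values are invented for illustration:

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* reserve 4096 node IDs for privileged users, force hash-only lookup */
		if (mount("/dev/sda1", "/mnt", "f2fs", 0,
			  "reserve_root=32768,reserve_node=4096,lookup_mode=perf")) {
			perror("mount");
			return 1;
		}
		return 0;
	}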
@@ -1191,7 +1225,11 @@ static int f2fs_check_quota_consistency(struct fs_context *fc,
goto err_jquota_change;
if (old_qname) {
-if (strcmp(old_qname, new_qname) == 0) {
+if (!new_qname) {
+f2fs_info(sbi, "remove qf_name %s",
+old_qname);
+continue;
+} else if (strcmp(old_qname, new_qname) == 0) {
ctx->qname_mask &= ~(1 << i);
continue;
}
@@ -1430,6 +1468,14 @@ static int f2fs_check_opt_consistency(struct fs_context *fc,
ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT);
ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_ROOT;
}
if (test_opt(sbi, RESERVE_NODE) &&
(ctx->opt_mask & F2FS_MOUNT_RESERVE_NODE) &&
ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_NODE)) {
f2fs_info(sbi, "Preserve previous reserve_node=%u",
F2FS_OPTION(sbi).root_reserved_nodes);
ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_NODE);
ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_NODE;
}
err = f2fs_check_test_dummy_encryption(fc, sb);
if (err)
@@ -1629,6 +1675,9 @@ static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb)
if (ctx->spec_mask & F2FS_SPEC_reserve_root)
F2FS_OPTION(sbi).root_reserved_blocks =
F2FS_CTX_INFO(ctx).root_reserved_blocks;
if (ctx->spec_mask & F2FS_SPEC_reserve_node)
F2FS_OPTION(sbi).root_reserved_nodes =
F2FS_CTX_INFO(ctx).root_reserved_nodes;
if (ctx->spec_mask & F2FS_SPEC_resgid)
F2FS_OPTION(sbi).s_resgid = F2FS_CTX_INFO(ctx).s_resgid;
if (ctx->spec_mask & F2FS_SPEC_resuid)
@@ -1658,6 +1707,8 @@ static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb)
F2FS_OPTION(sbi).memory_mode = F2FS_CTX_INFO(ctx).memory_mode;
if (ctx->spec_mask & F2FS_SPEC_errors)
F2FS_OPTION(sbi).errors = F2FS_CTX_INFO(ctx).errors;
if (ctx->spec_mask & F2FS_SPEC_lookup_mode)
F2FS_OPTION(sbi).lookup_mode = F2FS_CTX_INFO(ctx).lookup_mode;
f2fs_apply_compression(fc, sb);
f2fs_apply_test_dummy_encryption(fc, sb);
@@ -2349,9 +2400,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
seq_puts(seq, "fragment:block");
seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
-if (test_opt(sbi, RESERVE_ROOT))
-seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
+if (test_opt(sbi, RESERVE_ROOT) || test_opt(sbi, RESERVE_NODE))
+seq_printf(seq, ",reserve_root=%u,reserve_node=%u,resuid=%u,"
+"resgid=%u",
F2FS_OPTION(sbi).root_reserved_blocks,
+F2FS_OPTION(sbi).root_reserved_nodes,
from_kuid_munged(&init_user_ns,
F2FS_OPTION(sbi).s_resuid),
from_kgid_munged(&init_user_ns,
@@ -2422,6 +2475,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(sbi, NAT_BITS))
seq_puts(seq, ",nat_bits");
if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_PERF)
seq_show_option(seq, "lookup_mode", "perf");
else if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_COMPAT)
seq_show_option(seq, "lookup_mode", "compat");
else if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_AUTO)
seq_show_option(seq, "lookup_mode", "auto");
return 0;
}
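
With the change above, setting either reservation makes both counters visible in the mount options; an illustrative /proc/mounts fragment (device and values invented, unrelated options elided):

	/dev/sda1 /mnt f2fs rw,...,reserve_root=32768,reserve_node=4096,resuid=0,resgid=0,...,lookup_mode=perf 0 0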
@@ -2486,6 +2546,8 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount)
#endif
f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL);
F2FS_OPTION(sbi).lookup_mode = LOOKUP_PERF;
}
#ifdef CONFIG_QUOTA
@@ -2566,21 +2628,39 @@ out_unlock:
restore_flag:
sbi->gc_mode = gc_mode;
sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
f2fs_info(sbi, "f2fs_disable_checkpoint() finish, err:%d", err);
return err;
}
static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
{
-int retry = DEFAULT_RETRY_IO_COUNT;
+unsigned int nr_pages = get_pages(sbi, F2FS_DIRTY_DATA) / 16;
+long long start, writeback, end;
+f2fs_info(sbi, "f2fs_enable_checkpoint() starts, meta: %lld, node: %lld, data: %lld",
+get_pages(sbi, F2FS_DIRTY_META),
+get_pages(sbi, F2FS_DIRTY_NODES),
+get_pages(sbi, F2FS_DIRTY_DATA));
+f2fs_update_time(sbi, ENABLE_TIME);
+start = ktime_get();
/* we should flush all the data to keep data consistency */
-do {
-sync_inodes_sb(sbi->sb);
+while (get_pages(sbi, F2FS_DIRTY_DATA)) {
+writeback_inodes_sb_nr(sbi->sb, nr_pages, WB_REASON_SYNC);
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
-} while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--);
-if (unlikely(retry < 0))
-f2fs_warn(sbi, "checkpoint=enable has some unwritten data.");
+if (f2fs_time_over(sbi, ENABLE_TIME))
+break;
+}
+writeback = ktime_get();
+sync_inodes_sb(sbi->sb);
+if (unlikely(get_pages(sbi, F2FS_DIRTY_DATA)))
+f2fs_warn(sbi, "checkpoint=enable has some unwritten data: %lld",
+get_pages(sbi, F2FS_DIRTY_DATA));
f2fs_down_write(&sbi->gc_lock);
f2fs_dirty_to_prefree(sbi);
@@ -2593,6 +2673,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
/* Let's ensure there's no pending checkpoint anymore */
f2fs_flush_ckpt_thread(sbi);
end = ktime_get();
f2fs_info(sbi, "f2fs_enable_checkpoint() finishes, writeback:%llu, sync:%llu",
ktime_ms_delta(writeback, start),
ktime_ms_delta(end, writeback));
}
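
The writeback loop above is now bounded by the ENABLE_TIME interval instead of a fixed retry count. For context, f2fs_time_over() is roughly the following jiffies check (paraphrased from f2fs.h, not part of this diff):

	static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
	{
		unsigned long interval = sbi->interval_time[type] * HZ;

		return time_after(jiffies, sbi->last_time[type] + interval);
	}

f2fs_update_time(sbi, ENABLE_TIME) stamps last_time[ENABLE_TIME] at the start, so the loop gives up after DEF_ENABLE_INTERVAL seconds and falls through to a final sync_inodes_sb().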
static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
@@ -4156,6 +4242,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->total_node_count = SEGS_TO_BLKS(sbi,
((le32_to_cpu(raw_super->segment_count_nat) / 2) *
NAT_ENTRY_PER_BLOCK));
sbi->allocate_section_hint = le32_to_cpu(raw_super->section_count);
sbi->allocate_section_policy = ALLOCATE_FORWARD_NOHINT;
F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino);
F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
@@ -4179,6 +4267,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
sbi->interval_time[ENABLE_TIME] = DEF_ENABLE_INTERVAL;
sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] =
DEF_UMOUNT_DISCARD_TIMEOUT;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -4637,9 +4726,11 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
sbi->aligned_blksize = true;
sbi->bggc_io_aware = AWARE_ALL_IO;
#ifdef CONFIG_BLK_DEV_ZONED
sbi->max_open_zones = UINT_MAX;
sbi->blkzone_alloc_policy = BLKZONE_ALLOC_PRIOR_SEQ;
sbi->bggc_io_aware = AWARE_READ_IO;
#endif
for (i = 0; i < max_devices; i++) {
@@ -4667,6 +4758,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
SEGS_TO_BLKS(sbi,
FDEV(i).total_segments) - 1 +
le32_to_cpu(raw_super->segment0_blkaddr);
sbi->allocate_section_hint = FDEV(i).total_segments /
SEGS_PER_SEC(sbi);
} else {
FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
FDEV(i).end_blk = FDEV(i).start_blk +
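
The bggc_io_aware values set earlier in this file are assumed to be ordered as follows, consistent with the "t < AWARE_ALL_IO || t > AWARE_NONE" range check in the sysfs handler later in this diff; the definition is not shown here, and the comments are interpretive:

	/* assumed shape, not part of this diff */
	enum {
		AWARE_ALL_IO,	/* background GC backs off for any in-flight I/O */
		AWARE_READ_IO,	/* backs off for reads only; default on zoned devices */
		AWARE_NONE,	/* issues GC I/O regardless of foreground load */
	};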


@@ -281,6 +281,22 @@ static ssize_t encoding_flags_show(struct f2fs_attr *a,
le16_to_cpu(F2FS_RAW_SUPER(sbi)->s_encoding_flags));
}
static ssize_t effective_lookup_mode_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
switch (F2FS_OPTION(sbi).lookup_mode) {
case LOOKUP_PERF:
return sysfs_emit(buf, "perf\n");
case LOOKUP_COMPAT:
return sysfs_emit(buf, "compat\n");
case LOOKUP_AUTO:
if (sb_no_casefold_compat_fallback(sbi->sb))
return sysfs_emit(buf, "auto:perf\n");
return sysfs_emit(buf, "auto:compat\n");
}
return 0;
}
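
A hypothetical userspace read of the new attribute; the exact sysfs directory name depends on the device (here assumed to be sda1):

	#include <stdio.h>

	int main(void)
	{
		char mode[32];
		FILE *f = fopen("/sys/fs/f2fs/sda1/effective_lookup_mode", "r");

		if (f && fgets(mode, sizeof(mode), f))
			printf("lookup mode in effect: %s", mode);	/* e.g. "auto:perf" */
		if (f)
			fclose(f);
		return 0;
	}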
static ssize_t mounted_time_sec_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -866,6 +882,27 @@ out:
return count;
}
if (!strcmp(a->attr.name, "bggc_io_aware")) {
if (t < AWARE_ALL_IO || t > AWARE_NONE)
return -EINVAL;
sbi->bggc_io_aware = t;
return count;
}
if (!strcmp(a->attr.name, "allocate_section_hint")) {
if (t < 0 || t > MAIN_SECS(sbi))
return -EINVAL;
sbi->allocate_section_hint = t;
return count;
}
if (!strcmp(a->attr.name, "allocate_section_policy")) {
if (t < ALLOCATE_FORWARD_NOHINT || t > ALLOCATE_FORWARD_FROM_HINT)
return -EINVAL;
sbi->allocate_section_policy = t;
return count;
}
*ui = (unsigned int)t;
return count;
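
A matching sketch for the new writable attributes; the value 1 assumes it falls inside the [ALLOCATE_FORWARD_NOHINT, ALLOCATE_FORWARD_FROM_HINT] range enforced above, and the device directory is again invented:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/fs/f2fs/sda1/allocate_section_policy", "w");

		if (!f)
			return 1;
		fprintf(f, "1\n");	/* out-of-range values are rejected with -EINVAL */
		return fclose(f);
	}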
@@ -1138,6 +1175,8 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
F2FS_SBI_GENERAL_RW_ATTR(dir_level);
F2FS_SBI_GENERAL_RW_ATTR(allocate_section_hint);
F2FS_SBI_GENERAL_RW_ATTR(allocate_section_policy);
#ifdef CONFIG_F2FS_IOSTAT
F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
@@ -1175,6 +1214,7 @@ F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy);
#endif
F2FS_SBI_GENERAL_RW_ATTR(carve_out);
F2FS_SBI_GENERAL_RW_ATTR(reserved_pin_section);
F2FS_SBI_GENERAL_RW_ATTR(bggc_io_aware);
/* STAT_INFO ATTR */
#ifdef CONFIG_F2FS_STAT_FS
@@ -1211,6 +1251,7 @@ F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
F2FS_GENERAL_RO_ATTR(unusable);
F2FS_GENERAL_RO_ATTR(encoding);
F2FS_GENERAL_RO_ATTR(encoding_flags);
F2FS_GENERAL_RO_ATTR(effective_lookup_mode);
F2FS_GENERAL_RO_ATTR(mounted_time_sec);
F2FS_GENERAL_RO_ATTR(main_blkaddr);
F2FS_GENERAL_RO_ATTR(pending_discard);
@@ -1303,6 +1344,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(discard_idle_interval),
ATTR_LIST(gc_idle_interval),
ATTR_LIST(umount_discard_timeout),
ATTR_LIST(bggc_io_aware),
#ifdef CONFIG_F2FS_IOSTAT
ATTR_LIST(iostat_enable),
ATTR_LIST(iostat_period_ms),
@@ -1329,6 +1371,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(current_reserved_blocks),
ATTR_LIST(encoding),
ATTR_LIST(encoding_flags),
ATTR_LIST(effective_lookup_mode),
ATTR_LIST(mounted_time_sec),
#ifdef CONFIG_F2FS_STAT_FS
ATTR_LIST(cp_foreground_calls),
@@ -1371,6 +1414,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(max_read_extent_count),
ATTR_LIST(carve_out),
ATTR_LIST(reserved_pin_section),
ATTR_LIST(allocate_section_hint),
ATTR_LIST(allocate_section_policy),
NULL,
};
ATTRIBUTE_GROUPS(f2fs);
@@ -1723,12 +1768,15 @@ static int __maybe_unused disk_map_seq_show(struct seq_file *seq,
seq_printf(seq, " Main : 0x%010x (%10d)\n",
SM_I(sbi)->main_blkaddr,
le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main));
-seq_printf(seq, " # of Sections : %12d\n",
-le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count));
+seq_printf(seq, " Block size : %12lu KB\n", F2FS_BLKSIZE >> 10);
+seq_printf(seq, " Segment size : %12d MB\n",
+(BLKS_PER_SEG(sbi) << (F2FS_BLKSIZE_BITS - 10)) >> 10);
seq_printf(seq, " Segs/Sections : %12d\n",
SEGS_PER_SEC(sbi));
seq_printf(seq, " Section size : %12d MB\n",
-SEGS_PER_SEC(sbi) << 1);
+(BLKS_PER_SEC(sbi) << (F2FS_BLKSIZE_BITS - 10)) >> 10);
+seq_printf(seq, " # of Sections : %12d\n",
+le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count));
if (!f2fs_is_multi_device(sbi))
return 0;
@@ -1742,6 +1790,69 @@ static int __maybe_unused disk_map_seq_show(struct seq_file *seq,
return 0;
}
static int __maybe_unused donation_list_seq_show(struct seq_file *seq,
void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
struct f2fs_inode_info *fi;
struct dentry *dentry;
char *buf, *path;
int i;
buf = f2fs_getname(sbi);
if (!buf)
return 0;
seq_printf(seq, "Donation List\n");
seq_printf(seq, " # of files : %u\n", sbi->donate_files);
seq_printf(seq, " %-50s %10s %20s %20s %22s\n",
"File path", "Status", "Donation offset (kb)",
"Donation size (kb)", "File cached size (kb)");
seq_printf(seq, "---\n");
for (i = 0; i < sbi->donate_files; i++) {
spin_lock(&sbi->inode_lock[DONATE_INODE]);
if (list_empty(&sbi->inode_list[DONATE_INODE])) {
spin_unlock(&sbi->inode_lock[DONATE_INODE]);
break;
}
fi = list_first_entry(&sbi->inode_list[DONATE_INODE],
struct f2fs_inode_info, gdonate_list);
list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[DONATE_INODE]);
if (!inode)
continue;
inode_lock_shared(inode);
dentry = d_find_alias(inode);
if (!dentry) {
path = NULL;
} else {
path = dentry_path_raw(dentry, buf, PATH_MAX);
if (IS_ERR(path))
goto next;
}
seq_printf(seq, " %-50s %10s %20llu %20llu %22llu\n",
path ? path : "<unlinked>",
is_inode_flag_set(inode, FI_DONATE_FINISHED) ?
"Evicted" : "Donated",
(loff_t)fi->donate_start << (PAGE_SHIFT - 10),
(loff_t)(fi->donate_end + 1) << (PAGE_SHIFT - 10),
(loff_t)inode->i_mapping->nrpages << (PAGE_SHIFT - 10));
next:
dput(dentry);
inode_unlock_shared(inode);
iput(inode);
}
f2fs_putname(buf);
return 0;
}
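
Illustrative output of the resulting /proc/fs/f2fs/<dev>/donation_list file (the path and sizes are invented):

	Donation List
	 # of files : 1
	 File path                                              Status Donation offset (kb)   Donation size (kb) File cached size (kb)
	---
	 /data/app/base.apk                                    Donated                    0                 1024                   512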
#ifdef CONFIG_F2FS_FAULT_INJECTION
static int __maybe_unused inject_stats_seq_show(struct seq_file *seq,
void *offset)
@@ -1851,6 +1962,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
discard_plist_seq_show, sb);
proc_create_single_data("disk_map", 0444, sbi->s_proc,
disk_map_seq_show, sb);
proc_create_single_data("donation_list", 0444, sbi->s_proc,
donation_list_seq_show, sb);
#ifdef CONFIG_F2FS_FAULT_INJECTION
proc_create_single_data("inject_stats", 0444, sbi->s_proc,
inject_stats_seq_show, sb);


@@ -79,6 +79,7 @@ enum stop_cp_reason {
STOP_CP_REASON_FLUSH_FAIL,
STOP_CP_REASON_NO_SEGMENT,
STOP_CP_REASON_CORRUPTED_FREE_BITMAP,
STOP_CP_REASON_CORRUPTED_NID,
STOP_CP_REASON_MAX,
};
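
Reason codes like the new one are passed to f2fs_stop_checkpoint(); a sketch of how it would be raised when a corrupted nid is found in the free list (the actual call site is outside this excerpt, so the condition is illustrative):

	/* illustrative caller, assuming the usual helper signature */
	if (unlikely(nid >= NM_I(sbi)->max_nid))
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_NID);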