mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-01-12 01:20:14 +00:00
f2fs-for-6.18-rc1
This release focuses on two primary updates for Android devices. First, it sets
hash-based file name lookup as the default method to improve performance, while
retaining an option to fall back to a linear lookup. Second, it resolves a
persistent issue with the checkpoint=enable feature. The update further boosts
performance by prefetching node blocks, merging FUA writes more efficiently, and
optimizing block allocation policies.
The release is rounded out by a comprehensive set of bug fixes that address
memory safety, data integrity, and potential system hangs, along with minor
documentation and code clean-ups.
Enhancement:
- add mount option and sysfs entry to tune the lookup mode
- dump more information and add a timeout when enabling/disabling checkpoints
- readahead node blocks in F2FS_GET_BLOCK_PRECACHE mode
- merge FUA command with the existing writes
- allocate HOT_DATA for IPU writes
- Use allocate_section_policy to control write priority in multi-devices setups
- add reserved nodes for privileged users
- Add bggc_io_aware to adjust the priority of BG_GC when issuing IO
- show the list of donation files
Bug fix:
- add missing dput() when printing the donation list
- fix UAF issue in f2fs_merge_page_bio()
- add sanity check on ei.len in __update_extent_tree_range()
- fix infinite loop in __insert_extent_tree()
- fix zero-sized extent for precache extents
- fix to mitigate overhead of f2fs_zero_post_eof_page()
- fix to avoid migrating empty section
- fix to truncate first page in error path of f2fs_truncate()
- fix to update map->m_next_extent correctly in f2fs_map_blocks()
- fix wrong layout information on 16KB page
- fix to do sanity check on node footer for non inode dnode
- fix to avoid NULL pointer dereference in f2fs_check_quota_consistency()
- fix to detect potential corrupted nid in free_nid_list
- fix to clear unusable_cap for checkpoint=enable
- fix to zero data after EOF for compressed file correctly
- fix to avoid overflow while left shift operation
- fix condition in __allow_reserved_blocks()
-----BEGIN PGP SIGNATURE-----
iQIzBAABCgAdFiEE00UqedjCtOrGVvQiQBSofoJIUNIFAmjgDGsACgkQQBSofoJI
UNICWQ//VJPl1HPhdvWB1QhGIL/kt0/9yxhmgdz3NAeU399NfE9rTvMQC9gunLV0
EW0o0EUhI/nOM+m/bOKlqwvklYe6AcO4RglXDzE3eq13k3Z3g3phM+YUwXQib/m5
jRcDWnHwSd9YY5iTHcJlxsVlWBe8nEQXJlHjo6+Iq70bLfT50hTiqPbgYwjoBy+B
ISolj70XIFXlPsciG9AW7VOGjJBPMsNsRqrd08neYxVycIhC8rcolTLm+8hUQkLc
9y/E+wYypYlaHrN8jBqYLNOXBffql+9qOFDKAXRwDvfVxt4nIlLUHzcLvtVLDGC3
hTMPIcKm8D3EwqxY4SjpQH66EkC63XrquFm9zveU4ckJhs4++Kb9uwuKUofNhCWj
8gw9OKafb8SSoBimjnCpQpXecvfwMbIoTUPJ5ytpNV+q27eBs+pe3lkDcA2O4Xdu
SEMGeBlrxvOAgrRbnE65uIv/GjXcUK9LqXERuErjNs/YJOrj/ByDT2wJH5yqASwH
9csO/3fKc91EAGy+Kd49z3E8S2wuoI+22noir/AB7WKyRg5ZO7q3ZiZxqsrc1iJN
Z/gh0QrWVQVVnn23z8VPArQX2fMZQ8iOMvcM54G+05ipj3mUBNT5eZlyEPb3FcUe
o4XvTtKkcFhEIawf+WgED07PBpdzz5w1f8hx3EWCLda0LacHILQ=
=cIQy
-----END PGP SIGNATURE-----
Merge tag 'f2fs-for-6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"This focuses on two primary updates for Android devices.
First, it sets hash-based file name lookup as the default method to
improve performance, while retaining an option to fall back to a
linear lookup.
Second, it resolves a persistent issue with the 'checkpoint=enable'
feature.
The update further boosts performance by prefetching node blocks,
merging FUA writes more efficiently, and optimizing block allocation
policies.
The release is rounded out by a comprehensive set of bug fixes that
address memory safety, data integrity, and potential system hangs,
along with minor documentation and code clean-ups.
Enhancements:
- add mount option and sysfs entry to tune the lookup mode
- dump more information and add a timeout when enabling/disabling
checkpoints
- readahead node blocks in F2FS_GET_BLOCK_PRECACHE mode
- merge FUA command with the existing writes
- allocate HOT_DATA for IPU writes
- Use allocate_section_policy to control write priority in
multi-devices setups
- add reserved nodes for privileged users
- Add bggc_io_aware to adjust the priority of BG_GC when issuing IO
- show the list of donation files
Bug fixes:
- add missing dput() when printing the donation list
- fix UAF issue in f2fs_merge_page_bio()
- add sanity check on ei.len in __update_extent_tree_range()
- fix infinite loop in __insert_extent_tree()
- fix zero-sized extent for precache extents
- fix to mitigate overhead of f2fs_zero_post_eof_page()
- fix to avoid migrating empty section
- fix to truncate first page in error path of f2fs_truncate()
- fix to update map->m_next_extent correctly in f2fs_map_blocks()
- fix wrong layout information on 16KB page
- fix to do sanity check on node footer for non inode dnode
- fix to avoid NULL pointer dereference in
f2fs_check_quota_consistency()
- fix to detect potential corrupted nid in free_nid_list
- fix to clear unusable_cap for checkpoint=enable
- fix to zero data after EOF for compressed file correctly
- fix to avoid overflow while left shift operation
- fix condition in __allow_reserved_blocks()"
* tag 'f2fs-for-6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (43 commits)
f2fs: add missing dput() when printing the donation list
f2fs: fix UAF issue in f2fs_merge_page_bio()
f2fs: readahead node blocks in F2FS_GET_BLOCK_PRECACHE mode
f2fs: add sanity check on ei.len in __update_extent_tree_range()
f2fs: fix infinite loop in __insert_extent_tree()
f2fs: fix zero-sized extent for precache extents
f2fs: fix to mitigate overhead of f2fs_zero_post_eof_page()
f2fs: fix to avoid migrating empty section
f2fs: fix to truncate first page in error path of f2fs_truncate()
f2fs: fix to update map->m_next_extent correctly in f2fs_map_blocks()
f2fs: fix wrong layout information on 16KB page
f2fs: clean up error handing of f2fs_submit_page_read()
f2fs: avoid unnecessary folio_clear_uptodate() for cleanup
f2fs: merge FUA command with the existing writes
f2fs: allocate HOT_DATA for IPU writes
f2fs: Use allocate_section_policy to control write priority in multi-devices setups
Documentation: f2fs: Reword title
Documentation: f2fs: Indent compression_mode option list
Documentation: f2fs: Wrap snippets in literal code blocks
Documentation: f2fs: Span write hint table section rows
...
This commit is contained in:
commit
86d563ac5f
@ -822,8 +822,8 @@ What: /sys/fs/f2fs/<disk>/gc_valid_thresh_ratio
|
||||
Date: September 2024
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: It controls the valid block ratio threshold not to trigger excessive GC
|
||||
for zoned deivces. The initial value of it is 95(%). F2FS will stop the
|
||||
background GC thread from intiating GC for sections having valid blocks
|
||||
for zoned devices. The initial value of it is 95(%). F2FS will stop the
|
||||
background GC thread from initiating GC for sections having valid blocks
|
||||
exceeding the ratio.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/max_read_extent_count
|
||||
@ -847,7 +847,7 @@ Description: For several zoned storage devices, vendors will provide extra space
|
||||
filesystem level GC. To do that, we can reserve the space using
|
||||
reserved_blocks. However, it is not enough, since this extra space should
|
||||
not be shown to users. So, with this new sysfs node, we can hide the space
|
||||
by substracting reserved_blocks from total bytes.
|
||||
by subtracting reserved_blocks from total bytes.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/encoding_flags
|
||||
Date: April 2025
|
||||
@ -883,3 +883,53 @@ Date: June 2025
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: Control GC algorithm for boost GC. 0: cost benefit, 1: greedy
|
||||
Default: 1
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/effective_lookup_mode
|
||||
Date: August 2025
|
||||
Contact: "Daniel Lee" <chullee@google.com>
|
||||
Description:
|
||||
This is a read-only entry to show the effective directory lookup mode
|
||||
F2FS is currently using for casefolded directories.
|
||||
This considers both the "lookup_mode" mount option and the on-disk
|
||||
encoding flag, SB_ENC_NO_COMPAT_FALLBACK_FL.
|
||||
|
||||
Possible values are:
|
||||
- "perf": Hash-only lookup.
|
||||
- "compat": Hash-based lookup with a linear search fallback enabled
|
||||
- "auto:perf": lookup_mode is auto and fallback is disabled on-disk
|
||||
- "auto:compat": lookup_mode is auto and fallback is enabled on-disk
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/bggc_io_aware
|
||||
Date: August 2025
|
||||
Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
|
||||
Description: Used to adjust the BG_GC priority when there is pending IO, with a default value
|
||||
of 0. Specifically, for ZUFS, the default value is 1.
|
||||
|
||||
================== ======================================================
|
||||
value description
|
||||
bggc_io_aware = 0 skip background GC if there is any kind of pending IO
|
||||
bggc_io_aware = 1 skip background GC if there is pending read IO
|
||||
bggc_io_aware = 2 ignore pending IO for background GC
|
||||
================== ======================================================
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/allocate_section_hint
|
||||
Date: August 2025
|
||||
Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
|
||||
Description: Indicates the hint section between the first device and others in multi-devices
|
||||
setup. It defaults to the end of the first device in sections. For a single storage
|
||||
device, it defaults to the total number of sections. It can be manually set to match
|
||||
scenarios where multi-devices are mapped to the same dm device.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/allocate_section_policy
|
||||
Date: August 2025
|
||||
Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
|
||||
Description: Controls write priority in multi-devices setups. A value of 0 means normal writing.
|
||||
A value of 1 prioritizes writing to devices before the allocate_section_hint. A value of 2
|
||||
prioritizes writing to devices after the allocate_section_hint. The default is 0.
|
||||
|
||||
=========================== ==========================================================
|
||||
value description
|
||||
allocate_section_policy = 0 Normal writing
|
||||
allocate_section_policy = 1 Prioritize writing to section before allocate_section_hint
|
||||
allocate_section_policy = 2 Prioritize writing to section after allocate_section_hint
|
||||
=========================== ==========================================================
|
||||
|
||||
@ -1,8 +1,11 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==========================================
|
||||
WHAT IS Flash-Friendly File System (F2FS)?
|
||||
==========================================
|
||||
=================================
|
||||
Flash-Friendly File System (F2FS)
|
||||
=================================
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have
|
||||
been equipped on a variety of systems ranging from mobile to server systems. Since
|
||||
@ -173,9 +176,12 @@ data_flush Enable data flushing before checkpoint in order to
|
||||
persist data of regular and symlink.
|
||||
reserve_root=%d Support configuring reserved space which is used for
|
||||
allocation from a privileged user with specified uid or
|
||||
gid, unit: 4KB, the default limit is 0.2% of user blocks.
|
||||
resuid=%d The user ID which may use the reserved blocks.
|
||||
resgid=%d The group ID which may use the reserved blocks.
|
||||
gid, unit: 4KB, the default limit is 12.5% of user blocks.
|
||||
reserve_node=%d Support configuring reserved nodes which are used for
|
||||
allocation from a privileged user with specified uid or
|
||||
gid, the default limit is 12.5% of all nodes.
|
||||
resuid=%d The user ID which may use the reserved blocks and nodes.
|
||||
resgid=%d The group ID which may use the reserved blocks and nodes.
|
||||
fault_injection=%d Enable fault injection in all supported types with
|
||||
specified injection rate.
|
||||
fault_type=%d Support configuring fault injection type, should be
|
||||
@ -291,9 +297,13 @@ compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo"
|
||||
"lz4", "zstd" and "lzo-rle" algorithm.
|
||||
compress_algorithm=%s:%d Control compress algorithm and its compress level, now, only
|
||||
"lz4" and "zstd" support compress level config.
|
||||
|
||||
========= ===========
|
||||
algorithm level range
|
||||
========= ===========
|
||||
lz4 3 - 16
|
||||
zstd 1 - 22
|
||||
========= ===========
|
||||
compress_log_size=%u Support configuring compress cluster size. The size will
|
||||
be 4KB * (1 << %u). The default and minimum sizes are 16KB.
|
||||
compress_extension=%s Support adding specified extension, so that f2fs can enable
|
||||
@ -357,6 +367,7 @@ errors=%s Specify f2fs behavior on critical errors. This supports modes:
|
||||
panic immediately, continue without doing anything, and remount
|
||||
the partition in read-only mode. By default it uses "continue"
|
||||
mode.
|
||||
|
||||
====================== =============== =============== ========
|
||||
mode continue remount-ro panic
|
||||
====================== =============== =============== ========
|
||||
@ -370,6 +381,25 @@ errors=%s Specify f2fs behavior on critical errors. This supports modes:
|
||||
====================== =============== =============== ========
|
||||
nat_bits Enable nat_bits feature to enhance full/empty nat blocks access,
|
||||
by default it's disabled.
|
||||
lookup_mode=%s Control the directory lookup behavior for casefolded
|
||||
directories. This option has no effect on directories
|
||||
that do not have the casefold feature enabled.
|
||||
|
||||
================== ========================================
|
||||
Value Description
|
||||
================== ========================================
|
||||
perf (Default) Enforces a hash-only lookup.
|
||||
The linear search fallback is always
|
||||
disabled, ignoring the on-disk flag.
|
||||
compat Enables the linear search fallback for
|
||||
compatibility with directory entries
|
||||
created by an older kernel that used a
|
||||
different case-folding algorithm.
|
||||
This mode ignores the on-disk flag.
|
||||
auto F2FS determines the mode based on the
|
||||
on-disk `SB_ENC_NO_COMPAT_FALLBACK_FL`
|
||||
flag.
|
||||
================== ========================================
|
||||
======================== ============================================================
|
||||
|
||||
Debugfs Entries
|
||||
@ -795,11 +825,13 @@ ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
|
||||
extension list " "
|
||||
|
||||
-- buffered io
|
||||
------------------------------------------------------------------
|
||||
N/A COLD_DATA WRITE_LIFE_EXTREME
|
||||
N/A HOT_DATA WRITE_LIFE_SHORT
|
||||
N/A WARM_DATA WRITE_LIFE_NOT_SET
|
||||
|
||||
-- direct io
|
||||
------------------------------------------------------------------
|
||||
WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
|
||||
WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
|
||||
WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
|
||||
@ -915,24 +947,26 @@ compression enabled files (refer to "Compression implementation" section for how
|
||||
enable compression on a regular inode).
|
||||
|
||||
1) compress_mode=fs
|
||||
This is the default option. f2fs does automatic compression in the writeback of the
|
||||
compression enabled files.
|
||||
|
||||
This is the default option. f2fs does automatic compression in the writeback of the
|
||||
compression enabled files.
|
||||
|
||||
2) compress_mode=user
|
||||
This disables the automatic compression and gives the user discretion of choosing the
|
||||
target file and the timing. The user can do manual compression/decompression on the
|
||||
compression enabled files using F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE
|
||||
ioctls like the below.
|
||||
|
||||
To decompress a file,
|
||||
This disables the automatic compression and gives the user discretion of choosing the
|
||||
target file and the timing. The user can do manual compression/decompression on the
|
||||
compression enabled files using F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE
|
||||
ioctls like the below.
|
||||
|
||||
fd = open(filename, O_WRONLY, 0);
|
||||
ret = ioctl(fd, F2FS_IOC_DECOMPRESS_FILE);
|
||||
To decompress a file::
|
||||
|
||||
To compress a file,
|
||||
fd = open(filename, O_WRONLY, 0);
|
||||
ret = ioctl(fd, F2FS_IOC_DECOMPRESS_FILE);
|
||||
|
||||
fd = open(filename, O_WRONLY, 0);
|
||||
ret = ioctl(fd, F2FS_IOC_COMPRESS_FILE);
|
||||
To compress a file::
|
||||
|
||||
fd = open(filename, O_WRONLY, 0);
|
||||
ret = ioctl(fd, F2FS_IOC_COMPRESS_FILE);
|
||||
|
||||
NVMe Zoned Namespace devices
|
||||
----------------------------
|
||||
@ -962,32 +996,32 @@ reserved and used by another filesystem or for different purposes. Once that
|
||||
external usage is complete, the device aliasing file can be deleted, releasing
|
||||
the reserved space back to F2FS for its own use.
|
||||
|
||||
<use-case>
|
||||
.. code-block::
|
||||
|
||||
# ls /dev/vd*
|
||||
/dev/vdb (32GB) /dev/vdc (32GB)
|
||||
# mkfs.ext4 /dev/vdc
|
||||
# mkfs.f2fs -c /dev/vdc@vdc.file /dev/vdb
|
||||
# mount /dev/vdb /mnt/f2fs
|
||||
# ls -l /mnt/f2fs
|
||||
vdc.file
|
||||
# df -h
|
||||
/dev/vdb 64G 33G 32G 52% /mnt/f2fs
|
||||
# ls /dev/vd*
|
||||
/dev/vdb (32GB) /dev/vdc (32GB)
|
||||
# mkfs.ext4 /dev/vdc
|
||||
# mkfs.f2fs -c /dev/vdc@vdc.file /dev/vdb
|
||||
# mount /dev/vdb /mnt/f2fs
|
||||
# ls -l /mnt/f2fs
|
||||
vdc.file
|
||||
# df -h
|
||||
/dev/vdb 64G 33G 32G 52% /mnt/f2fs
|
||||
|
||||
# mount -o loop /dev/vdc /mnt/ext4
|
||||
# df -h
|
||||
/dev/vdb 64G 33G 32G 52% /mnt/f2fs
|
||||
/dev/loop7 32G 24K 30G 1% /mnt/ext4
|
||||
# umount /mnt/ext4
|
||||
# mount -o loop /dev/vdc /mnt/ext4
|
||||
# df -h
|
||||
/dev/vdb 64G 33G 32G 52% /mnt/f2fs
|
||||
/dev/loop7 32G 24K 30G 1% /mnt/ext4
|
||||
# umount /mnt/ext4
|
||||
|
||||
# f2fs_io getflags /mnt/f2fs/vdc.file
|
||||
get a flag on /mnt/f2fs/vdc.file ret=0, flags=nocow(pinned),immutable
|
||||
# f2fs_io setflags noimmutable /mnt/f2fs/vdc.file
|
||||
get a flag on noimmutable ret=0, flags=800010
|
||||
set a flag on /mnt/f2fs/vdc.file ret=0, flags=noimmutable
|
||||
# rm /mnt/f2fs/vdc.file
|
||||
# df -h
|
||||
/dev/vdb 64G 753M 64G 2% /mnt/f2fs
|
||||
# f2fs_io getflags /mnt/f2fs/vdc.file
|
||||
get a flag on /mnt/f2fs/vdc.file ret=0, flags=nocow(pinned),immutable
|
||||
# f2fs_io setflags noimmutable /mnt/f2fs/vdc.file
|
||||
get a flag on noimmutable ret=0, flags=800010
|
||||
set a flag on /mnt/f2fs/vdc.file ret=0, flags=noimmutable
|
||||
# rm /mnt/f2fs/vdc.file
|
||||
# df -h
|
||||
/dev/vdb 64G 753M 64G 2% /mnt/f2fs
|
||||
|
||||
So, the key idea is that the user can do any file operations on /dev/vdc, and
|
||||
reclaim the space after the use, while the space is counted as /data.
|
||||
|
||||
@ -1442,6 +1442,34 @@ u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi)
|
||||
return get_sectors_written(sbi->sb->s_bdev);
|
||||
}
|
||||
|
||||
/*
 * Store the current ktime_get() timestamp in the slot of @cpc's timing
 * statistics selected by @type.
 */
static inline void stat_cp_time(struct cp_control *cpc, enum cp_time type)
|
||||
{
|
||||
cpc->stats.times[type] = ktime_get();
|
||||
}
|
||||
|
||||
/*
 * Compare the duration of the checkpoint described by @cpc (CP_TIME_START to
 * CP_TIME_END, in milliseconds) with the duration saved in sbi->cp_stats.
 * When the new one is longer it replaces the saved statistics; if it also
 * reaches CP_LONG_LATENCY_THRESHOLD, the total and the time between each pair
 * of consecutive timestamps are logged with f2fs_warn().
 */
static inline void check_cp_time(struct f2fs_sb_info *sbi, struct cp_control *cpc)
|
||||
{
|
||||
unsigned long long sb_diff, cur_diff;
|
||||
enum cp_time ct;
|
||||
|
||||
sb_diff = (u64)ktime_ms_delta(sbi->cp_stats.times[CP_TIME_END],
|
||||
sbi->cp_stats.times[CP_TIME_START]);
|
||||
cur_diff = (u64)ktime_ms_delta(cpc->stats.times[CP_TIME_END],
|
||||
cpc->stats.times[CP_TIME_START]);
|
||||
|
||||
/* only a checkpoint longer than the one already saved is of interest */
if (cur_diff > sb_diff) {
|
||||
sbi->cp_stats = cpc->stats;
|
||||
/* saved above, but too short to be worth a warning */
if (cur_diff < CP_LONG_LATENCY_THRESHOLD)
|
||||
return;
|
||||
|
||||
f2fs_warn(sbi, "checkpoint was blocked for %llu ms", cur_diff);
|
||||
/* each step is the delta between timestamp ct and the next one */
for (ct = CP_TIME_START; ct < CP_TIME_MAX - 1; ct++)
|
||||
f2fs_warn(sbi, "Step#%d: %llu ms", ct,
|
||||
(u64)ktime_ms_delta(cpc->stats.times[ct + 1],
|
||||
cpc->stats.times[ct]));
|
||||
}
|
||||
}
|
||||
|
||||
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
|
||||
{
|
||||
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
|
||||
@ -1459,6 +1487,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
|
||||
/* Flush all the NAT/SIT pages */
|
||||
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
|
||||
|
||||
stat_cp_time(cpc, CP_TIME_SYNC_META);
|
||||
|
||||
/* start to update checkpoint, cp ver is already updated previously */
|
||||
ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true));
|
||||
ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
|
||||
@ -1555,20 +1585,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
|
||||
|
||||
/* Here, we have one bio having CP pack except cp pack 2 page */
|
||||
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
|
||||
stat_cp_time(cpc, CP_TIME_SYNC_CP_META);
|
||||
|
||||
/* Wait for all dirty meta pages to be submitted for IO */
|
||||
f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META);
|
||||
stat_cp_time(cpc, CP_TIME_WAIT_DIRTY_META);
|
||||
|
||||
/* wait for previous submitted meta pages writeback */
|
||||
f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
|
||||
stat_cp_time(cpc, CP_TIME_WAIT_CP_DATA);
|
||||
|
||||
/* flush all device cache */
|
||||
err = f2fs_flush_device_cache(sbi);
|
||||
if (err)
|
||||
return err;
|
||||
stat_cp_time(cpc, CP_TIME_FLUSH_DEVICE);
|
||||
|
||||
/* barrier and flush checkpoint cp pack 2 page if it can */
|
||||
commit_checkpoint(sbi, ckpt, start_blk);
|
||||
f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
|
||||
stat_cp_time(cpc, CP_TIME_WAIT_LAST_CP);
|
||||
|
||||
/*
|
||||
* invalidate intermediate page cache borrowed from meta inode which are
|
||||
@ -1613,6 +1649,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
|
||||
unsigned long long ckpt_ver;
|
||||
int err = 0;
|
||||
|
||||
stat_cp_time(cpc, CP_TIME_START);
|
||||
|
||||
if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi))
|
||||
return -EROFS;
|
||||
|
||||
@ -1624,6 +1662,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
|
||||
if (cpc->reason != CP_RESIZE)
|
||||
f2fs_down_write(&sbi->cp_global_sem);
|
||||
|
||||
stat_cp_time(cpc, CP_TIME_LOCK);
|
||||
|
||||
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
|
||||
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
|
||||
((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
|
||||
@ -1639,6 +1679,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
stat_cp_time(cpc, CP_TIME_OP_LOCK);
|
||||
|
||||
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
|
||||
|
||||
f2fs_flush_merged_writes(sbi);
|
||||
@ -1678,6 +1720,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
|
||||
|
||||
f2fs_flush_sit_entries(sbi, cpc);
|
||||
|
||||
stat_cp_time(cpc, CP_TIME_FLUSH_META);
|
||||
|
||||
/* save inmem log status */
|
||||
f2fs_save_inmem_curseg(sbi);
|
||||
|
||||
@ -1695,6 +1739,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
|
||||
stat_inc_cp_count(sbi);
|
||||
stop:
|
||||
unblock_operations(sbi);
|
||||
stat_cp_time(cpc, CP_TIME_END);
|
||||
check_cp_time(sbi, cpc);
|
||||
|
||||
if (cpc->reason & CP_RECOVERY)
|
||||
f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver);
|
||||
@ -1778,6 +1824,7 @@ static void __checkpoint_and_complete_reqs(struct f2fs_sb_info *sbi)
|
||||
llist_for_each_entry_safe(req, next, dispatch_list, llnode) {
|
||||
diff = (u64)ktime_ms_delta(ktime_get(), req->queue_time);
|
||||
req->ret = ret;
|
||||
req->delta_time = diff;
|
||||
complete(&req->wait);
|
||||
|
||||
sum_diff += diff;
|
||||
@ -1873,6 +1920,12 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
|
||||
else
|
||||
flush_remained_ckpt_reqs(sbi, &req);
|
||||
|
||||
if (unlikely(req.delta_time >= CP_LONG_LATENCY_THRESHOLD)) {
|
||||
f2fs_warn_ratelimited(sbi,
|
||||
"blocked on checkpoint for %u ms", cprc->peak_time);
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
return req.ret;
|
||||
}
|
||||
|
||||
|
||||
@ -1215,9 +1215,11 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock)
|
||||
{
|
||||
void *fsdata = NULL;
|
||||
struct page *pagep;
|
||||
struct page **rpages;
|
||||
int log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
|
||||
pgoff_t start_idx = from >> (PAGE_SHIFT + log_cluster_size) <<
|
||||
log_cluster_size;
|
||||
int i;
|
||||
int err;
|
||||
|
||||
err = f2fs_is_compressed_cluster(inode, start_idx);
|
||||
@ -1238,27 +1240,30 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock)
|
||||
if (err <= 0)
|
||||
return err;
|
||||
|
||||
if (err > 0) {
|
||||
struct page **rpages = fsdata;
|
||||
int cluster_size = F2FS_I(inode)->i_cluster_size;
|
||||
int i;
|
||||
rpages = fsdata;
|
||||
|
||||
for (i = cluster_size - 1; i >= 0; i--) {
|
||||
struct folio *folio = page_folio(rpages[i]);
|
||||
loff_t start = folio->index << PAGE_SHIFT;
|
||||
for (i = (1 << log_cluster_size) - 1; i >= 0; i--) {
|
||||
struct folio *folio = page_folio(rpages[i]);
|
||||
loff_t start = (loff_t)folio->index << PAGE_SHIFT;
|
||||
loff_t offset = from > start ? from - start : 0;
|
||||
|
||||
if (from <= start) {
|
||||
folio_zero_segment(folio, 0, folio_size(folio));
|
||||
} else {
|
||||
folio_zero_segment(folio, from - start,
|
||||
folio_size(folio));
|
||||
break;
|
||||
}
|
||||
}
|
||||
folio_zero_segment(folio, offset, folio_size(folio));
|
||||
|
||||
f2fs_compress_write_end(inode, fsdata, start_idx, true);
|
||||
if (from >= start)
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
|
||||
f2fs_compress_write_end(inode, fsdata, start_idx, true);
|
||||
|
||||
err = filemap_write_and_wait_range(inode->i_mapping,
|
||||
round_down(from, 1 << log_cluster_size << PAGE_SHIFT),
|
||||
LLONG_MAX);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
truncate_pagecache(inode, from);
|
||||
|
||||
return f2fs_do_truncate_blocks(inode, round_up(from, PAGE_SIZE), lock);
|
||||
}
|
||||
|
||||
static int f2fs_write_compressed_pages(struct compress_ctx *cc,
|
||||
|
||||
@ -733,9 +733,11 @@ static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
|
||||
static bool io_type_is_mergeable(struct f2fs_bio_info *io,
|
||||
struct f2fs_io_info *fio)
|
||||
{
|
||||
blk_opf_t mask = ~(REQ_PREFLUSH | REQ_FUA);
|
||||
|
||||
if (io->fio.op != fio->op)
|
||||
return false;
|
||||
return io->fio.op_flags == fio->op_flags;
|
||||
return (io->fio.op_flags & mask) == (fio->op_flags & mask);
|
||||
}
|
||||
|
||||
static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
|
||||
@ -911,7 +913,7 @@ alloc_new:
|
||||
if (fio->io_wbc)
|
||||
wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio));
|
||||
|
||||
inc_page_count(fio->sbi, WB_DATA_TYPE(data_folio, false));
|
||||
inc_page_count(fio->sbi, WB_DATA_TYPE(folio, false));
|
||||
|
||||
*fio->last_block = fio->new_blkaddr;
|
||||
*fio->bio = bio;
|
||||
@ -1083,7 +1085,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
|
||||
}
|
||||
|
||||
/* This can handle encryption stuffs */
|
||||
static int f2fs_submit_page_read(struct inode *inode, struct folio *folio,
|
||||
static void f2fs_submit_page_read(struct inode *inode, struct folio *folio,
|
||||
block_t blkaddr, blk_opf_t op_flags,
|
||||
bool for_write)
|
||||
{
|
||||
@ -1092,23 +1094,16 @@ static int f2fs_submit_page_read(struct inode *inode, struct folio *folio,
|
||||
|
||||
bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
|
||||
folio->index, for_write);
|
||||
if (IS_ERR(bio))
|
||||
return PTR_ERR(bio);
|
||||
|
||||
/* wait for GCed page writeback via META_MAPPING */
|
||||
f2fs_wait_on_block_writeback(inode, blkaddr);
|
||||
|
||||
if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) {
|
||||
iostat_update_and_unbind_ctx(bio);
|
||||
if (bio->bi_private)
|
||||
mempool_free(bio->bi_private, bio_post_read_ctx_pool);
|
||||
bio_put(bio);
|
||||
return -EFAULT;
|
||||
}
|
||||
if (!bio_add_folio(bio, folio, PAGE_SIZE, 0))
|
||||
f2fs_bug_on(sbi, 1);
|
||||
|
||||
inc_page_count(sbi, F2FS_RD_DATA);
|
||||
f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
|
||||
f2fs_submit_read_bio(sbi, bio, DATA);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
|
||||
@ -1265,10 +1260,8 @@ got_it:
|
||||
return folio;
|
||||
}
|
||||
|
||||
err = f2fs_submit_page_read(inode, folio, dn.data_blkaddr,
|
||||
f2fs_submit_page_read(inode, folio, dn.data_blkaddr,
|
||||
op_flags, for_write);
|
||||
if (err)
|
||||
goto put_err;
|
||||
return folio;
|
||||
|
||||
put_err:
|
||||
@ -1572,6 +1565,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
|
||||
pgofs = (pgoff_t)map->m_lblk;
|
||||
end = pgofs + maxblocks;
|
||||
|
||||
if (flag == F2FS_GET_BLOCK_PRECACHE)
|
||||
mode = LOOKUP_NODE_RA;
|
||||
|
||||
next_dnode:
|
||||
if (map->m_may_create) {
|
||||
if (f2fs_lfs_mode(sbi))
|
||||
@ -1778,12 +1774,13 @@ sync_out:
|
||||
if (map->m_flags & F2FS_MAP_MAPPED) {
|
||||
unsigned int ofs = start_pgofs - map->m_lblk;
|
||||
|
||||
f2fs_update_read_extent_cache_range(&dn,
|
||||
start_pgofs, map->m_pblk + ofs,
|
||||
map->m_len - ofs);
|
||||
if (map->m_len > ofs)
|
||||
f2fs_update_read_extent_cache_range(&dn,
|
||||
start_pgofs, map->m_pblk + ofs,
|
||||
map->m_len - ofs);
|
||||
}
|
||||
if (map->m_next_extent)
|
||||
*map->m_next_extent = pgofs + 1;
|
||||
*map->m_next_extent = is_hole ? pgofs + 1 : pgofs;
|
||||
}
|
||||
f2fs_put_dnode(&dn);
|
||||
unlock_out:
|
||||
@ -2145,16 +2142,10 @@ submit_and_realloc:
|
||||
f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
|
||||
bio = NULL;
|
||||
}
|
||||
if (bio == NULL) {
|
||||
if (bio == NULL)
|
||||
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
|
||||
f2fs_ra_op_flags(rac), index,
|
||||
false);
|
||||
if (IS_ERR(bio)) {
|
||||
ret = PTR_ERR(bio);
|
||||
bio = NULL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If the page is under writeback, we need to wait for
|
||||
@ -2303,18 +2294,10 @@ submit_and_realloc:
|
||||
bio = NULL;
|
||||
}
|
||||
|
||||
if (!bio) {
|
||||
if (!bio)
|
||||
bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages - i,
|
||||
f2fs_ra_op_flags(rac),
|
||||
folio->index, for_write);
|
||||
if (IS_ERR(bio)) {
|
||||
ret = PTR_ERR(bio);
|
||||
f2fs_decompress_end_io(dic, ret, true);
|
||||
f2fs_put_dnode(&dn);
|
||||
*bio_ret = NULL;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (!bio_add_folio(bio, folio, blocksize, 0))
|
||||
goto submit_and_realloc;
|
||||
@ -3639,11 +3622,9 @@ repeat:
|
||||
err = -EFSCORRUPTED;
|
||||
goto put_folio;
|
||||
}
|
||||
err = f2fs_submit_page_read(use_cow ?
|
||||
f2fs_submit_page_read(use_cow ?
|
||||
F2FS_I(inode)->cow_inode : inode,
|
||||
folio, blkaddr, 0, true);
|
||||
if (err)
|
||||
goto put_folio;
|
||||
|
||||
folio_lock(folio);
|
||||
if (unlikely(folio->mapping != mapping)) {
|
||||
|
||||
@ -16,6 +16,21 @@
|
||||
#include "xattr.h"
|
||||
#include <trace/events/f2fs.h>
|
||||
|
||||
/*
 * Tell whether the linear search fallback is allowed for @dir, according to
 * the lookup_mode mount option of its filesystem:
 *   LOOKUP_PERF   - never fall back
 *   LOOKUP_COMPAT - always fall back
 *   LOOKUP_AUTO   - fall back unless sb_no_casefold_compat_fallback() reports
 *                   that the superblock disables it
 * Any other value is treated as "no fallback".
 */
static inline bool f2fs_should_fallback_to_linear(struct inode *dir)
|
||||
{
|
||||
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
|
||||
|
||||
switch (F2FS_OPTION(sbi).lookup_mode) {
|
||||
case LOOKUP_PERF:
|
||||
return false;
|
||||
case LOOKUP_COMPAT:
|
||||
return true;
|
||||
case LOOKUP_AUTO:
|
||||
return !sb_no_casefold_compat_fallback(sbi->sb);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_UNICODE)
|
||||
extern struct kmem_cache *f2fs_cf_name_slab;
|
||||
#endif
|
||||
@ -366,7 +381,7 @@ start_find_entry:
|
||||
|
||||
out:
|
||||
#if IS_ENABLED(CONFIG_UNICODE)
|
||||
if (!sb_no_casefold_compat_fallback(dir->i_sb) &&
|
||||
if (f2fs_should_fallback_to_linear(dir) &&
|
||||
IS_CASEFOLDED(dir) && !de && use_hash) {
|
||||
use_hash = false;
|
||||
goto start_find_entry;
|
||||
|
||||
@ -604,7 +604,13 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
|
||||
p = &(*p)->rb_right;
|
||||
leftmost = false;
|
||||
} else {
|
||||
f2fs_err_ratelimited(sbi, "%s: corrupted extent, type: %d, "
|
||||
"extent node in rb tree [%u, %u, %u], age [%llu, %llu], "
|
||||
"extent node to insert [%u, %u, %u], age [%llu, %llu]",
|
||||
__func__, et->type, en->ei.fofs, en->ei.blk, en->ei.len, en->ei.age,
|
||||
en->ei.last_blocks, ei->fofs, ei->blk, ei->len, ei->age, ei->last_blocks);
|
||||
f2fs_bug_on(sbi, 1);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -664,6 +670,15 @@ static void __update_extent_tree_range(struct inode *inode,
|
||||
if (!et)
|
||||
return;
|
||||
|
||||
if (unlikely(len == 0)) {
|
||||
f2fs_err_ratelimited(sbi, "%s: extent len is zero, type: %d, "
|
||||
"extent [%u, %u, %u], age [%llu, %llu]",
|
||||
__func__, type, tei->fofs, tei->blk, tei->len,
|
||||
tei->age, tei->last_blocks);
|
||||
f2fs_bug_on(sbi, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (type == EX_READ)
|
||||
trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
|
||||
tei->blk, 0);
|
||||
|
||||
@ -131,6 +131,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
|
||||
* string rather than using the MS_LAZYTIME flag, so this must remain.
|
||||
*/
|
||||
#define F2FS_MOUNT_LAZYTIME 0x40000000
|
||||
#define F2FS_MOUNT_RESERVE_NODE 0x80000000
|
||||
|
||||
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
|
||||
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
|
||||
@ -155,6 +156,18 @@ enum blkzone_allocation_policy {
|
||||
BLKZONE_ALLOC_PRIOR_CONV, /* Prioritize writing to conventional zones */
|
||||
};
|
||||
|
||||
enum bggc_io_aware_policy {
|
||||
AWARE_ALL_IO, /* skip background GC if there is any kind of pending IO */
|
||||
AWARE_READ_IO, /* skip background GC if there is pending read IO */
|
||||
AWARE_NONE, /* don't aware IO for background GC */
|
||||
};
|
||||
|
||||
enum device_allocation_policy {
|
||||
ALLOCATE_FORWARD_NOHINT,
|
||||
ALLOCATE_FORWARD_WITHIN_HINT,
|
||||
ALLOCATE_FORWARD_FROM_HINT,
|
||||
};
|
||||
|
||||
/*
|
||||
* An implementation of an rwsem that is explicitly unfair to readers. This
|
||||
* prevents priority inversion when a low-priority reader acquires the read lock
|
||||
@ -172,6 +185,7 @@ struct f2fs_rwsem {
|
||||
struct f2fs_mount_info {
|
||||
unsigned int opt;
|
||||
block_t root_reserved_blocks; /* root reserved blocks */
|
||||
block_t root_reserved_nodes; /* root reserved nodes */
|
||||
kuid_t s_resuid; /* reserved blocks for uid */
|
||||
kgid_t s_resgid; /* reserved blocks for gid */
|
||||
int active_logs; /* # of active logs */
|
||||
@ -212,6 +226,7 @@ struct f2fs_mount_info {
|
||||
int compress_mode; /* compression mode */
|
||||
unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
|
||||
unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
|
||||
unsigned int lookup_mode;
|
||||
};
|
||||
|
||||
#define F2FS_FEATURE_ENCRYPT 0x00000001
|
||||
@ -266,14 +281,36 @@ enum {
|
||||
#define DEF_CP_INTERVAL 60 /* 60 secs */
|
||||
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
|
||||
#define DEF_DISABLE_INTERVAL 5 /* 5 secs */
|
||||
#define DEF_ENABLE_INTERVAL 16 /* 16 secs */
|
||||
#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */
|
||||
#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */
|
||||
|
||||
enum cp_time {
|
||||
CP_TIME_START, /* begin */
|
||||
CP_TIME_LOCK, /* after cp_global_sem */
|
||||
CP_TIME_OP_LOCK, /* after block_operation */
|
||||
CP_TIME_FLUSH_META, /* after flush sit/nat */
|
||||
CP_TIME_SYNC_META, /* after sync_meta_pages */
|
||||
CP_TIME_SYNC_CP_META, /* after sync cp meta pages */
|
||||
CP_TIME_WAIT_DIRTY_META,/* after wait on dirty meta */
|
||||
CP_TIME_WAIT_CP_DATA, /* after wait on cp data */
|
||||
CP_TIME_FLUSH_DEVICE, /* after flush device cache */
|
||||
CP_TIME_WAIT_LAST_CP, /* after wait on last cp pack */
|
||||
CP_TIME_END, /* after unblock_operation */
|
||||
CP_TIME_MAX,
|
||||
};
|
||||
|
||||
/* time cost stats of checkpoint */
|
||||
struct cp_stats {
|
||||
ktime_t times[CP_TIME_MAX];
|
||||
};
|
||||
|
||||
struct cp_control {
|
||||
int reason;
|
||||
__u64 trim_start;
|
||||
__u64 trim_end;
|
||||
__u64 trim_minlen;
|
||||
struct cp_stats stats;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -334,7 +371,10 @@ struct ckpt_req {
|
||||
struct completion wait; /* completion for checkpoint done */
|
||||
struct llist_node llnode; /* llist_node to be linked in wait queue */
|
||||
int ret; /* return code of checkpoint */
|
||||
ktime_t queue_time; /* request queued time */
|
||||
union {
|
||||
ktime_t queue_time; /* request queued time */
|
||||
ktime_t delta_time; /* time in queue */
|
||||
};
|
||||
};
|
||||
|
||||
struct ckpt_req_control {
|
||||
@ -350,6 +390,9 @@ struct ckpt_req_control {
|
||||
unsigned int peak_time; /* peak wait time in msec until now */
|
||||
};
|
||||
|
||||
/* a time threshold that checkpoint was blocked for, unit: ms */
|
||||
#define CP_LONG_LATENCY_THRESHOLD 5000
|
||||
|
||||
/* for the bitmap indicate blocks to be discarded */
|
||||
struct discard_entry {
|
||||
struct list_head list; /* list head */
|
||||
@ -1375,6 +1418,7 @@ enum {
|
||||
DISCARD_TIME,
|
||||
GC_TIME,
|
||||
DISABLE_TIME,
|
||||
ENABLE_TIME,
|
||||
UMOUNT_DISCARD_TIMEOUT,
|
||||
MAX_TIME,
|
||||
};
|
||||
@ -1454,6 +1498,12 @@ enum {
|
||||
TOTAL_CALL = FOREGROUND,
|
||||
};
|
||||
|
||||
enum f2fs_lookup_mode {
|
||||
LOOKUP_PERF,
|
||||
LOOKUP_COMPAT,
|
||||
LOOKUP_AUTO,
|
||||
};
|
||||
|
||||
static inline int f2fs_test_bit(unsigned int nr, char *addr);
|
||||
static inline void f2fs_set_bit(unsigned int nr, char *addr);
|
||||
static inline void f2fs_clear_bit(unsigned int nr, char *addr);
|
||||
@ -1643,6 +1693,7 @@ struct f2fs_sb_info {
|
||||
unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
|
||||
long interval_time[MAX_TIME]; /* to store thresholds */
|
||||
struct ckpt_req_control cprc_info; /* for checkpoint request control */
|
||||
struct cp_stats cp_stats; /* for time stat of checkpoint */
|
||||
|
||||
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
|
||||
|
||||
@ -1810,6 +1861,9 @@ struct f2fs_sb_info {
|
||||
spinlock_t dev_lock; /* protect dirty_device */
|
||||
bool aligned_blksize; /* all devices has the same logical blksize */
|
||||
unsigned int first_seq_zone_segno; /* first segno in sequential zone */
|
||||
unsigned int bggc_io_aware; /* For adjust the BG_GC priority when pending IO */
|
||||
unsigned int allocate_section_hint; /* the boundary position between devices */
|
||||
unsigned int allocate_section_policy; /* determine the section writing priority */
|
||||
|
||||
/* For write statistics */
|
||||
u64 sectors_written_start;
|
||||
@ -2362,13 +2416,11 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
|
||||
return ofs == XATTR_NODE_OFFSET;
|
||||
}
|
||||
|
||||
static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
|
||||
static inline bool __allow_reserved_root(struct f2fs_sb_info *sbi,
|
||||
struct inode *inode, bool cap)
|
||||
{
|
||||
if (!inode)
|
||||
return true;
|
||||
if (!test_opt(sbi, RESERVE_ROOT))
|
||||
return false;
|
||||
if (IS_NOQUOTA(inode))
|
||||
return true;
|
||||
if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid()))
|
||||
@ -2389,7 +2441,7 @@ static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi,
|
||||
avail_user_block_count = sbi->user_block_count -
|
||||
sbi->current_reserved_blocks;
|
||||
|
||||
if (!__allow_reserved_blocks(sbi, inode, cap))
|
||||
if (test_opt(sbi, RESERVE_ROOT) && !__allow_reserved_root(sbi, inode, cap))
|
||||
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
|
||||
|
||||
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
|
||||
@ -2747,7 +2799,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
|
||||
struct inode *inode, bool is_inode)
|
||||
{
|
||||
block_t valid_block_count;
|
||||
unsigned int valid_node_count;
|
||||
unsigned int valid_node_count, avail_user_node_count;
|
||||
unsigned int avail_user_block_count;
|
||||
int err;
|
||||
|
||||
@ -2769,15 +2821,20 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
|
||||
spin_lock(&sbi->stat_lock);
|
||||
|
||||
valid_block_count = sbi->total_valid_block_count + 1;
|
||||
avail_user_block_count = get_available_block_count(sbi, inode, false);
|
||||
avail_user_block_count = get_available_block_count(sbi, inode,
|
||||
test_opt(sbi, RESERVE_NODE));
|
||||
|
||||
if (unlikely(valid_block_count > avail_user_block_count)) {
|
||||
spin_unlock(&sbi->stat_lock);
|
||||
goto enospc;
|
||||
}
|
||||
|
||||
avail_user_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
|
||||
if (test_opt(sbi, RESERVE_NODE) &&
|
||||
!__allow_reserved_root(sbi, inode, true))
|
||||
avail_user_node_count -= F2FS_OPTION(sbi).root_reserved_nodes;
|
||||
valid_node_count = sbi->total_valid_node_count + 1;
|
||||
if (unlikely(valid_node_count > sbi->total_node_count)) {
|
||||
if (unlikely(valid_node_count > avail_user_node_count)) {
|
||||
spin_unlock(&sbi->stat_lock);
|
||||
goto enospc;
|
||||
}
|
||||
@ -3004,13 +3061,10 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
|
||||
if (sbi->gc_mode == GC_URGENT_HIGH)
|
||||
return true;
|
||||
|
||||
if (zoned_gc) {
|
||||
if (is_inflight_read_io(sbi))
|
||||
return false;
|
||||
} else {
|
||||
if (is_inflight_io(sbi, type))
|
||||
return false;
|
||||
}
|
||||
if (sbi->bggc_io_aware == AWARE_READ_IO && is_inflight_read_io(sbi))
|
||||
return false;
|
||||
if (sbi->bggc_io_aware == AWARE_ALL_IO && is_inflight_io(sbi, type))
|
||||
return false;
|
||||
|
||||
if (sbi->gc_mode == GC_URGENT_MID)
|
||||
return true;
|
||||
@ -3770,6 +3824,7 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname);
|
||||
* node.c
|
||||
*/
|
||||
struct node_info;
|
||||
enum node_type;
|
||||
|
||||
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
|
||||
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
|
||||
@ -3792,7 +3847,8 @@ int f2fs_remove_inode_page(struct inode *inode);
|
||||
struct folio *f2fs_new_inode_folio(struct inode *inode);
|
||||
struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs);
|
||||
void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
|
||||
struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid);
|
||||
struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid,
|
||||
enum node_type node_type);
|
||||
struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino);
|
||||
struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid);
|
||||
int f2fs_move_node_folio(struct folio *node_folio, int gc_type);
|
||||
|
||||
@ -35,15 +35,23 @@
|
||||
#include <trace/events/f2fs.h>
|
||||
#include <uapi/linux/f2fs.h>
|
||||
|
||||
static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size)
|
||||
static void f2fs_zero_post_eof_page(struct inode *inode,
|
||||
loff_t new_size, bool lock)
|
||||
{
|
||||
loff_t old_size = i_size_read(inode);
|
||||
|
||||
if (old_size >= new_size)
|
||||
return;
|
||||
|
||||
if (mapping_empty(inode->i_mapping))
|
||||
return;
|
||||
|
||||
if (lock)
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
/* zero or drop pages only in range of [old_size, new_size] */
|
||||
truncate_pagecache(inode, old_size);
|
||||
truncate_inode_pages_range(inode->i_mapping, old_size, new_size);
|
||||
if (lock)
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
}
|
||||
|
||||
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
|
||||
@ -114,9 +122,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
|
||||
|
||||
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
|
||||
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true);
|
||||
|
||||
file_update_time(vmf->vma->vm_file);
|
||||
filemap_invalidate_lock_shared(inode->i_mapping);
|
||||
@ -904,8 +910,16 @@ int f2fs_truncate(struct inode *inode)
|
||||
/* we should check inline_data size */
|
||||
if (!f2fs_may_inline_data(inode)) {
|
||||
err = f2fs_convert_inline_inode(inode);
|
||||
if (err)
|
||||
if (err) {
|
||||
/*
|
||||
* Always truncate page #0 to avoid page cache
|
||||
* leak in evict() path.
|
||||
*/
|
||||
truncate_inode_pages_range(inode->i_mapping,
|
||||
F2FS_BLK_TO_BYTES(0),
|
||||
F2FS_BLK_END_BYTES(0));
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
|
||||
@ -1141,7 +1155,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
if (attr->ia_size > old_size)
|
||||
f2fs_zero_post_eof_page(inode, attr->ia_size);
|
||||
f2fs_zero_post_eof_page(inode, attr->ia_size, false);
|
||||
truncate_setsize(inode, attr->ia_size);
|
||||
|
||||
if (attr->ia_size <= old_size)
|
||||
@ -1260,9 +1274,7 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
f2fs_zero_post_eof_page(inode, offset + len);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
f2fs_zero_post_eof_page(inode, offset + len, true);
|
||||
|
||||
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
|
||||
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
|
||||
@ -1547,7 +1559,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
|
||||
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
f2fs_zero_post_eof_page(inode, offset + len);
|
||||
f2fs_zero_post_eof_page(inode, offset + len, false);
|
||||
|
||||
f2fs_lock_op(sbi);
|
||||
f2fs_drop_extent_tree(inode);
|
||||
@ -1670,9 +1682,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
filemap_invalidate_lock(mapping);
|
||||
f2fs_zero_post_eof_page(inode, offset + len);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
f2fs_zero_post_eof_page(inode, offset + len, true);
|
||||
|
||||
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
|
||||
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
|
||||
@ -1806,7 +1816,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
|
||||
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
filemap_invalidate_lock(mapping);
|
||||
|
||||
f2fs_zero_post_eof_page(inode, offset + len);
|
||||
f2fs_zero_post_eof_page(inode, offset + len, false);
|
||||
truncate_pagecache(inode, offset);
|
||||
|
||||
while (!ret && idx > pg_start) {
|
||||
@ -1864,9 +1874,7 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
f2fs_zero_post_eof_page(inode, offset + len);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
f2fs_zero_post_eof_page(inode, offset + len, true);
|
||||
|
||||
f2fs_balance_fs(sbi, true);
|
||||
|
||||
@ -4914,9 +4922,8 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from));
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
f2fs_zero_post_eof_page(inode,
|
||||
iocb->ki_pos + iov_iter_count(from), true);
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
25
fs/f2fs/gc.c
25
fs/f2fs/gc.c
@ -1071,7 +1071,7 @@ next_step:
|
||||
}
|
||||
|
||||
/* phase == 2 */
|
||||
node_folio = f2fs_get_node_folio(sbi, nid);
|
||||
node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR);
|
||||
if (IS_ERR(node_folio))
|
||||
continue;
|
||||
|
||||
@ -1145,7 +1145,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
|
||||
nid = le32_to_cpu(sum->nid);
|
||||
ofs_in_node = le16_to_cpu(sum->ofs_in_node);
|
||||
|
||||
node_folio = f2fs_get_node_folio(sbi, nid);
|
||||
node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR);
|
||||
if (IS_ERR(node_folio))
|
||||
return false;
|
||||
|
||||
@ -1794,6 +1794,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
|
||||
struct folio *sum_folio = filemap_get_folio(META_MAPPING(sbi),
|
||||
GET_SUM_BLOCK(sbi, segno));
|
||||
|
||||
if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno))) {
|
||||
f2fs_err(sbi, "%s: segment %u is used by log",
|
||||
__func__, segno);
|
||||
f2fs_bug_on(sbi, 1);
|
||||
goto skip;
|
||||
}
|
||||
|
||||
if (get_valid_blocks(sbi, segno, false) == 0)
|
||||
goto freed;
|
||||
if (gc_type == BG_GC && __is_large_section(sbi) &&
|
||||
@ -1805,7 +1812,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
|
||||
|
||||
sum = folio_address(sum_folio);
|
||||
if (type != GET_SUM_TYPE((&sum->footer))) {
|
||||
f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
|
||||
f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SIT and SSA",
|
||||
segno, type, GET_SUM_TYPE((&sum->footer)));
|
||||
f2fs_stop_checkpoint(sbi, false,
|
||||
STOP_CP_REASON_CORRUPTED_SUMMARY);
|
||||
@ -2068,6 +2075,13 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi,
|
||||
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
|
||||
};
|
||||
|
||||
/*
|
||||
* avoid migrating empty section, as it can be allocated by
|
||||
* log in parallel.
|
||||
*/
|
||||
if (!get_valid_blocks(sbi, segno, true))
|
||||
continue;
|
||||
|
||||
if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno)))
|
||||
continue;
|
||||
|
||||
@ -2182,6 +2196,8 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
|
||||
SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
|
||||
MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
|
||||
MAIN_SECS(sbi) += secs;
|
||||
if (sbi->allocate_section_hint > MAIN_SECS(sbi))
|
||||
sbi->allocate_section_hint = MAIN_SECS(sbi);
|
||||
FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
|
||||
FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
|
||||
F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks);
|
||||
@ -2189,6 +2205,9 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
|
||||
if (f2fs_is_multi_device(sbi)) {
|
||||
int last_dev = sbi->s_ndevs - 1;
|
||||
|
||||
sbi->allocate_section_hint = FDEV(0).total_segments /
|
||||
SEGS_PER_SEC(sbi);
|
||||
|
||||
FDEV(last_dev).total_segments =
|
||||
(int)FDEV(last_dev).total_segments + segs;
|
||||
FDEV(last_dev).end_blk =
|
||||
|
||||
@ -27,12 +27,17 @@ static struct kmem_cache *free_nid_slab;
|
||||
static struct kmem_cache *nat_entry_set_slab;
|
||||
static struct kmem_cache *fsync_node_entry_slab;
|
||||
|
||||
static inline bool is_invalid_nid(struct f2fs_sb_info *sbi, nid_t nid)
|
||||
{
|
||||
return nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether the given nid is within node id range.
|
||||
*/
|
||||
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
|
||||
{
|
||||
if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
|
||||
if (unlikely(is_invalid_nid(sbi, nid))) {
|
||||
set_sbi_flag(sbi, SBI_NEED_FSCK);
|
||||
f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.",
|
||||
__func__, nid);
|
||||
@ -871,7 +876,8 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
|
||||
}
|
||||
|
||||
if (!done) {
|
||||
nfolio[i] = f2fs_get_node_folio(sbi, nids[i]);
|
||||
nfolio[i] = f2fs_get_node_folio(sbi, nids[i],
|
||||
NODE_TYPE_NON_INODE);
|
||||
if (IS_ERR(nfolio[i])) {
|
||||
err = PTR_ERR(nfolio[i]);
|
||||
f2fs_folio_put(nfolio[0], false);
|
||||
@ -989,7 +995,7 @@ static int truncate_dnode(struct dnode_of_data *dn)
|
||||
return 1;
|
||||
|
||||
/* get direct node */
|
||||
folio = f2fs_get_node_folio(sbi, dn->nid);
|
||||
folio = f2fs_get_node_folio(sbi, dn->nid, NODE_TYPE_NON_INODE);
|
||||
if (PTR_ERR(folio) == -ENOENT)
|
||||
return 1;
|
||||
else if (IS_ERR(folio))
|
||||
@ -1033,7 +1039,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
|
||||
|
||||
trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
|
||||
|
||||
folio = f2fs_get_node_folio(F2FS_I_SB(dn->inode), dn->nid);
|
||||
folio = f2fs_get_node_folio(F2FS_I_SB(dn->inode), dn->nid,
|
||||
NODE_TYPE_NON_INODE);
|
||||
if (IS_ERR(folio)) {
|
||||
trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(folio));
|
||||
return PTR_ERR(folio);
|
||||
@ -1111,7 +1118,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
|
||||
/* get indirect nodes in the path */
|
||||
for (i = 0; i < idx + 1; i++) {
|
||||
/* reference count'll be increased */
|
||||
folios[i] = f2fs_get_node_folio(F2FS_I_SB(dn->inode), nid[i]);
|
||||
folios[i] = f2fs_get_node_folio(F2FS_I_SB(dn->inode), nid[i],
|
||||
NODE_TYPE_NON_INODE);
|
||||
if (IS_ERR(folios[i])) {
|
||||
err = PTR_ERR(folios[i]);
|
||||
idx = i - 1;
|
||||
@ -1496,21 +1504,37 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi,
|
||||
struct folio *folio, pgoff_t nid,
|
||||
enum node_type ntype)
|
||||
{
|
||||
if (unlikely(nid != nid_of_node(folio) ||
|
||||
(ntype == NODE_TYPE_INODE && !IS_INODE(folio)) ||
|
||||
(ntype == NODE_TYPE_XATTR &&
|
||||
!f2fs_has_xattr_block(ofs_of_node(folio))) ||
|
||||
time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) {
|
||||
f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, "
|
||||
"node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
|
||||
ntype, nid, nid_of_node(folio), ino_of_node(folio),
|
||||
ofs_of_node(folio), cpver_of_node(folio),
|
||||
next_blkaddr_of_node(folio));
|
||||
set_sbi_flag(sbi, SBI_NEED_FSCK);
|
||||
f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
|
||||
return -EFSCORRUPTED;
|
||||
if (unlikely(nid != nid_of_node(folio)))
|
||||
goto out_err;
|
||||
|
||||
switch (ntype) {
|
||||
case NODE_TYPE_INODE:
|
||||
if (!IS_INODE(folio))
|
||||
goto out_err;
|
||||
break;
|
||||
case NODE_TYPE_XATTR:
|
||||
if (!f2fs_has_xattr_block(ofs_of_node(folio)))
|
||||
goto out_err;
|
||||
break;
|
||||
case NODE_TYPE_NON_INODE:
|
||||
if (IS_INODE(folio))
|
||||
goto out_err;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))
|
||||
goto out_err;
|
||||
return 0;
|
||||
out_err:
|
||||
f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, "
|
||||
"node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
|
||||
ntype, nid, nid_of_node(folio), ino_of_node(folio),
|
||||
ofs_of_node(folio), cpver_of_node(folio),
|
||||
next_blkaddr_of_node(folio));
|
||||
set_sbi_flag(sbi, SBI_NEED_FSCK);
|
||||
f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
static struct folio *__get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid,
|
||||
@ -1546,7 +1570,7 @@ repeat:
|
||||
|
||||
if (unlikely(!folio_test_uptodate(folio))) {
|
||||
err = -EIO;
|
||||
goto out_err;
|
||||
goto out_put_err;
|
||||
}
|
||||
|
||||
if (!f2fs_inode_chksum_verify(sbi, folio)) {
|
||||
@ -1567,9 +1591,10 @@ out_put_err:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid)
|
||||
struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid,
|
||||
enum node_type node_type)
|
||||
{
|
||||
return __get_node_folio(sbi, nid, NULL, 0, NODE_TYPE_REGULAR);
|
||||
return __get_node_folio(sbi, nid, NULL, 0, node_type);
|
||||
}
|
||||
|
||||
struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino)
|
||||
@ -2634,6 +2659,16 @@ retry:
|
||||
f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
|
||||
i = list_first_entry(&nm_i->free_nid_list,
|
||||
struct free_nid, list);
|
||||
|
||||
if (unlikely(is_invalid_nid(sbi, i->nid))) {
|
||||
spin_unlock(&nm_i->nid_list_lock);
|
||||
f2fs_err(sbi, "Corrupted nid %u in free_nid_list",
|
||||
i->nid);
|
||||
f2fs_stop_checkpoint(sbi, false,
|
||||
STOP_CP_REASON_CORRUPTED_NID);
|
||||
return false;
|
||||
}
|
||||
|
||||
*nid = i->nid;
|
||||
|
||||
__move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
|
||||
|
||||
@ -57,6 +57,7 @@ enum node_type {
|
||||
NODE_TYPE_REGULAR,
|
||||
NODE_TYPE_INODE,
|
||||
NODE_TYPE_XATTR,
|
||||
NODE_TYPE_NON_INODE,
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
@ -548,7 +548,7 @@ got_it:
|
||||
}
|
||||
|
||||
/* Get the node page */
|
||||
node_folio = f2fs_get_node_folio(sbi, nid);
|
||||
node_folio = f2fs_get_node_folio(sbi, nid, NODE_TYPE_REGULAR);
|
||||
if (IS_ERR(node_folio))
|
||||
return PTR_ERR(node_folio);
|
||||
|
||||
|
||||
@ -2774,6 +2774,8 @@ static int get_new_segment(struct f2fs_sb_info *sbi,
|
||||
unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
|
||||
unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
|
||||
unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
|
||||
unsigned int alloc_policy = sbi->allocate_section_policy;
|
||||
unsigned int alloc_hint = sbi->allocate_section_hint;
|
||||
bool init = true;
|
||||
int i;
|
||||
int ret = 0;
|
||||
@ -2807,6 +2809,21 @@ static int get_new_segment(struct f2fs_sb_info *sbi,
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Prevent allocate_section_hint from exceeding MAIN_SECS()
|
||||
* due to desynchronization.
|
||||
*/
|
||||
if (alloc_policy != ALLOCATE_FORWARD_NOHINT &&
|
||||
alloc_hint > MAIN_SECS(sbi))
|
||||
alloc_hint = MAIN_SECS(sbi);
|
||||
|
||||
if (alloc_policy == ALLOCATE_FORWARD_FROM_HINT &&
|
||||
hint < alloc_hint)
|
||||
hint = alloc_hint;
|
||||
else if (alloc_policy == ALLOCATE_FORWARD_WITHIN_HINT &&
|
||||
hint >= alloc_hint)
|
||||
hint = 0;
|
||||
|
||||
find_other_zone:
|
||||
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
|
||||
|
||||
@ -3672,7 +3689,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
|
||||
|
||||
if (file_is_hot(inode) ||
|
||||
is_inode_flag_set(inode, FI_HOT_DATA) ||
|
||||
f2fs_is_cow_file(inode))
|
||||
f2fs_is_cow_file(inode) ||
|
||||
is_inode_flag_set(inode, FI_NEED_IPU))
|
||||
return CURSEG_HOT_DATA;
|
||||
return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
|
||||
inode->i_write_hint);
|
||||
@ -3936,12 +3954,18 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
|
||||
int seg_type = log_type_to_seg_type(type);
|
||||
bool keep_order = (f2fs_lfs_mode(fio->sbi) &&
|
||||
seg_type == CURSEG_COLD_DATA);
|
||||
int err;
|
||||
|
||||
if (keep_order)
|
||||
f2fs_down_read(&fio->sbi->io_order_lock);
|
||||
|
||||
if (f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr,
|
||||
&fio->new_blkaddr, sum, type, fio)) {
|
||||
err = f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr,
|
||||
&fio->new_blkaddr, sum, type, fio);
|
||||
if (unlikely(err)) {
|
||||
f2fs_err_ratelimited(fio->sbi,
|
||||
"%s Failed to allocate data block, ino:%u, index:%lu, type:%d, old_blkaddr:0x%x, new_blkaddr:0x%x, err:%d",
|
||||
__func__, fio->ino, folio->index, type,
|
||||
fio->old_blkaddr, fio->new_blkaddr, err);
|
||||
if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host))
|
||||
fscrypt_finalize_bounce_page(&fio->encrypted_page);
|
||||
folio_end_writeback(folio);
|
||||
|
||||
@ -600,6 +600,16 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
|
||||
return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi));
|
||||
}
|
||||
|
||||
static inline unsigned int get_left_section_blocks(struct f2fs_sb_info *sbi,
|
||||
enum log_type type, unsigned int segno)
|
||||
{
|
||||
if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
|
||||
return CAP_BLKS_PER_SEC(sbi) - SEGS_TO_BLKS(sbi,
|
||||
(segno - GET_START_SEG_FROM_SEC(sbi, segno))) -
|
||||
CURSEG_I(sbi, type)->next_blkoff;
|
||||
return CAP_BLKS_PER_SEC(sbi) - get_ckpt_valid_blocks(sbi, segno, true);
|
||||
}
|
||||
|
||||
static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
|
||||
unsigned int node_blocks, unsigned int data_blocks,
|
||||
unsigned int dent_blocks)
|
||||
@ -614,14 +624,7 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
|
||||
if (unlikely(segno == NULL_SEGNO))
|
||||
return false;
|
||||
|
||||
if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) {
|
||||
left_blocks = CAP_BLKS_PER_SEC(sbi) -
|
||||
SEGS_TO_BLKS(sbi, (segno - GET_START_SEG_FROM_SEC(sbi, segno))) -
|
||||
CURSEG_I(sbi, i)->next_blkoff;
|
||||
} else {
|
||||
left_blocks = CAP_BLKS_PER_SEC(sbi) -
|
||||
get_ckpt_valid_blocks(sbi, segno, true);
|
||||
}
|
||||
left_blocks = get_left_section_blocks(sbi, i, segno);
|
||||
|
||||
blocks = i <= CURSEG_COLD_DATA ? data_blocks : node_blocks;
|
||||
if (blocks > left_blocks)
|
||||
@ -634,14 +637,7 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
|
||||
if (unlikely(segno == NULL_SEGNO))
|
||||
return false;
|
||||
|
||||
if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) {
|
||||
left_blocks = CAP_BLKS_PER_SEC(sbi) -
|
||||
SEGS_TO_BLKS(sbi, (segno - GET_START_SEG_FROM_SEC(sbi, segno))) -
|
||||
CURSEG_I(sbi, CURSEG_HOT_DATA)->next_blkoff;
|
||||
} else {
|
||||
left_blocks = CAP_BLKS_PER_SEC(sbi) -
|
||||
get_ckpt_valid_blocks(sbi, segno, true);
|
||||
}
|
||||
left_blocks = get_left_section_blocks(sbi, CURSEG_HOT_DATA, segno);
|
||||
|
||||
if (dent_blocks > left_blocks)
|
||||
return false;
|
||||
|
||||
121
fs/f2fs/super.c
121
fs/f2fs/super.c
@ -143,6 +143,7 @@ enum {
|
||||
Opt_extent_cache,
|
||||
Opt_data_flush,
|
||||
Opt_reserve_root,
|
||||
Opt_reserve_node,
|
||||
Opt_resgid,
|
||||
Opt_resuid,
|
||||
Opt_mode,
|
||||
@ -181,6 +182,7 @@ enum {
|
||||
Opt_nat_bits,
|
||||
Opt_jqfmt,
|
||||
Opt_checkpoint,
|
||||
Opt_lookup_mode,
|
||||
Opt_err,
|
||||
};
|
||||
|
||||
@ -244,6 +246,13 @@ static const struct constant_table f2fs_param_errors[] = {
|
||||
{}
|
||||
};
|
||||
|
||||
static const struct constant_table f2fs_param_lookup_mode[] = {
|
||||
{"perf", LOOKUP_PERF},
|
||||
{"compat", LOOKUP_COMPAT},
|
||||
{"auto", LOOKUP_AUTO},
|
||||
{}
|
||||
};
|
||||
|
||||
static const struct fs_parameter_spec f2fs_param_specs[] = {
|
||||
fsparam_enum("background_gc", Opt_gc_background, f2fs_param_background_gc),
|
||||
fsparam_flag("disable_roll_forward", Opt_disable_roll_forward),
|
||||
@ -265,6 +274,7 @@ static const struct fs_parameter_spec f2fs_param_specs[] = {
|
||||
fsparam_flag_no("extent_cache", Opt_extent_cache),
|
||||
fsparam_flag("data_flush", Opt_data_flush),
|
||||
fsparam_u32("reserve_root", Opt_reserve_root),
|
||||
fsparam_u32("reserve_node", Opt_reserve_node),
|
||||
fsparam_gid("resgid", Opt_resgid),
|
||||
fsparam_uid("resuid", Opt_resuid),
|
||||
fsparam_enum("mode", Opt_mode, f2fs_param_mode),
|
||||
@ -300,6 +310,7 @@ static const struct fs_parameter_spec f2fs_param_specs[] = {
|
||||
fsparam_enum("memory", Opt_memory_mode, f2fs_param_memory_mode),
|
||||
fsparam_flag("age_extent_cache", Opt_age_extent_cache),
|
||||
fsparam_enum("errors", Opt_errors, f2fs_param_errors),
|
||||
fsparam_enum("lookup_mode", Opt_lookup_mode, f2fs_param_lookup_mode),
|
||||
{}
|
||||
};
|
||||
|
||||
@ -336,6 +347,8 @@ static match_table_t f2fs_checkpoint_tokens = {
|
||||
#define F2FS_SPEC_discard_unit (1 << 21)
|
||||
#define F2FS_SPEC_memory_mode (1 << 22)
|
||||
#define F2FS_SPEC_errors (1 << 23)
|
||||
#define F2FS_SPEC_lookup_mode (1 << 24)
|
||||
#define F2FS_SPEC_reserve_node (1 << 25)
|
||||
|
||||
struct f2fs_fs_context {
|
||||
struct f2fs_mount_info info;
|
||||
@ -437,22 +450,30 @@ static void f2fs_destroy_casefold_cache(void) { }
|
||||
|
||||
static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
|
||||
{
|
||||
block_t limit = min((sbi->user_block_count >> 3),
|
||||
block_t block_limit = min((sbi->user_block_count >> 3),
|
||||
sbi->user_block_count - sbi->reserved_blocks);
|
||||
block_t node_limit = sbi->total_node_count >> 3;
|
||||
|
||||
/* limit is 12.5% */
|
||||
if (test_opt(sbi, RESERVE_ROOT) &&
|
||||
F2FS_OPTION(sbi).root_reserved_blocks > limit) {
|
||||
F2FS_OPTION(sbi).root_reserved_blocks = limit;
|
||||
F2FS_OPTION(sbi).root_reserved_blocks > block_limit) {
|
||||
F2FS_OPTION(sbi).root_reserved_blocks = block_limit;
|
||||
f2fs_info(sbi, "Reduce reserved blocks for root = %u",
|
||||
F2FS_OPTION(sbi).root_reserved_blocks);
|
||||
}
|
||||
if (!test_opt(sbi, RESERVE_ROOT) &&
|
||||
if (test_opt(sbi, RESERVE_NODE) &&
|
||||
F2FS_OPTION(sbi).root_reserved_nodes > node_limit) {
|
||||
F2FS_OPTION(sbi).root_reserved_nodes = node_limit;
|
||||
f2fs_info(sbi, "Reduce reserved nodes for root = %u",
|
||||
F2FS_OPTION(sbi).root_reserved_nodes);
|
||||
}
|
||||
if (!test_opt(sbi, RESERVE_ROOT) && !test_opt(sbi, RESERVE_NODE) &&
|
||||
(!uid_eq(F2FS_OPTION(sbi).s_resuid,
|
||||
make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
|
||||
!gid_eq(F2FS_OPTION(sbi).s_resgid,
|
||||
make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
|
||||
f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
|
||||
f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root"
|
||||
" and reserve_node",
|
||||
from_kuid_munged(&init_user_ns,
|
||||
F2FS_OPTION(sbi).s_resuid),
|
||||
from_kgid_munged(&init_user_ns,
|
||||
@ -847,6 +868,11 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32;
|
||||
ctx->spec_mask |= F2FS_SPEC_reserve_root;
|
||||
break;
|
||||
case Opt_reserve_node:
|
||||
ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_NODE);
|
||||
F2FS_CTX_INFO(ctx).root_reserved_nodes = result.uint_32;
|
||||
ctx->spec_mask |= F2FS_SPEC_reserve_node;
|
||||
break;
|
||||
case Opt_resuid:
|
||||
F2FS_CTX_INFO(ctx).s_resuid = result.uid;
|
||||
ctx->spec_mask |= F2FS_SPEC_resuid;
|
||||
@ -994,6 +1020,10 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
|
||||
break;
|
||||
case Opt_checkpoint_enable:
|
||||
F2FS_CTX_INFO(ctx).unusable_cap_perc = 0;
|
||||
ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap_perc;
|
||||
F2FS_CTX_INFO(ctx).unusable_cap = 0;
|
||||
ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap;
|
||||
ctx_clear_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
|
||||
break;
|
||||
default:
|
||||
@ -1149,6 +1179,10 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
case Opt_nat_bits:
|
||||
ctx_set_opt(ctx, F2FS_MOUNT_NAT_BITS);
|
||||
break;
|
||||
case Opt_lookup_mode:
|
||||
F2FS_CTX_INFO(ctx).lookup_mode = result.uint_32;
|
||||
ctx->spec_mask |= F2FS_SPEC_lookup_mode;
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -1191,7 +1225,11 @@ static int f2fs_check_quota_consistency(struct fs_context *fc,
|
||||
goto err_jquota_change;
|
||||
|
||||
if (old_qname) {
|
||||
if (strcmp(old_qname, new_qname) == 0) {
|
||||
if (!new_qname) {
|
||||
f2fs_info(sbi, "remove qf_name %s",
|
||||
old_qname);
|
||||
continue;
|
||||
} else if (strcmp(old_qname, new_qname) == 0) {
|
||||
ctx->qname_mask &= ~(1 << i);
|
||||
continue;
|
||||
}
|
||||
@ -1430,6 +1468,14 @@ static int f2fs_check_opt_consistency(struct fs_context *fc,
|
||||
ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT);
|
||||
ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_ROOT;
|
||||
}
|
||||
if (test_opt(sbi, RESERVE_NODE) &&
|
||||
(ctx->opt_mask & F2FS_MOUNT_RESERVE_NODE) &&
|
||||
ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_NODE)) {
|
||||
f2fs_info(sbi, "Preserve previous reserve_node=%u",
|
||||
F2FS_OPTION(sbi).root_reserved_nodes);
|
||||
ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_NODE);
|
||||
ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_NODE;
|
||||
}
|
||||
|
||||
err = f2fs_check_test_dummy_encryption(fc, sb);
|
||||
if (err)
|
||||
@ -1629,6 +1675,9 @@ static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb)
|
||||
if (ctx->spec_mask & F2FS_SPEC_reserve_root)
|
||||
F2FS_OPTION(sbi).root_reserved_blocks =
|
||||
F2FS_CTX_INFO(ctx).root_reserved_blocks;
|
||||
if (ctx->spec_mask & F2FS_SPEC_reserve_node)
|
||||
F2FS_OPTION(sbi).root_reserved_nodes =
|
||||
F2FS_CTX_INFO(ctx).root_reserved_nodes;
|
||||
if (ctx->spec_mask & F2FS_SPEC_resgid)
|
||||
F2FS_OPTION(sbi).s_resgid = F2FS_CTX_INFO(ctx).s_resgid;
|
||||
if (ctx->spec_mask & F2FS_SPEC_resuid)
|
||||
@ -1658,6 +1707,8 @@ static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb)
|
||||
F2FS_OPTION(sbi).memory_mode = F2FS_CTX_INFO(ctx).memory_mode;
|
||||
if (ctx->spec_mask & F2FS_SPEC_errors)
|
||||
F2FS_OPTION(sbi).errors = F2FS_CTX_INFO(ctx).errors;
|
||||
if (ctx->spec_mask & F2FS_SPEC_lookup_mode)
|
||||
F2FS_OPTION(sbi).lookup_mode = F2FS_CTX_INFO(ctx).lookup_mode;
|
||||
|
||||
f2fs_apply_compression(fc, sb);
|
||||
f2fs_apply_test_dummy_encryption(fc, sb);
|
||||
@ -2349,9 +2400,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
|
||||
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
|
||||
seq_puts(seq, "fragment:block");
|
||||
seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
|
||||
if (test_opt(sbi, RESERVE_ROOT))
|
||||
seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
|
||||
if (test_opt(sbi, RESERVE_ROOT) || test_opt(sbi, RESERVE_NODE))
|
||||
seq_printf(seq, ",reserve_root=%u,reserve_node=%u,resuid=%u,"
|
||||
"resgid=%u",
|
||||
F2FS_OPTION(sbi).root_reserved_blocks,
|
||||
F2FS_OPTION(sbi).root_reserved_nodes,
|
||||
from_kuid_munged(&init_user_ns,
|
||||
F2FS_OPTION(sbi).s_resuid),
|
||||
from_kgid_munged(&init_user_ns,
|
||||
@ -2422,6 +2475,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
|
||||
if (test_opt(sbi, NAT_BITS))
|
||||
seq_puts(seq, ",nat_bits");
|
||||
|
||||
if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_PERF)
|
||||
seq_show_option(seq, "lookup_mode", "perf");
|
||||
else if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_COMPAT)
|
||||
seq_show_option(seq, "lookup_mode", "compat");
|
||||
else if (F2FS_OPTION(sbi).lookup_mode == LOOKUP_AUTO)
|
||||
seq_show_option(seq, "lookup_mode", "auto");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2486,6 +2546,8 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount)
|
||||
#endif
|
||||
|
||||
f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL);
|
||||
|
||||
F2FS_OPTION(sbi).lookup_mode = LOOKUP_PERF;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_QUOTA
|
||||
@ -2566,21 +2628,39 @@ out_unlock:
|
||||
restore_flag:
|
||||
sbi->gc_mode = gc_mode;
|
||||
sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
|
||||
f2fs_info(sbi, "f2fs_disable_checkpoint() finish, err:%d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
|
||||
{
|
||||
int retry = DEFAULT_RETRY_IO_COUNT;
|
||||
unsigned int nr_pages = get_pages(sbi, F2FS_DIRTY_DATA) / 16;
|
||||
long long start, writeback, end;
|
||||
|
||||
f2fs_info(sbi, "f2fs_enable_checkpoint() starts, meta: %lld, node: %lld, data: %lld",
|
||||
get_pages(sbi, F2FS_DIRTY_META),
|
||||
get_pages(sbi, F2FS_DIRTY_NODES),
|
||||
get_pages(sbi, F2FS_DIRTY_DATA));
|
||||
|
||||
f2fs_update_time(sbi, ENABLE_TIME);
|
||||
|
||||
start = ktime_get();
|
||||
|
||||
/* we should flush all the data to keep data consistency */
|
||||
do {
|
||||
sync_inodes_sb(sbi->sb);
|
||||
while (get_pages(sbi, F2FS_DIRTY_DATA)) {
|
||||
writeback_inodes_sb_nr(sbi->sb, nr_pages, WB_REASON_SYNC);
|
||||
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
|
||||
} while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--);
|
||||
|
||||
if (unlikely(retry < 0))
|
||||
f2fs_warn(sbi, "checkpoint=enable has some unwritten data.");
|
||||
if (f2fs_time_over(sbi, ENABLE_TIME))
|
||||
break;
|
||||
}
|
||||
writeback = ktime_get();
|
||||
|
||||
sync_inodes_sb(sbi->sb);
|
||||
|
||||
if (unlikely(get_pages(sbi, F2FS_DIRTY_DATA)))
|
||||
f2fs_warn(sbi, "checkpoint=enable has some unwritten data: %lld",
|
||||
get_pages(sbi, F2FS_DIRTY_DATA));
|
||||
|
||||
f2fs_down_write(&sbi->gc_lock);
|
||||
f2fs_dirty_to_prefree(sbi);
|
||||
@ -2593,6 +2673,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
|
||||
|
||||
/* Let's ensure there's no pending checkpoint anymore */
|
||||
f2fs_flush_ckpt_thread(sbi);
|
||||
|
||||
end = ktime_get();
|
||||
|
||||
f2fs_info(sbi, "f2fs_enable_checkpoint() finishes, writeback:%llu, sync:%llu",
|
||||
ktime_ms_delta(writeback, start),
|
||||
ktime_ms_delta(end, writeback));
|
||||
}
|
||||
|
||||
static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
|
||||
@ -4156,6 +4242,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
|
||||
sbi->total_node_count = SEGS_TO_BLKS(sbi,
|
||||
((le32_to_cpu(raw_super->segment_count_nat) / 2) *
|
||||
NAT_ENTRY_PER_BLOCK));
|
||||
sbi->allocate_section_hint = le32_to_cpu(raw_super->section_count);
|
||||
sbi->allocate_section_policy = ALLOCATE_FORWARD_NOHINT;
|
||||
F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino);
|
||||
F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
|
||||
F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
|
||||
@ -4179,6 +4267,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
|
||||
sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
|
||||
sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
|
||||
sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
|
||||
sbi->interval_time[ENABLE_TIME] = DEF_ENABLE_INTERVAL;
|
||||
sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] =
|
||||
DEF_UMOUNT_DISCARD_TIMEOUT;
|
||||
clear_sbi_flag(sbi, SBI_NEED_FSCK);
|
||||
@ -4637,9 +4726,11 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
|
||||
|
||||
logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
|
||||
sbi->aligned_blksize = true;
|
||||
sbi->bggc_io_aware = AWARE_ALL_IO;
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
sbi->max_open_zones = UINT_MAX;
|
||||
sbi->blkzone_alloc_policy = BLKZONE_ALLOC_PRIOR_SEQ;
|
||||
sbi->bggc_io_aware = AWARE_READ_IO;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < max_devices; i++) {
|
||||
@ -4667,6 +4758,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
|
||||
SEGS_TO_BLKS(sbi,
|
||||
FDEV(i).total_segments) - 1 +
|
||||
le32_to_cpu(raw_super->segment0_blkaddr);
|
||||
sbi->allocate_section_hint = FDEV(i).total_segments /
|
||||
SEGS_PER_SEC(sbi);
|
||||
} else {
|
||||
FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
|
||||
FDEV(i).end_blk = FDEV(i).start_blk +
|
||||
|
||||
119
fs/f2fs/sysfs.c
119
fs/f2fs/sysfs.c
@ -281,6 +281,22 @@ static ssize_t encoding_flags_show(struct f2fs_attr *a,
|
||||
le16_to_cpu(F2FS_RAW_SUPER(sbi)->s_encoding_flags));
|
||||
}
|
||||
|
||||
static ssize_t effective_lookup_mode_show(struct f2fs_attr *a,
|
||||
struct f2fs_sb_info *sbi, char *buf)
|
||||
{
|
||||
switch (F2FS_OPTION(sbi).lookup_mode) {
|
||||
case LOOKUP_PERF:
|
||||
return sysfs_emit(buf, "perf\n");
|
||||
case LOOKUP_COMPAT:
|
||||
return sysfs_emit(buf, "compat\n");
|
||||
case LOOKUP_AUTO:
|
||||
if (sb_no_casefold_compat_fallback(sbi->sb))
|
||||
return sysfs_emit(buf, "auto:perf\n");
|
||||
return sysfs_emit(buf, "auto:compat\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t mounted_time_sec_show(struct f2fs_attr *a,
|
||||
struct f2fs_sb_info *sbi, char *buf)
|
||||
{
|
||||
@ -866,6 +882,27 @@ out:
|
||||
return count;
|
||||
}
|
||||
|
||||
if (!strcmp(a->attr.name, "bggc_io_aware")) {
|
||||
if (t < AWARE_ALL_IO || t > AWARE_NONE)
|
||||
return -EINVAL;
|
||||
sbi->bggc_io_aware = t;
|
||||
return count;
|
||||
}
|
||||
|
||||
if (!strcmp(a->attr.name, "allocate_section_hint")) {
|
||||
if (t < 0 || t > MAIN_SECS(sbi))
|
||||
return -EINVAL;
|
||||
sbi->allocate_section_hint = t;
|
||||
return count;
|
||||
}
|
||||
|
||||
if (!strcmp(a->attr.name, "allocate_section_policy")) {
|
||||
if (t < ALLOCATE_FORWARD_NOHINT || t > ALLOCATE_FORWARD_FROM_HINT)
|
||||
return -EINVAL;
|
||||
sbi->allocate_section_policy = t;
|
||||
return count;
|
||||
}
|
||||
|
||||
*ui = (unsigned int)t;
|
||||
|
||||
return count;
|
||||
@ -1138,6 +1175,8 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
|
||||
F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
|
||||
F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
|
||||
F2FS_SBI_GENERAL_RW_ATTR(dir_level);
|
||||
F2FS_SBI_GENERAL_RW_ATTR(allocate_section_hint);
|
||||
F2FS_SBI_GENERAL_RW_ATTR(allocate_section_policy);
|
||||
#ifdef CONFIG_F2FS_IOSTAT
|
||||
F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
|
||||
F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
|
||||
@ -1175,6 +1214,7 @@ F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy);
|
||||
#endif
|
||||
F2FS_SBI_GENERAL_RW_ATTR(carve_out);
|
||||
F2FS_SBI_GENERAL_RW_ATTR(reserved_pin_section);
|
||||
F2FS_SBI_GENERAL_RW_ATTR(bggc_io_aware);
|
||||
|
||||
/* STAT_INFO ATTR */
|
||||
#ifdef CONFIG_F2FS_STAT_FS
|
||||
@ -1211,6 +1251,7 @@ F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
|
||||
F2FS_GENERAL_RO_ATTR(unusable);
|
||||
F2FS_GENERAL_RO_ATTR(encoding);
|
||||
F2FS_GENERAL_RO_ATTR(encoding_flags);
|
||||
F2FS_GENERAL_RO_ATTR(effective_lookup_mode);
|
||||
F2FS_GENERAL_RO_ATTR(mounted_time_sec);
|
||||
F2FS_GENERAL_RO_ATTR(main_blkaddr);
|
||||
F2FS_GENERAL_RO_ATTR(pending_discard);
|
||||
@ -1303,6 +1344,7 @@ static struct attribute *f2fs_attrs[] = {
|
||||
ATTR_LIST(discard_idle_interval),
|
||||
ATTR_LIST(gc_idle_interval),
|
||||
ATTR_LIST(umount_discard_timeout),
|
||||
ATTR_LIST(bggc_io_aware),
|
||||
#ifdef CONFIG_F2FS_IOSTAT
|
||||
ATTR_LIST(iostat_enable),
|
||||
ATTR_LIST(iostat_period_ms),
|
||||
@ -1329,6 +1371,7 @@ static struct attribute *f2fs_attrs[] = {
|
||||
ATTR_LIST(current_reserved_blocks),
|
||||
ATTR_LIST(encoding),
|
||||
ATTR_LIST(encoding_flags),
|
||||
ATTR_LIST(effective_lookup_mode),
|
||||
ATTR_LIST(mounted_time_sec),
|
||||
#ifdef CONFIG_F2FS_STAT_FS
|
||||
ATTR_LIST(cp_foreground_calls),
|
||||
@ -1371,6 +1414,8 @@ static struct attribute *f2fs_attrs[] = {
|
||||
ATTR_LIST(max_read_extent_count),
|
||||
ATTR_LIST(carve_out),
|
||||
ATTR_LIST(reserved_pin_section),
|
||||
ATTR_LIST(allocate_section_hint),
|
||||
ATTR_LIST(allocate_section_policy),
|
||||
NULL,
|
||||
};
|
||||
ATTRIBUTE_GROUPS(f2fs);
|
||||
@ -1723,12 +1768,15 @@ static int __maybe_unused disk_map_seq_show(struct seq_file *seq,
|
||||
seq_printf(seq, " Main : 0x%010x (%10d)\n",
|
||||
SM_I(sbi)->main_blkaddr,
|
||||
le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main));
|
||||
seq_printf(seq, " # of Sections : %12d\n",
|
||||
le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count));
|
||||
seq_printf(seq, " Block size : %12lu KB\n", F2FS_BLKSIZE >> 10);
|
||||
seq_printf(seq, " Segment size : %12d MB\n",
|
||||
(BLKS_PER_SEG(sbi) << (F2FS_BLKSIZE_BITS - 10)) >> 10);
|
||||
seq_printf(seq, " Segs/Sections : %12d\n",
|
||||
SEGS_PER_SEC(sbi));
|
||||
seq_printf(seq, " Section size : %12d MB\n",
|
||||
SEGS_PER_SEC(sbi) << 1);
|
||||
(BLKS_PER_SEC(sbi) << (F2FS_BLKSIZE_BITS - 10)) >> 10);
|
||||
seq_printf(seq, " # of Sections : %12d\n",
|
||||
le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count));
|
||||
|
||||
if (!f2fs_is_multi_device(sbi))
|
||||
return 0;
|
||||
@ -1742,6 +1790,69 @@ static int __maybe_unused disk_map_seq_show(struct seq_file *seq,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * seq_file show handler that prints the inodes on the DONATE_INODE list.
 *
 * Output is a header (including sbi->donate_files) followed by one row per
 * visited inode: path (or "<unlinked>" when no dentry alias is found),
 * "Evicted"/"Donated" depending on FI_DONATE_FINISHED, donate_start,
 * donate_end + 1 and the mapping's nrpages, each scaled from pages to KB.
 *
 * Always returns 0; if the path buffer cannot be obtained nothing is
 * printed at all.
 */
static int __maybe_unused donation_list_seq_show(struct seq_file *seq,
						void *offset)
{
	struct super_block *sb = seq->private;
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	struct list_head *head = &sbi->inode_list[DONATE_INODE];
	spinlock_t *lock = &sbi->inode_lock[DONATE_INODE];
	struct f2fs_inode_info *fi;
	struct dentry *dentry;
	struct inode *inode;
	char *buf, *path;
	int idx;

	buf = f2fs_getname(sbi);
	if (!buf)
		return 0;

	seq_printf(seq, "Donation List\n");
	seq_printf(seq, " # of files : %u\n", sbi->donate_files);
	seq_printf(seq, " %-50s %10s %20s %20s %22s\n",
		   "File path", "Status", "Donation offset (kb)",
		   "Donation size (kb)", "File cached size (kb)");
	seq_printf(seq, "---\n");

	for (idx = 0; idx < sbi->donate_files; idx++) {
		spin_lock(lock);
		if (list_empty(head)) {
			spin_unlock(lock);
			break;
		}

		/*
		 * Take the first entry and rotate it to the tail, so the
		 * next pass picks up whatever entry is then at the front.
		 * The inode reference is taken before the lock is dropped.
		 */
		fi = list_first_entry(head, struct f2fs_inode_info,
				      gdonate_list);
		list_move_tail(&fi->gdonate_list, head);
		inode = igrab(&fi->vfs_inode);
		spin_unlock(lock);

		/* No reference obtained: move on to the next entry. */
		if (!inode)
			continue;

		inode_lock_shared(inode);

		/* A missing alias is reported as "<unlinked>" below. */
		dentry = d_find_alias(inode);
		path = dentry ? dentry_path_raw(dentry, buf, PATH_MAX) : NULL;

		/* A path lookup error suppresses the row for this inode. */
		if (!IS_ERR(path))
			seq_printf(seq, " %-50s %10s %20llu %20llu %22llu\n",
				   path ? path : "<unlinked>",
				   is_inode_flag_set(inode, FI_DONATE_FINISHED) ?
				   "Evicted" : "Donated",
				   (loff_t)fi->donate_start << (PAGE_SHIFT - 10),
				   (loff_t)(fi->donate_end + 1) << (PAGE_SHIFT - 10),
				   (loff_t)inode->i_mapping->nrpages << (PAGE_SHIFT - 10));

		/* dentry may be NULL here, exactly as in the unlinked case. */
		dput(dentry);
		inode_unlock_shared(inode);
		iput(inode);
	}

	f2fs_putname(buf);
	return 0;
}
|
||||
|
||||
#ifdef CONFIG_F2FS_FAULT_INJECTION
|
||||
static int __maybe_unused inject_stats_seq_show(struct seq_file *seq,
|
||||
void *offset)
|
||||
@ -1851,6 +1962,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
|
||||
discard_plist_seq_show, sb);
|
||||
proc_create_single_data("disk_map", 0444, sbi->s_proc,
|
||||
disk_map_seq_show, sb);
|
||||
proc_create_single_data("donation_list", 0444, sbi->s_proc,
|
||||
donation_list_seq_show, sb);
|
||||
#ifdef CONFIG_F2FS_FAULT_INJECTION
|
||||
proc_create_single_data("inject_stats", 0444, sbi->s_proc,
|
||||
inject_stats_seq_show, sb);
|
||||
|
||||
@ -79,6 +79,7 @@ enum stop_cp_reason {
|
||||
STOP_CP_REASON_FLUSH_FAIL,
|
||||
STOP_CP_REASON_NO_SEGMENT,
|
||||
STOP_CP_REASON_CORRUPTED_FREE_BITMAP,
|
||||
STOP_CP_REASON_CORRUPTED_NID,
|
||||
STOP_CP_REASON_MAX,
|
||||
};
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user