23e3d7f7061f jbd2: fix a potential race while discarding reserved buffers after an abort
[ 72.796117] EXT4-fs error (device sda): ext4_journal_check_start:83: comm fallocate: Detected aborted journal
[ 72.826847] EXT4-fs (sda): Remounting filesystem read-only
fallocate: fallocate failed: Read-only file system
[ 74.791830] jbd2_journal_commit_transaction: jh=0xffff9cfefe725d90 bh=0x0000000000000000 end delay
[ 74.793597] ------------[ cut here ]------------
[ 74.794203] kernel BUG at fs/jbd2/transaction.c:2063!
[ 74.794886] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[ 74.795533] CPU: 4 PID: 2260 Comm: jbd2/sda-8 Not tainted 5.17.0-rc8-next-20220315-dirty #150
[ 74.798327] RIP: 0010:__jbd2_journal_unfile_buffer+0x3e/0x60
[ 74.801971] RSP: 0018:ffffa828c24a3cb8 EFLAGS: 00010202
[ 74.802694] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[ 74.803601] RDX: 0000000000000001 RSI: ffff9cfefe725d90 RDI: ffff9cfefe725d90
[ 74.804554] RBP: ffff9cfefe725d90 R08: 0000000000000000 R09: ffffa828c24a3b20
[ 74.805471] R10: 0000000000000001 R11: 0000000000000001 R12: ffff9cfefe725d90
[ 74.806385] R13: ffff9cfefe725d98 R14: 0000000000000000 R15: ffff9cfe833a4d00
[ 74.807301] FS: 0000000000000000(0000) GS:ffff9d01afb00000(0000) knlGS:0000000000000000
[ 74.808338] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 74.809084] CR2: 00007f2b81bf4000 CR3: 0000000100056000 CR4: 00000000000006e0
[ 74.810047] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 74.810981] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 74.811897] Call Trace:
[ 74.812241] <TASK>
[ 74.812566] __jbd2_journal_refile_buffer+0x12f/0x180
jbd2_journal_refile_buffer+0x4c/0xa0
[ 74.813246] [ 74.813869] jbd2_journal_commit_transaction.cold+0xa1/0x148
kjournald2+0xf8/0x3e0
[ 74.817550] [ 74.819056] kthread+0x153/0x1c0
ret_from_fork+0x22/0x30 [ 74.819963]
3465bf5459a86084db84408653ea83789f606eee Mon Sep 17 00:00:00 2001
From : ChenXiaoSong <chenxiaosongemail@foxmail.com>
From: Wed, 20 Apr 2022 23:32:57 +0800
Date: [PATCH] reproduce jbd2 deref null ptr
Subject
-off-by: ChenXiaoSong <chenxiaosongemail@foxmail.com>
Signed---
/ext4/inode.c | 7 +++++++
fs/jbd2/commit.c | 4 ++++
fs/jbd2/journal.c | 6 ++++++
fs3 files changed, 17 insertions(+)
--git a/fs/ext4/inode.c b/fs/ext4/inode.c
diff 1ce13f69fbec..bb4b2fe8968d 100644
index --- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
-49,6 +49,7 @@
@@ #include "truncate.h"
#include <trace/events/ext4.h>
+#include <linux/delay.h>
static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
struct ext4_inode_info *ei)
-1261,6 +1262,9 @@ static int write_end_fn(handle_t *handle, struct inode *inode,
@@ if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
(bh);
set_buffer_uptodate+ printk("%s:%d,jh:%px,bh:%px,begin sleep...\n", __func__, __LINE__, bh2jh(bh), bh);
+ mdelay(2000);
+ printk("%s:%d,jh:%px,bh:%px,end sleep...\n", __func__, __LINE__, bh2jh(bh), bh);
= ext4_handle_dirty_metadata(handle, NULL, bh);
ret (bh);
clear_buffer_meta(bh);
clear_buffer_prio-4034,6 +4038,9 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
@@ = ext4_update_disksize_before_punch(inode, offset, length);
ret if (ret)
goto out_dio;
+ printk("%s:%d,begin sleep...\n", __func__, __LINE__);
+ mdelay(3000);
+ printk("%s:%d,end sleep...\n", __func__, __LINE__);
(inode, first_block_offset,
truncate_pagecache_range);
last_block_offset}
--git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
diff 5b9408e3b370..f7a4db3835ee 100644
index --- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
-25,6 +25,7 @@
@@ #include <linux/blkdev.h>
#include <linux/bitops.h>
#include <trace/events/jbd2.h>
+#include <linux/delay.h>
/*
* IO end handler for temporary buffer_heads handling writes to the journal.
@@ -511,6 +512,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
while (commit_transaction->t_reserved_list) {
= commit_transaction->t_reserved_list;
jh + printk("%s:%d,jh:%px,bh:%px,begin sleep...\n", __func__, __LINE__, jh, jh2bh(jh));
+ mdelay(5000);
+ printk("%s:%d,jh:%px,bh:%px,end sleep...\n", __func__, __LINE__, jh, jh2bh(jh));
(jh, "reserved, unused: refile");
JBUFFER_TRACE/*
* A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index fcacafa4510d..9bad5d4b2a84 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -47,6 +47,7 @@
#include <linux/uaccess.h>
#include <asm/page.h>
+#include <linux/delay.h>
#ifdef CONFIG_JBD2_DEBUG
ushort jbd2_journal_enable_debug __read_mostly;
@@ -2993,6 +2994,11 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
/* Unlink before dropping the lock */
->b_private = NULL;
bh->b_bh = NULL; /* debug, really */
jh+ if (strcmp(current->comm, "fallocate") == 0 || strcmp(current->comm, "ln") == 0) {
+ printk("%s:%d,jh:%px,bh:%px,begin sleep...\n", __func__, __LINE__, jh, bh);
+ mdelay(10000);
+ printk("%s:%d,jh:%px,bh:%px,end sleep...\n", __func__, __LINE__, jh, bh);
+ }
(bh);
clear_buffer_jbd}
--
2.25.1
fallocate -l 1M file
ls -lh
# total 1.0M
# -rw-r--r--. 1 root root 1.0M Apr 21 01:14 file
du -c file -h
# 1.0M file
# 1.0M total
fallocate -o 0 -l 412K -n -p /mnt/file
ls -lh
# total 612K
# -rw-r--r--. 1 root root 1.0M Apr 21 01:15 file
du -c file -h
# 612K file
# 612K total
fallocate -l 312K file
ls -lh
# total 924K
# -rw-r--r--. 1 root root 1.0M Apr 21 01:15 file
du -c file -h
# 924K file
# 924K total
fallocate -o 312K -l 50K file # 4K对齐,实际分配52K
ls -lh
# total 976K
# -rw-r--r--. 1 root root 1.0M Apr 21 01:20 file
du -c file -h
# 976K file
# 976K total
c程序:
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <linux/fs.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#define LEN 4096
int main()
{
int fd, res;
char buf[LEN] = {0};
= open("/mnt/file", O_RDWR);
fd if (fd < 0) {
("open fail, errno:%d\n", errno);
printfreturn 1;
}
= write(fd, buf, LEN);
res if (res != LEN) {
("write fail, errno:%d\n", errno);
printfreturn 1;
}
("success\n");
printfreturn 0;
}
umount /mnt
mkfs.ext4 -F -b 4096 /dev/sda
mount -o data=journal /dev/sda /mnt
rm /mnt/file -rf
dd if=/dev/zero of=/mnt/file oflag=sync bs=4096 count=2
sync
./a.out & # c程序
sleep 0.5
fallocate -o 0 -l 4096 -n -p /mnt/file & # 可以只指定 -p
sleep 1
mount -o remount,abort /mnt
c程序:
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <linux/fs.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#define LEN 4096
#define EXT4_JOURNAL_DATA_FL 0x00004000
int main()
{
int fd, res;
int flags = EXT4_JOURNAL_DATA_FL;
char buf[LEN] = {0};
= open("/mnt/file", O_RDWR);
fd if (fd < 0) {
("open fail, errno:%d\n", errno);
printfreturn 1;
}
= ioctl(fd, FS_IOC_SETFLAGS, &flags);
res if (res < 0) {
("ioctl fail, errno:%d\n", errno);
printfreturn 1;
}
= fsync(fd);
res if (res < 0) {
("fsync fail, errno:%d\n", errno);
printfreturn 1;
}
= write(fd, buf, LEN);
res if (res != LEN) {
("write fail, errno:%d\n", errno);
printfreturn 1;
}
("success\n");
printfreturn 0;
}
umount /mnt
mkfs.ext4 -F -b 4096 /dev/sda
mount -o nodelalloc /dev/sda /mnt # 必须要指定 nodelalloc
rm /mnt/file -rf
dd if=/dev/zero of=/mnt/file bs=4096 count=2
sync
./a.out &
sleep 0.5
fallocate -o 0 -l 4096 -n -p /mnt/file & # 可以只指定 -p
sleep 1
mount -o remount,abort /mnt
umount /mnt
mkfs.ext4 -F -b 4096 /dev/sda
mount /dev/sda /mnt
mkdir /mnt/123456789012345678901234567890/1234567890123456789012345678901234567890 -p
touch /mnt/123456789012345678901234567890/1234567890123456789012345678901234567890/file
ln -s /mnt/123456789012345678901234567890/1234567890123456789012345678901234567890/file /mnt/link &
sleep 0.5
mount -o remount,abort /mnt
修复补丁: 23e3d7f7061f jbd2: fix a potential race while discarding reserved buffers after an abort
// mkfs.ext4 -F -b 4096 /dev/sda
// mount -o nodelalloc /dev/sda /mnt
mount
do_mount
path_mount
do_new_mount
parse_monolithic_mount_data
generic_parse_monolithic
vfs_parse_fs_string
vfs_parse_fs_param
ext4_parse_param
vfs_get_tree
ext4_get_tree
get_tree_bdev
ext4_fill_super
__ext4_fill_super// EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_DELALLOC
(sb, DELALLOC)
set_opt
ext4_apply_options// -o nodelalloc 挂载,在这里清除 EXT4_MOUNT_DELALLOC
->s_mount_opt &= ~ctx->mask_s_mount_opt
sbi
ext4_load_journal
ext4_get_journal
jbd2_journal_init_inode
journal_init_common->j_flags |= JBD2_ABORT
journal
// int flags = EXT4_JOURNAL_DATA_FL
// ioctl(fd, EXT4_IOC_SETFLAGS, &flags)
ioctl
do_vfs_ioctl
ioctl_setflags
vfs_fileattr_set
ext4_fileattr_set
ext4_ioctl_setflags
ext4_change_inode_journal_flag(inode, EXT4_INODE_JOURNAL_DATA)
ext4_set_inode_flag
ext4_set_aops
ext4_inode_journal_modeif (!test_opt(inode->i_sb, DELALLOC) // delay alloc 不支持 journal 模式
// mount -o remount,abort /mnt
mount
do_mount
path_mount
do_remount
reconfigure_super
ext4_reconfigure
__ext4_remount
__ext4_error
ext4_handle_error
jbd2_journal_abort->j_flags |= JBD2_ABORT
journal
write
ksys_write
vfs_write
new_sync_write
call_write_iter
ext4_file_write_iter
ext4_buffered_write_iter
generic_perform_write
ext4_write_begin
ext4_should_journal_data
ext4_inode_journal_mode(inode, EXT4_INODE_JOURNAL_DATA)
ext4_test_inode_flag
ext4_walk_page_buffers
do_journal_get_write_access
ext4_journal_get_write_access
__ext4_journal_get_write_access
jbd2_journal_get_write_access
do_get_write_access(jh, transaction, BJ_Reserved)
__jbd2_journal_file_buffer
ext4_journalled_write_end
ext4_walk_page_buffers
write_end_fn
ext4_handle_dirty_metadata
__ext4_handle_dirty_metadata
jbd2_journal_dirty_metadata
is_handle_aborted
is_journal_abortedreturn journal->j_flags & JBD2_ABORT
return -EROFS
fallocate
ksys_fallocate
vfs_fallocate
ext4_fallocate
ext4_punch_hole
truncate_pagecache_range
truncate_inode_pages_range
truncate_cleanup_folio
folio_invalidate
ext4_journalled_invalidate_folio
__ext4_journalled_invalidate_folio
jbd2_journal_invalidate_folio
journal_unmap_buffer(&journal->j_state_lock)
write_lock:
zap_buffer(&journal->j_state_lock)
write_unlock
jbd2_journal_put_journal_head
__journal_remove_journal_head->b_bh = NULL
jh
clear_buffer_jbd
symlinkat
do_symlinkat
vfs_symlink
ext4_symlink
__page_symlink
pagecache_write_end
ext4_journalled_write_end
ext4_walk_page_buffers
write_end_fn
ext4_handle_dirty_metadata
__ext4_handle_dirty_metadata
jbd2_journal_dirty_metadata
is_handle_aborted
is_journal_abortedreturn journal->j_flags & JBD2_ABORT
return -EROFS
goto err_drop_inode
:
err_drop_inode
iput
iput_final
evict
ext4_evict_inode
truncate_inode_pages_final
truncate_inode_pages
truncate_inode_pages_range
truncate_cleanup_folio
folio_invalidate
ext4_journalled_invalidate_folio
__ext4_journalled_invalidate_folio
jbd2_jouranl_invalidate_folio
journal_unmap_bufferif (transaction == NULL) { // 条件不满足
} else if (transaction == journal->j_committing_transaction) { // 条件也不满足
} else {
= __dispose_buffer
may_free
__jbd2_journal_unfile_buffer
__jbd2_journal_temp_unlink_buffercase BJ_Reserved:
= &transaction->t_reserved_list
list // 从 reserved 链表中删除
__blist_del_buffer
jbd2_journal_put_journal_headif (!jh->b_jcount) // jh->b_jcount == 1
jbd_unlock_bh_journal_head:
zap_buffer(&journal->j_state_lock)
write_unlock
jbd2_journal_put_journal_head
__journal_remove_journal_head->b_bh = NULL
jh
clear_buffer_jbd
kthread// kthread_run(kjournald2, ...)
kjournald2
jbd2_journal_commit_transactionwhile (commit_transaction->t_reserved_list)
= commit_transaction->t_reserved_list jh