nfsv3挂载卡在nlmclnt_init()的问题

点击这里查看配套的教学视频

点击跳转到nfs课程所有目录

1 问题描述

内核版本:

uname -a
  # Linux xxxx 4.19.90-24.4.v2101.ky10.x86_64 #1 SMP Mon May 24 12:14:55 CST 2021 x86_64 x86_64 x86_64 GNU/Linux

加了nolock选项可以挂载成功:

mount -t nfs -o vers=3,nolock 192.168.53.225:/tmp/s_test /mnt

不加nolock选项,挂载卡住:

mount -t nfs -o vers=3 192.168.53.225:/tmp/s_test /mnt

查看进程栈:

cat /proc/64997/stack
[<0>] nlmclnt_init+0x1d/0xa0 [lockd]
[<0>] nfs_start_lockd+0xd7/0x110 [nfs]
[<0>] nfs_init_server+0x1a1/0x2d0 [nfs]
[<0>] nfs_create_server+0x57/0x1b0 [nfs]
[<0>] nfs3_create_server+0xb/0x30 [nfsv3]
[<0>] nfs_try_mount+0x14f/0x2c0 [nfs]
[<0>] nfs_fs_mount+0x627/0xdc0 [nfs]
[<0>] mount_fs+0x35/0x160
[<0>] vfs_kern_mount.part.28+0x54/0x120
[<0>] do_mount+0x5c2/0xc60
[<0>] ksys_mount+0x80/0xd0
[<0>] __x64_sys_mount+0x21/0x30
[<0>] do_syscall_64+0x5b/0x1d0
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

2 调试

用以下脚本保存所有线程的栈:

# Save the kernel stack of every thread of every running process into
# ${output_file}, preceded by the `ps aux` listing the PIDs were taken from.
# /proc/<pid>/task/<tid>/stack is read through sudo.
output_file=stacks.txt
# To dump only processes matching a pattern, use the next two lines in place
# of the unfiltered `ps aux` assignment below:
# grep_string="mount"
# full_cmd_result=$(ps aux | grep "${grep_string}" | grep -v "grep ${grep_string}")
full_cmd_result=$(ps aux | sed '1d') # drop the header line
pids=$(printf '%s\n' "${full_cmd_result}" | awk '{print $2}')
: > "${output_file}" # truncate any previous output

if [ -z "$pids" ]; then
    echo "没有找到进程"
    exit 0
fi

{
    echo "找到以下进程:"
    printf '%s\n' "${full_cmd_result}"
    printf '\n%s\n' "获取进程栈信息:"
} >> "${output_file}"

# $pids is a whitespace-separated list of numeric PIDs, so word splitting
# is intentional here.
for pid in $pids; do
    if [ -d "/proc/$pid" ]; then
        # Walk every thread of this process.
        for task in "/proc/$pid/task"/*; do
            # The process or thread may exit while we scan; in that case the
            # glob stays unexpanded (literal '*') or the directory is gone.
            [ -d "$task" ] || continue
            tid=${task##*/} # thread ID is the last path component
            # Thread name; left empty if the thread vanished before the read
            # (error output is deliberately discarded for that race).
            comm=$(cat "$task/comm" 2>/dev/null)
            # printf '%s' keeps backslashes in the thread name literal.
            printf '\n=============== 进程 %s 线程 %s %s 栈信息 ===============\n' \
                "$pid" "$tid" "$comm"
            sudo cat "$task/stack"
            echo "======================================================="
        done
    else
        echo "进程 $pid 已退出"
    fi
done >> "${output_file}"

找到以下几种栈:

cat /proc/23462/task/23462/stack
[<0>] nfs_free_server+0x22/0x90 [nfs]
[<0>] nfs_kill_super+0x2b/0x40 [nfs]
[<0>] deactivate_locked_super+0x3f/0x70
[<0>] cleanup_mnt+0x3b/0x80
[<0>] task_work_run+0x8a/0xb0
[<0>] exit_to_usermode_loop+0xeb/0xf0
[<0>] do_syscall_64+0x1a3/0x1d0
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

cat /proc/65384/task/65384/stack
[<0>] nlmclnt_init+0x1d/0xa0 [lockd]
[<0>] nfs_start_lockd+0xd7/0x110 [nfs]
[<0>] nfs_init_server+0x1a1/0x2d0 [nfs]
[<0>] nfs_create_server+0x57/0x1b0 [nfs]
[<0>] nfs3_create_server+0xb/0x30 [nfsv3]
[<0>] nfs_try_mount+0x14f/0x2c0 [nfs] 
[<0>] nfs_fs_mount+0x627/0xdc0 [nfs]
[<0>] mount_fs+0x35/0x160 
[<0>] vfs_kern_mount.part.28+0x54/0x120
[<0>] do_mount+0x5c2/0xc60
[<0>] ksys_mount+0x80/0xd0
[<0>] __x64_sys_mount+0x21/0x30 
[<0>] do_syscall_64+0x5b/0x1d0
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

cat /proc/17310/task/17310/stack
[<0>] lockd_up+0x14b/0x350 [lockd]
[<0>] nfs_start_lockd+0xd7/0x110 [nfs]
[<0>] nfs_init_server+0x1a1/0x2d0 [nfs]
[<0>] nfs_create_server+0x57/0x1b0 [nfs]
[<0>] nfs3_create_server+0xb/0x30 [nfsv3]
[<0>] nfs_try_mount+0x14f/0x2c0 [nfs]
[<0>] nfs_fs_mount+0x627/0xdc0 [nfs]
[<0>] mount_fs+0x35/0x160
[<0>] vfs_kern_mount.part.28+0x54/0x120
[<0>] do_mount+0x5c2/0xc60    
[<0>] ksys_mount+0x80/0xd0
[<0>] __x64_sys_mount+0x21/0x30
[<0>] do_syscall_64+0x5b/0x1d0
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

没搜索到reclaimer()的栈。

解析:

rpm2cpio kernel-debuginfo-4.19.90-24.4.v2101.ky10.x86_64.rpm | cpio -div
./scripts/faddr2line usr/lib/debug/lib/modules/4.19.90-24.4.v2101.ky10.x86_64/kernel/fs/lockd/lockd.ko.debug nlmclnt_init+0x1d/0xa0
  # nlmclnt_init+0x1d/0xa0:
  # nlmclnt_init at /usr/src/debug/kernel-4.19.90/linux-4.19.90-24.4.v2101.ky10.x86_64/fs/lockd/clntlock.c:60
  # 第60行是 if (status < 0),这是调用返回后的位置;实际应该是阻塞在第59行的调用: status = lockd_up(nlm_init->net);
./scripts/faddr2line usr/lib/debug/lib/modules/4.19.90-24.4.v2101.ky10.x86_64/kernel/fs/nfs/nfs.ko.debug nfs_free_server+0x22/0x90
  # nfs_free_server+0x22/0x90:
  # nfs_free_server at /usr/src/debug/kernel-4.19.90/linux-4.19.90-24.4.v2101.ky10.x86_64/fs/nfs/client.c:924
  # 924: if (!IS_ERR(server->client_acl))

3 代码分析

// 进程 17310
nfs_init_server
  nfs_start_lockd
    nlmclnt_init
      lockd_up
        mutex_lock(&nlmsvc_mutex) // 持有锁
        lockd_up_net // 发生错误
        lockd_unregister_notifiers
          // 休眠直到 nlm_ntf_refcnt == 0
          // 当前 nlm_ntf_refcnt 值为 1
          wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0)
        lockd_start_svc // nlm_ntf_refcnt增加,未执行到
          atomic_inc(&nlm_ntf_refcnt)

// 进程 65384
nfs_init_server
  nfs_start_lockd
    nlmclnt_init
      lockd_up
        mutex_lock(&nlmsvc_mutex) // 等待进程 17310 释放锁

4 补丁

2018-10-22 84df9525b0c2 Linux 4.19 Greg Kroah-Hartman <gregkh@linuxfoundation.org>

git log origin/master --oneline --date=short --format="%cd %h %s %an <%ae>" --grep=nlm_ntf_refcnt
git log origin/master --oneline --date=short --format="%cd %h %s %an <%ae>" --grep=nlm_ntf_wq
  # 2021-12-13 5a8a7ff57421 lockd: simplify management of network status notifiers NeilBrown <neilb@suse.de>
  # 2018-03-19 554faf281988 lockd: make nlm_ntf_refcnt and nlm_ntf_wq static Colin Ian King <colin.king@canonical.com>
git log origin/master --oneline --date=short --format="%cd %h %s %an <%ae>" --grep=lockd_unregister_notifiers
  # 2017-11-07 dc3033e16c59 lockd: double unregister of inetaddr notifiers Vasily Averin <vvs@virtuozzo.com>
git log origin/master --oneline --date=short --format="%cd %h %s %an <%ae>" --grep=lockd_up
  # 2021-12-13 865b674069e0 lockd: introduce lockd_put() NeilBrown <neilb@suse.de>
  # 2021-12-13 b73a2972041b lockd: move lockd_start_svc() call into lockd_create_svc() NeilBrown <neilb@suse.de>
  # 2021-12-13 5a8a7ff57421 lockd: simplify management of network status notifiers NeilBrown <neilb@suse.de>