Skip to content

Commit 4ba34ea

Browse files
biger410 authored and sfrothwell committed
ocfs2: fix deadlock due to wrong locking order
To commit the ocfs2 journal, the ocfs2 journal thread acquires the lock osb->journal->j_trans_barrier and wakes up the jbd2 commit thread, then waits until the jbd2 commit thread is done. In ordered journal mode, jbd2 needs to flush dirty data pages first, and this requires taking the page lock. So osb->journal->j_trans_barrier must be acquired before the page lock. But ocfs2_write_zero_page() and ocfs2_write_begin_inline() do not obey this locking order (they take the page lock first and only then start a transaction), which can cause a deadlock and hang the whole cluster. One deadlock that was caught is the following: PID: 13449 TASK: ffff8802e2f08180 CPU: 31 COMMAND: "oracle" #0 [ffff8802ee3f79b0] __schedule at ffffffff8150a524 #1 [ffff8802ee3f7a58] schedule at ffffffff8150acbf #2 [ffff8802ee3f7a68] rwsem_down_failed_common at ffffffff8150cb85 #3 [ffff8802ee3f7ad8] rwsem_down_read_failed at ffffffff8150cc55 #4 [ffff8802ee3f7ae8] call_rwsem_down_read_failed at ffffffff812617a4 #5 [ffff8802ee3f7b50] ocfs2_start_trans at ffffffffa0498919 [ocfs2] #6 [ffff8802ee3f7ba0] ocfs2_zero_start_ordered_transaction at ffffffffa048b2b8 [ocfs2] #7 [ffff8802ee3f7bf0] ocfs2_write_zero_page at ffffffffa048e9bd [ocfs2] #8 [ffff8802ee3f7c80] ocfs2_zero_extend_range at ffffffffa048ec83 [ocfs2] #9 [ffff8802ee3f7ce0] ocfs2_zero_extend at ffffffffa048edfd [ocfs2] #10 [ffff8802ee3f7d50] ocfs2_extend_file at ffffffffa049079e [ocfs2] #11 [ffff8802ee3f7da0] ocfs2_setattr at ffffffffa04910ed [ocfs2] #12 [ffff8802ee3f7e70] notify_change at ffffffff81187d29 #13 [ffff8802ee3f7ee0] do_truncate at ffffffff8116bbc1 #14 [ffff8802ee3f7f50] sys_ftruncate at ffffffff8116bcbd #15 [ffff8802ee3f7f80] system_call_fastpath at ffffffff81515142 RIP: 00007f8de750c6f7 RSP: 00007fffe786e478 RFLAGS: 00000206 RAX: 000000000000004d RBX: ffffffff81515142 RCX: 0000000000000000 RDX: 0000000000000200 RSI: 0000000000028400 RDI: 000000000000000d RBP: 00007fffe786e040 R8: 0000000000000000 R9: 000000000000000d R10: 0000000000000000 R11: 
0000000000000206 R12: 000000000000000d R13: 00007fffe786e710 R14: 00007f8de70f8340 R15: 0000000000028400 ORIG_RAX: 000000000000004d CS: 0033 SS: 002b crash64> bt PID: 7610 TASK: ffff88100fd56140 CPU: 1 COMMAND: "ocfs2cmt" #0 [ffff88100f4d1c50] __schedule at ffffffff8150a524 #1 [ffff88100f4d1cf8] schedule at ffffffff8150acbf #2 [ffff88100f4d1d08] jbd2_log_wait_commit at ffffffffa01274fd [jbd2] #3 [ffff88100f4d1d98] jbd2_journal_flush at ffffffffa01280b4 [jbd2] #4 [ffff88100f4d1dd8] ocfs2_commit_cache at ffffffffa0499b14 [ocfs2] #5 [ffff88100f4d1e38] ocfs2_commit_thread at ffffffffa0499d38 [ocfs2] #6 [ffff88100f4d1ee8] kthread at ffffffff81090db6 #7 [ffff88100f4d1f48] kernel_thread_helper at ffffffff81516284 crash64> bt PID: 7609 TASK: ffff88100f2d4480 CPU: 0 COMMAND: "jbd2/dm-20-86" #0 [ffff88100def3920] __schedule at ffffffff8150a524 #1 [ffff88100def39c8] schedule at ffffffff8150acbf #2 [ffff88100def39d8] io_schedule at ffffffff8150ad6c #3 [ffff88100def39f8] sleep_on_page at ffffffff8111069e #4 [ffff88100def3a08] __wait_on_bit_lock at ffffffff8150b30a #5 [ffff88100def3a58] __lock_page at ffffffff81110687 #6 [ffff88100def3ab8] write_cache_pages at ffffffff8111b752 #7 [ffff88100def3be8] generic_writepages at ffffffff8111b901 #8 [ffff88100def3c48] journal_submit_data_buffers at ffffffffa0120f67 [jbd2] #9 [ffff88100def3cf8] jbd2_journal_commit_transaction at ffffffffa0121372 [jbd2] #10 [ffff88100def3e68] kjournald2 at ffffffffa0127a86 [jbd2] #11 [ffff88100def3ee8] kthread at ffffffff81090db6 #12 [ffff88100def3f48] kernel_thread_helper at ffffffff81516284 Signed-off-by: Junxiao Bi <[email protected]> Cc: Mark Fasheh <[email protected]> Cc: Joel Becker <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent c9ac86d commit 4ba34ea

File tree

2 files changed

+32
-35
lines changed

2 files changed

+32
-35
lines changed

fs/ocfs2/aops.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1485,8 +1485,16 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
14851485
handle_t *handle;
14861486
struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
14871487

1488+
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1489+
if (IS_ERR(handle)) {
1490+
ret = PTR_ERR(handle);
1491+
mlog_errno(ret);
1492+
goto out;
1493+
}
1494+
14881495
page = find_or_create_page(mapping, 0, GFP_NOFS);
14891496
if (!page) {
1497+
ocfs2_commit_trans(osb, handle);
14901498
ret = -ENOMEM;
14911499
mlog_errno(ret);
14921500
goto out;
@@ -1498,13 +1506,6 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
14981506
wc->w_pages[0] = wc->w_target_page = page;
14991507
wc->w_num_pages = 1;
15001508

1501-
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1502-
if (IS_ERR(handle)) {
1503-
ret = PTR_ERR(handle);
1504-
mlog_errno(ret);
1505-
goto out;
1506-
}
1507-
15081509
ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
15091510
OCFS2_JOURNAL_ACCESS_WRITE);
15101511
if (ret) {

fs/ocfs2/file.c

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -760,7 +760,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
760760
struct address_space *mapping = inode->i_mapping;
761761
struct page *page;
762762
unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
763-
handle_t *handle = NULL;
763+
handle_t *handle;
764764
int ret = 0;
765765
unsigned zero_from, zero_to, block_start, block_end;
766766
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -769,11 +769,17 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
769769
BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
770770
BUG_ON(abs_from & (inode->i_blkbits - 1));
771771

772+
handle = ocfs2_zero_start_ordered_transaction(inode, di_bh);
773+
if (IS_ERR(handle)) {
774+
ret = PTR_ERR(handle);
775+
goto out;
776+
}
777+
772778
page = find_or_create_page(mapping, index, GFP_NOFS);
773779
if (!page) {
774780
ret = -ENOMEM;
775781
mlog_errno(ret);
776-
goto out;
782+
goto out_commit_trans;
777783
}
778784

779785
/* Get the offsets within the page that we want to zero */
@@ -805,15 +811,6 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
805811
goto out_unlock;
806812
}
807813

808-
if (!handle) {
809-
handle = ocfs2_zero_start_ordered_transaction(inode,
810-
di_bh);
811-
if (IS_ERR(handle)) {
812-
ret = PTR_ERR(handle);
813-
handle = NULL;
814-
break;
815-
}
816-
}
817814

818815
/* must not update i_size! */
819816
ret = block_commit_write(page, block_start + 1,
@@ -824,27 +821,26 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
824821
ret = 0;
825822
}
826823

827-
if (handle) {
828-
/*
829-
* fs-writeback will release the dirty pages without page lock
830-
* whose offset are over inode size, the release happens at
831-
* block_write_full_page().
832-
*/
833-
i_size_write(inode, abs_to);
834-
inode->i_blocks = ocfs2_inode_sector_count(inode);
835-
di->i_size = cpu_to_le64((u64)i_size_read(inode));
836-
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
837-
di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
838-
di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
839-
di->i_mtime_nsec = di->i_ctime_nsec;
840-
ocfs2_journal_dirty(handle, di_bh);
841-
ocfs2_update_inode_fsync_trans(handle, inode, 1);
842-
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
843-
}
824+
/*
825+
* fs-writeback will release the dirty pages without page lock
826+
* whose offset are over inode size, the release happens at
827+
* block_write_full_page().
828+
*/
829+
i_size_write(inode, abs_to);
830+
inode->i_blocks = ocfs2_inode_sector_count(inode);
831+
di->i_size = cpu_to_le64((u64)i_size_read(inode));
832+
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
833+
di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
834+
di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
835+
di->i_mtime_nsec = di->i_ctime_nsec;
836+
ocfs2_journal_dirty(handle, di_bh);
837+
ocfs2_update_inode_fsync_trans(handle, inode, 1);
844838

845839
out_unlock:
846840
unlock_page(page);
847841
page_cache_release(page);
842+
out_commit_trans:
843+
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
848844
out:
849845
return ret;
850846
}

0 commit comments

Comments
 (0)