Bug 151323
| Summary: | Kernel BUG at pageattr:107 | ||||||
|---|---|---|---|---|---|---|---|
| Product: | Red Hat Enterprise Linux 4 | Reporter: | Tom Coughlan <coughlan> | ||||
| Component: | kernel | Assignee: | Jason Baron <jbaron> | ||||
| Status: | CLOSED ERRATA | QA Contact: | Brian Brock <bbrock> | ||||
| Severity: | medium | Docs Contact: | |||||
| Priority: | medium | ||||||
| Version: | 4.0 | CC: | davej, knoel, laurie.barry, linville, riel | ||||
| Target Milestone: | --- | ||||||
| Target Release: | --- | ||||||
| Hardware: | ia32e | ||||||
| OS: | Linux | ||||||
| Whiteboard: | |||||||
| Fixed In Version: | RHSA-2005-514 | Doc Type: | Bug Fix | ||||
| Doc Text: | Story Points: | --- | |||||
| Clone Of: | Environment: | ||||||
| Last Closed: | 2005-10-05 12:50:38 UTC | Type: | --- | ||||
| Regression: | --- | Mount Type: | --- | ||||
| Documentation: | --- | CRM: | |||||
| Verified Versions: | Category: | --- | |||||
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | |||||
| Cloudforms Team: | --- | Target Upstream Version: | |||||
| Embargoed: | |||||||
| Bug Depends On: | |||||||
| Bug Blocks: | 156322 | ||||||
| Attachments: |
|
||||||
Jason looked at this and has a patch. Sent to Emulex for testing. No reply yet. Tom, could you post the patch here? Thanks. Created attachment 112104 [details]
Jason's patch. Sent to Emulex for testing.
Here is some more background from Jason:
I think the patch I sent you is worth a try; if it doesn't work we can dig
deeper. The changeset that I took it from is below. We'll need the x86 bits
too, I guess.
-Jason
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2005/01/11 17:49:19-08:00 ak
# [PATCH] x86_64: change_page_attr logic fixes from Andrea
#
# change_page_attr logic fixes from Andrea
#
# This avoids reference counting leaks and adds BUGs for more wrong cases.
#
# Signed-off-by: Andi Kleen <ak>
# Signed-off-by: Andrew Morton <akpm>
# Signed-off-by: Linus Torvalds <torvalds>
#
# arch/x86_64/mm/pageattr.c
# 2005/01/11 16:42:45-08:00 ak +17 -9
# x86_64: change_page_attr logic fixes from Andrea
#
# arch/i386/mm/pageattr.c
# 2005/01/11 16:42:45-08:00 ak +18 -10
# x86_64: change_page_attr logic fixes from Andrea
#
diff -Nru a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
--- a/arch/i386/mm/pageattr.c 2005-03-01 14:54:53 -05:00
+++ b/arch/i386/mm/pageattr.c 2005-03-01 14:54:53 -05:00
@@ -120,27 +120,35 @@
kpte_page = virt_to_page(kpte);
if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
- pte_t old = *kpte;
- pte_t standard = mk_pte(page, PAGE_KERNEL);
set_pte_atomic(kpte, mk_pte(page, prot));
- if (pte_same(old,standard))
- get_page(kpte_page);
} else {
struct page *split = split_large_page(address, prot);
if (!split)
return -ENOMEM;
- get_page(kpte_page);
set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL));
+ kpte_page = split;
}
+ get_page(kpte_page);
} else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
__put_page(kpte_page);
- }
+ } else
+ BUG();
+
+ /*
+ * If the pte was reserved, it means it was created at boot
+ * time (not via split_large_page) and in turn we must not
+ * replace it with a largepage.
+ */
+ if (!PageReserved(kpte_page)) {
+ /* memleak and potential failed 2M page regeneration */
+ BUG_ON(!page_count(kpte_page));
- if (cpu_has_pse && (page_count(kpte_page) == 1)) {
- list_add(&kpte_page->lru, &df_list);
- revert_page(kpte_page, address);
- }
+ if (cpu_has_pse && (page_count(kpte_page) == 1)) {
+ list_add(&kpte_page->lru, &df_list);
+ revert_page(kpte_page, address);
+ }
+ }
return 0;
}
diff -Nru a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
--- a/arch/x86_64/mm/pageattr.c 2005-03-01 14:54:53 -05:00
+++ b/arch/x86_64/mm/pageattr.c 2005-03-01 14:54:53 -05:00
@@ -131,28 +131,36 @@
kpte_flags = pte_val(*kpte);
if (pgprot_val(prot) != pgprot_val(ref_prot)) {
if ((kpte_flags & _PAGE_PSE) == 0) {
- pte_t old = *kpte;
- pte_t standard = pfn_pte(pfn, ref_prot);
-
set_pte(kpte, pfn_pte(pfn, prot));
- if (pte_same(old,standard))
- get_page(kpte_page);
} else {
+ /*
+ * split_large_page will take the reference for this change_page_attr
+ * on the split page.
+ */
struct page *split = split_large_page(address, prot, ref_prot);
if (!split)
return -ENOMEM;
- get_page(split);
set_pte(kpte,mk_pte(split, ref_prot));
+ kpte_page = split;
}
+ get_page(kpte_page);
} else if ((kpte_flags & _PAGE_PSE) == 0) {
set_pte(kpte, pfn_pte(pfn, ref_prot));
__put_page(kpte_page);
- }
+ } else
+ BUG();
+
+ /* on x86-64 the direct mapping set at boot is not using 4k pages */
+ BUG_ON(PageReserved(kpte_page));
- if (page_count(kpte_page) == 1) {
+ switch (page_count(kpte_page)) {
+ case 1:
save_page(address, kpte_page);
revert_page(address, ref_prot);
- }
+ break;
+ case 0:
+ BUG(); /* memleak and failed 2M page regeneration */
+ }
return 0;
}
The testing of Jason's patch at Emulex did not go so well:
Patch causes panic on boot:
Kernel BUG at pageattr:140
invalid operand: 0000 [1] SMP
CPU 0
Modules linked in:
Pid: 0, comm: swapper Not tainted 2.6.9-5.ELXcustom
RIP: 0010:[<ffffffff80122d47>] <ffffffff80122d47>{__change_page_attr+797}
RSP: 0018:ffffffff804c3e60 EFLAGS: 00010282
RAX: 00000000000001e3 RBX: 00000100010001f8 RCX: 0000000000000000
RDX: 0000010000009000 RSI: 0000010001000000 RDI: 000001000000e000
RBP: 0000010000000000 R08: 000ffffffffff000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000010000009000
R13: 8000000000000163 R14: 8000000000000163 R15: 0000000000000000
FS: 0000000000000000(0000) GS:ffffffff804bf300(0000) knlGS:0000000000000000
CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000000 CR3: 0000000000101000 CR4: 00000000000006e0
Process swapper (pid: 0, threadinfo ffffffff804c2000, task ffffffff803ba980)
Stack: 0000010001000000 0000000000000000 0000000000000000 0000000000000002
8000000000000163 ffffffff80122f21 ffffff000000040e 0000010009f85c40
0000000000000000 0000000000000000
Call Trace:<ffffffff80122f21>{change_page_attr+144}
<ffffffff8012280a>{iounmap+309}
<ffffffff8020b46a>{acpi_tb_find_rsdp+92}
<ffffffff8020b59e>{acpi_find_root_pointer+20}
<ffffffff8020d5f8>{acpi_ut_update_object_reference+596}
<ffffffff80203235>{acpi_ns_root_initialize+624}
<ffffffff801f5ce2>{acpi_os_get_root_pointer+6}
<ffffffff8020a50f>{acpi_load_tables+19}
<ffffffff804db7df>{acpi_early_init+87}
<ffffffff804c5671>{start_kernel+460}
<ffffffff804c51d5>{_sinittext+469}
I'll second the notion that the patch is busted...my Glenwood box wouldn't boot with my jwltest.5 kernels... Bug 151315 is another "Kernel BUG at pageattr:107" problem. I have incorporated a patch which fixes that issue in the test kernels here: http://people.redhat.com/linville/kernels/rhel4/ It may be worthwhile to give that a try? Please do so and post the results. Thanks! latest testing kernel is: http://people.redhat.com/~jbaron/rhel4/RPMS.kernel/ thanks. Emulex has confirmed that the latest RH patch resolves the RHEL4 EM64T iounmap issue. Thank you Laurie Barry Linux Driver Group Emulex Thanks for verifying the fix :) An advisory has been issued which should help the problem described in this bug report. This report is therefore being closed with a resolution of ERRATA. For more information on the solution and/or where to find the updated files, please follow the link below. You may reopen this bug report if the solution does not work for you. http://rhn.redhat.com/errata/RHSA-2005-514.html |
From Bugzilla Helper: User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030922 Description of problem: Reported by Emulex on emulex-list, Mon, 28 Feb 2005 14:41:59 -0500. We are seeing an issue with RHEL4 on a Dell Poweredge 2800 EM64T system. This appears to be an issue with 64-bit PCI and unmapping I/O registers (BARs) on driver unload. We are seeing the kernel panic when we unload our driver. The stack trace is as follows: Kernel BUG at pageattr:107 invalid operand: 0000 [1] SMP CPU 0 Modules linked in: md5 ipv6 parport_pc lp parport autofs4 i2c_dev i2c_core sunrpc ds yenta_socket pcmcia_core dm_mod button battery ac uhci_hcd ehci_hcd hw_rad Pid: 4157, comm: modprobe Not tainted 2.6.9-5.ELsmp RIP: 0010:[<ffffffff80122e39>] <ffffffff80122e39>{__change_page_attr+1039} RSP: 0018:00000101266d7d78 EFLAGS: 00010282 RAX: 00000000000007e8 RBX: 00000100010002a0 RCX: 000000000000c7e8 RDX: 000001000000c7e8 RSI: 0000010000000000 RDI: ffffffff80312975 RBP: 00000100dfafe000 R08: 000ffffffffff000 R09: 0000000000000003 R10: 0000000000000000 R11: 0000000000000000 R12: 000001000000c7e8 R13: 8000000000000163 R14: 8000000000000163 R15: 0000010037d6e180 FS: 0000002a95565b00(0000) GS:ffffffff804bf300(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000007fbfffefd0 CR3: 0000000000101000 CR4: 00000000000006e0 Process modprobe (pid: 4157, threadinfo 00000101266d6000, task 0000010125e807f0) Stack: 00000100040ee790 0000000000000000 0000000000000000 0000000000000002 8000000000000163 ffffffff80122f13 ffffff000001ac00 000001012fc67180 00000000ffffffff 000001012cb09a48 Call Trace: <ffffffff80122f13>{change_page_attr+144} <ffffffff8012280a>{iounmap+309} <ffffffffa009911a>{:lpfc:lpfc_pci_remove_one+628} <ffffffff801e53d9>{pci_device_remove+34} <ffffffff802383d3>{device_release_driver+83} <ffffffff80238781>{bus_remove_driver+153} <ffffffff80238aa4>{driver_unregister+17} <ffffffff801e5207>{pci_unregister_driver+15}
<ffffffffa00a1d85>{:lpfc:lpfc_exit+13} <ffffffff8014ac11>{sys_delete_module+479} <ffffffff801dccf8>{__up_write+19} <ffffffff80165e1b>{sys_munmap+94} <ffffffff8010ffd2>{system_call+126} Code: 0f 0b 31 00 31 80 ff ff ff ff 6b 00 4c 89 f1 48 b8 ff ff ff RIP <ffffffff80122e39>{__change_page_attr+1039} RSP <00000101266d7d78> The platform is a Dell Poweredge 2800. We have other EM64T systems that do not exhibit the problem. In isolating the issue, we noted that we are the only 64-bit card in the system. We replaced our card with a Qlogic card (w/ 64-bit BARs) and saw the same issue when unloading their driver. Version-Release number of selected component (if applicable): How reproducible: Always Steps to Reproduce: 1. boot 2. load lpfc 3. rmmod lpfc Actual Results: crash Expected Results: no crash Additional info: