Bug 432831

Summary: RHEL3u7 BUG mm_inline.h:122
Product: Red Hat Enterprise Linux 3 Reporter: Issue Tracker <tao>
Component: kernel Assignee: Don Howard <dhoward>
Status: CLOSED NOTABUG QA Contact: Martin Jenner <mjenner>
Severity: high Docs Contact:
Priority: high    
Version: 3.7 CC: tao
Target Milestone: ---   
Target Release: ---   
Hardware: All   
OS: Linux   
Whiteboard:
Fixed In Version: Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of: Environment:
Last Closed: 2008-11-12 23:26:07 UTC Type: ---
Regression: --- Mount Type: ---
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: --- RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: --- Target Upstream Version:
Embargoed:

Description Issue Tracker 2008-02-14 17:28:16 UTC
Escalated to Bugzilla from IssueTracker

Comment 3 Takuma Umeya 2008-02-25 14:30:39 UTC
ENV:  

RHEL3u7 (kernel 2.4.21-40.ELsmp)



PROBLEM:

System crashed (frequency == once):


kernel BUG at
/usr/src/build/689586-i686/BUILD/kernel-2.4.21/linux-2.4.21/include/linux/mm_inline.h:122!
invalid operand: 0000
st audit autofs4 nfs lockd sunrpc bcm5700 bonding2 bonding1 e1000 bonding floppy
sg sr_mod block_dump scsi_dump diskdump diskdumplib scs
i_dump_register crc32
CPU:    2
EIP:    0060:[<c015484e>]    Tainted: P
EFLAGS: 00010206

EIP is at add_page_to_active_cache_list [kernel] 0xbe (2.4.21-40.ELsmp/i686)
eax: 014c0001   ebx: f7afa144   ecx: 00000000   edx: 00000000
esi: c1cc242c   edi: c03abf00   ebp: 00000003   esp: c8027e60
ds: 0068   es: 0068   ss: 0068
Process dump (pid: 11604, stackpage=c8027000)
Stack: f7afa144 c1cc242c 00000000 00008719 c015438a c1cc242c 00000003 f7afa144
       c1cc242c 00000000 c0148cca f7afa144 00008719 c67a8560 f7b30780 c0148f0c
       c1cc242c f7afa144 00008719 c67a8560 c1cc242c 0000001c 000086fd f7b30780






ANALYSIS:

The system panicked at the BUG_ON statement indicated:


117 static inline void add_page_to_active_cache_list(struct page * page, int age)
118 {
119         struct zone_struct * zone = page_zone(page);
120         DEBUG_LRU_PAGE(page);
121         SetPageActiveCache(page);
122         BUG_ON(PageCompound(page));                <-------------  BUG here
123         list_add(&page->lru, &zone->active_cache_list[age]);
124         page->age = age + zone->cache_age_bias;
125         zone->active_cache_count[age]++;
126         zone->active_cache_pages++;
127 }


The failed task:

crash> bt
PID: 11604  TASK: c8026000  CPU: 2   COMMAND: "dump"
 #0 [c8027cf4] disk_dump at f8bc2f52
 #1 [c8027d5c] try_crashdump at c0128f7a
 #2 [c8027d6c] die at c010c6f2
 #3 [c8027d80] do_invalid_op at c010c902
 #4 [c8027e20] error_code (via invalid_op) at c02af1c0
    EAX: 014c0001  EBX: f7afa144  ECX: 00000000  EDX: 00000000  EBP: 00000003
    DS:  0068      ESI: c1cc242c  ES:  0068      EDI: c03abf00
    CS:  0060      EIP: c015484e  ERR: ffffffff  EFLAGS: 00010206
 #5 [c8027e5c] add_page_to_active_cache_list at c015484e
 #6 [c8027e88] add_to_page_cache_unique at c0148cc5
 #7 [c8027e9c] page_cache_read at c0148f07
 #8 [c8027ec4] generic_file_readahead at c0149942
 #9 [c8027ee0] do_generic_file_read at c0149f1f
#10 [c8027f20] generic_file_new_read at c014a7d6
#11 [c8027f74] generic_file_read at c014a91a
#12 [c8027f94] sys_read at c0165125
#13 [c8027fc0] system_call at c02af068
    EAX: 00000003  EBX: 00000003  ECX: 09d3a000  EDX: 00000800
    DS:  002b      ESI: 00000000  ES:  002b      EDI: 00000800
    SS:  002b      ESP: bfff63c8  EBP: bfff6448
    CS:  0023      EIP: 08085b0d  ERR: 00000003  EFLAGS: 00000246
crash>    


See where and how we went wrong:

crash> gdb set dis intel
crash> dis -r c015484e
0xc0154790 <add_page_to_active_cache_list>:     push   ebp
0xc0154791 <add_page_to_active_cache_list+1>:   push   edi
0xc0154792 <add_page_to_active_cache_list+2>:   push   esi
0xc0154793 <add_page_to_active_cache_list+3>:   push   ebx
0xc0154794 <add_page_to_active_cache_list+4>:   mov    esi,DWORD PTR [esp+20]
0xc0154798 <add_page_to_active_cache_list+8>:   mov    ebp,DWORD PTR [esp+24]
0xc015479c <add_page_to_active_cache_list+12>:  movzx  eax,BYTE PTR [esi+27]
0xc01547a0 <add_page_to_active_cache_list+16>:  mov    edi,DWORD PTR
[eax*4-1068668020]
0xc01547a7 <add_page_to_active_cache_list+23>:  mov    eax,DWORD PTR [esi+24]
0xc01547aa <add_page_to_active_cache_list+26>:  and    eax,0x20
0xc01547ad <add_page_to_active_cache_list+29>:  je     0xc01547b7
<add_page_to_active_cache_list+39>
0xc01547af <add_page_to_active_cache_list+31>:  ud2a
0xc01547b7 <add_page_to_active_cache_list+39>:  mov    eax,DWORD PTR [esi+24]
0xc01547ba <add_page_to_active_cache_list+42>:  test   eax,0x80000
0xc01547bf <add_page_to_active_cache_list+47>:  je     0xc01547c9
<add_page_to_active_cache_list+57>
0xc01547c1 <add_page_to_active_cache_list+49>:  ud2a
0xc01547c9 <add_page_to_active_cache_list+57>:  mov    eax,DWORD PTR [esi+24]
0xc01547cc <add_page_to_active_cache_list+60>:  test   eax,0x80
0xc01547d1 <add_page_to_active_cache_list+65>:  je     0xc01547db
<add_page_to_active_cache_list+75>
0xc01547d3 <add_page_to_active_cache_list+67>:  ud2a
0xc01547db <add_page_to_active_cache_list+75>:  mov    eax,DWORD PTR [esi+24]
0xc01547de <add_page_to_active_cache_list+78>:  test   eax,0x100
0xc01547e3 <add_page_to_active_cache_list+83>:  je     0xc01547ed
<add_page_to_active_cache_list+93>
0xc01547e5 <add_page_to_active_cache_list+85>:  ud2a
0xc01547ed <add_page_to_active_cache_list+93>:  mov    eax,DWORD PTR [esi+24]
0xc01547f0 <add_page_to_active_cache_list+96>:  test   eax,0x200
0xc01547f5 <add_page_to_active_cache_list+101>: je     0xc01547ff
<add_page_to_active_cache_list+111>
0xc01547f7 <add_page_to_active_cache_list+103>: ud2a
0xc01547ff <add_page_to_active_cache_list+111>: lock bts DWORD PTR [esi+24],0x13

0xc0154805 <add_page_to_active_cache_list+117>: mov    eax,DWORD PTR [esi+24]  
<----- ESI: c1cc242c

crash> p/x 0xc1cc242c +24
$1 = 0xc1cc2444
crash> rd 0xc1cc2444
c1cc2444:  014c0001                              ..L.

This value also matches EAX in the register dump (good):

0xc0154808 <add_page_to_active_cache_list+120>: test   eax,0x400000            
<----- EAX: 014c0001

crash> p/x 0x014c0001 && 0x400000
$2 = 0x1   <--- true (ZF = 0)

0xc015480d <add_page_to_active_cache_list+125>: jne    0xc015484e
<add_page_to_active_cache_list+190>

75 cb      JNE rel8   Jump short if not equal (ZF=0)  to our BUG statement



0xc015480f <add_page_to_active_cache_list+127>: lea    edx,[edi+ebp*8]
0xc0154812 <add_page_to_active_cache_list+130>: mov    eax,DWORD PTR [edx+4416]
0xc0154818 <add_page_to_active_cache_list+136>: lea    ecx,[esi+28]
0xc015481b <add_page_to_active_cache_list+139>: lea    ebx,[edx+4416]
0xc0154821 <add_page_to_active_cache_list+145>: mov    DWORD PTR [eax+4],ecx
0xc0154824 <add_page_to_active_cache_list+148>: mov    DWORD PTR [esi+28],eax
0xc0154827 <add_page_to_active_cache_list+151>: mov    DWORD PTR [ecx+4],ebx
0xc015482a <add_page_to_active_cache_list+154>: mov    DWORD PTR [edx+4416],ecx
0xc0154830 <add_page_to_active_cache_list+160>: movzx  eax,BYTE PTR [edi+4277]
0xc0154837 <add_page_to_active_cache_list+167>: add    eax,ebp
0xc0154839 <add_page_to_active_cache_list+169>: mov    BYTE PTR [esi+44],al
0xc015483c <add_page_to_active_cache_list+172>: inc    DWORD PTR [edi+ebp*4+4212]
0xc0154843 <add_page_to_active_cache_list+179>: inc    DWORD PTR [edi+4108]
0xc0154849 <add_page_to_active_cache_list+185>: pop    ebx
0xc015484a <add_page_to_active_cache_list+186>: pop    esi
0xc015484b <add_page_to_active_cache_list+187>: pop    edi
0xc015484c <add_page_to_active_cache_list+188>: pop    ebp
0xc015484d <add_page_to_active_cache_list+189>: ret
0xc015484e <add_page_to_active_cache_list+190>: ud2a              <-------- BUG statement
0xc0154856 <add_page_to_active_cache_list+198>: jmp    0xc015480f
<add_page_to_active_cache_list+127>
0xc0154858 <add_page_to_active_cache_list+200>: nop
0xc0154859 <add_page_to_active_cache_list+201>: lea    esi,[esi]

0xc0154860 <del_page_from_active_anon_list>:    push   esi


So if the 0x400000 bit (the PageCompound() test) is set in page->flags, we hit the BUG.


So what do we know about the page:

The page pointer must be in ESI, since the flags are read from offset 24 ([esi+24]), e.g.:

crash> struct -o page
struct page {
   [0] struct list_head list;
   [8] struct address_space *mapping;
  [12] long unsigned int index;
  [16] struct page *next_hash;
  [20] atomic_t count;
  [24] long unsigned int flags;
  [28] struct list_head lru;
       union {
           struct pte_chain *chain;
           pte_addr_t direct;
  [36] } pte;
  [44] unsigned char age;
  [48] struct page **pprev_hash;
  [52] struct buffer_head *buffers;
  [56] void *virtual;
}
SIZE: 60



So from our register dump, here it is:


crash> struct page c1cc242c   <--- looks reasonable
struct page {
  list = {
    next = 0xc16a574c,
    prev = 0xf7afa144
  },
  mapping = 0xf7afa144,
  index = 34585,
  next_hash = 0x0,
  count = {
    counter = 2
  },
  flags = 21757953,  == 0x14c0001   PG_locked PG_reclaim PG_compound PG_truncate
(Zone#1 Normal)
  lru = {
    next = 0xc1cc242c,    <--- does point to the head page, correct for compound
    prev = 0x0            <--- default destructor
  },
  pte = {
    chain = 0x0,
    direct = 0
  },
  age = 0 '\\0',
  pprev_hash = 0xc67a8560,
  buffers = 0x0,
  virtual = 0xf6700000
}



I checked some of the associated pages and they appear to be OK. So it looks to
me like we have gone down an unexpected code path. Based on what I've found,
that is not entirely surprising: the compound page support looks like a bit of
a hack, and compound pages would need special handling of the LRU fields (as
seen above, lru.next points to the head page and lru.prev holds the
destructor). That would presumably also explain the liberal use of BUG
statements.


The mapping:

crash> struct address_space 0xf7afa144
struct address_space {
  clean_pages = {
    next = 0xc1cc242c,
    prev = 0xc1d0a6b4
  },
  dirty_pages = {
    next = 0xf7afa14c,
    prev = 0xf7afa14c
  },
  locked_pages = {
    next = 0xf7afa154,
    prev = 0xf7afa154
  },
  nrpages = 4514,
  a_ops = 0xc03af040,
  host = 0xf7afa080,
  i_mmap = 0x0,
  i_mmap_shared = 0x0,
  i_shared_lock = {
    lock = 1
  },
  gfp_mask = 464  == __GFP_WAIT __GFP_IO  __GFP_HIGHIO  __GFP_FS == GFP_USER (maybe)
}


It is associated with the block dev:

crash> struct address_space_operations 0xc03af040
struct address_space_operations {
  writepage = 0xc016d840 <blkdev_writepage>,
  readpage = 0xc016d860 <blkdev_readpage>,
  sync_page = 0xc016ac20 <block_sync_page>,
  prepare_write = 0xc016d880 <blkdev_prepare_write>,
  commit_write = 0xc016d8b0 <blkdev_commit_write>,
  bmap = 0,
  flushpage = 0,
  releasepage = 0,
  direct_IO = 0,
  direct_sector_IO = 0xc016d7d0 <blkdev_direct_IO>,
  removepage = 0
}


The complete (relevant) call flow appears to be:


117 static inline void add_page_to_active_cache_list(struct page * page, int age)
118 {
119         struct zone_struct * zone = page_zone(page);
120         DEBUG_LRU_PAGE(page);
121         SetPageActiveCache(page);
122         BUG_ON(PageCompound(page));
123         list_add(&page->lru, &zone->active_cache_list[age]);
124         page->age = age + zone->cache_age_bias;
125         zone->active_cache_count[age]++;
126         zone->active_cache_pages++;
127 }
128 
129 static inline void add_page_to_active_list(struct page * page, int age)
130 {
131         if (page_anon(page))
132                 add_page_to_active_anon_list(page, age);
133         else
134                 add_page_to_active_cache_list(page, age);   <----------
135 }
136 


 97 void lru_cache_add(struct page * page)
 98 {
 99         if (!PageLRU(page)) {
100                 lru_lock(page_zone(page));
101                 /* pages from a WIRED inode go directly to the wired list */
102                 if (page->mapping && (page->mapping->gfp_mask & __GFP_WIRED))
103                         add_page_to_wired_list(page);
104                 else if (!TestandSetPageLRU(page))
105                         add_page_to_active_list(page, INITIAL_AGE);  <------
106                 lru_unlock(page_zone(page));
107         }
108 }
109 


700 int add_to_page_cache_unique(struct page * page,
701         struct address_space *mapping, unsigned long offset,
702         struct page **hash)
703 {
704         int err;
705         struct page *alias;
706 
707         lock_pagecache();
708         alias = __find_page_nolock(mapping, offset, *hash);
709 
710         err = 1;
711         if (!alias) {
712                 __add_to_page_cache(page,mapping,offset,hash);
713                 err = 0;
714         }
715 
716         unlock_pagecache();
717         if (!err)
718                 lru_cache_add(page);  <-----------
719         return err;
720 }




742 /*
743  * This adds the requested page to the page cache if it isn't already there,
744  * and schedules an I/O to read in its contents from disk.
745  */
746 static int FASTCALL(page_cache_read(struct file * file, unsigned long offset));
747 static int page_cache_read(struct file * file, unsigned long offset)
748 {
749         struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
750         struct page **hash = page_hash(mapping, offset);
751         struct page *page; 
752 
753         lock_pagecache_readonly();
754         page = __find_page_nolock(mapping, offset, *hash);
755         unlock_pagecache_readonly();
756         if (page)
757                 return 0;
758 
759         page = page_cache_alloc(mapping);
760         if (!page)
761                 return -ENOMEM;
762 
763         if (!add_to_page_cache_unique(page, mapping, offset, hash)) {   <-----
764                 int error = mapping->a_ops->readpage(file, page);
765                 SetPageFresh(page);
766                 page_cache_release(page);
767                 return error;
768         }
769         /*
770          * We arrive here in the unlikely event that someone 
771          * raced with us and added our page to the cache first.
772          */
773         page_cache_release(page);
774         return 0;
775 }