[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

kern/73321: Reproducible Panic (LOR: I4B / INET6)

>Number:         73321
>Category:       kern
>Synopsis:       Reproducible Panic (LOR: I4B / INET6)
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Sat Oct 30 19:10:22 GMT 2004
>Originator:     Stefan Esser
>Release:        FreeBSD 5.3-STABLE i386
System: FreeBSD gw.athome FreeBSD 5.3-STABLE #0: Sat Oct 30 13:00:11 CEST 2004
 gw.athome:/usr/src/sys/i386/compile/GW i386
	kernel configured with I4B (ISDN) and INET6
There appears to be a reproducible LOR in a kernel built with both
IPv6 and ISDN support. This combination leads to a panic after 1 hour
of apparently correct operation:

Fatal trap 12: page fault while in kernel mode
fault virtual address   = 0x8
fault code              = supervisor read, page not present
instruction pointer     = 0x8:0xc0589be6
stack pointer           = 0x10:0xc748ccb8
frame pointer           = 0x10:0xc748ccc4
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 26 (swi5: clock sio)
Uptime: 1h0m2s

(kgdb) bt
#7  0xc04d01ba in kdb_trap (type=12, code=0, tf=0xc748cc78)
    at ../../../kern/subr_kdb.c:418
#8  0xc0615c99 in trap_fatal (frame=0xc748cc78, eva=8)
    at ../../../i386/i386/trap.c:804
#9  0xc06159bb in trap_pfault (frame=0xc748cc78, usermode=0, eva=8)
    at ../../../i386/i386/trap.c:727
#10 0xc0615581 in trap (frame=
      {tf_fs = 24, tf_es = 16, tf_ds = 16, tf_edi = 14848, tf_esi = -1066833824, tf_ebp = -951530300, tf_isp = -951530332, tf_ebx = -1057769088, tf_edx = 2326,tf_ecx = -1066665168, tf_eax = 0, tf_trapno = 12, tf_err = 0, tf_eip = -1067934746, tf_cs = 8, tf_eflags = 66182, tf_esp = 6, tf_ss = 4})
    at ../../../i386/i386/trap.c:417
#11 0xc0589be6 in nd6_slowtimo (ignored_arg=0x0)
    at ../../../netinet6/nd6.c:1801
#12 0xc04c3ae7 in softclock (dummy=0x0) at ../../../kern/kern_timeout.c:259
#13 0xc04a1775 in ithread_loop (arg=0xc0e86500)
    at ../../../kern/kern_intr.c:547
#14 0xc04a06a9 in fork_exit (callout=0xc04a1600 <ithread_loop>,
    arg=0xc0e86500, frame=0xc748cd48) at ../../../kern/kern_fork.c:811

(kgdb) frame 11
#11 0xc0589be6 in nd6_slowtimo (ignored_arg=0x0)
    at ../../../netinet6/nd6.c:1801
1801                    nd6if = ND_IFINFO(ifp);

(kgdb) list
1797            callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
1798                nd6_slowtimo, NULL);
1799            IFNET_RLOCK();
1800            for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
1801        ======>     nd6if = ND_IFINFO(ifp);
1802                    if (nd6if->basereachable && /* already initialized */
1803                        (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
1804                            /*
1805                             * Since reachable time rarely changes by router

(kgdb) up
#12 0xc04c3ae7 in softclock (dummy=0x0) at ../../../kern/kern_timeout.c:259
259                                     c_func(c_arg);

(kgdb) list
254                                     }
255     #ifdef DIAGNOSTIC
256                                     binuptime(&bt1);
257                                     mtx_lock(&dont_sleep_in_callout);
258     #endif
259         ======>                         c_func(c_arg);
260     #ifdef DIAGNOSTIC
261                                     mtx_unlock(&dont_sleep_in_callout);
262                                     binuptime(&bt2);
263                                     bintime_sub(&bt2, &bt1);

#13 0xc04a1775 in ithread_loop (arg=0xc0e86500)
    at ../../../kern/kern_intr.c:547
547                                     ih->ih_handler(ih->ih_argument);

(kgdb) list
542                                             mtx_unlock(&ithd->it_lock);
543                                             goto restart;
544                                     }
545                                     if ((ih->ih_flags & IH_MPSAFE) == 0)
546                                             mtx_lock(&Giant);
547         ======>                     ih->ih_handler(ih->ih_argument);
548                                     if ((ih->ih_flags & IH_MPSAFE) == 0)
549                                             mtx_unlock(&Giant);
550                             }
551                             if (ithd->it_enable != NULL) {

Surprisingly, the panic always occurs after exactly 1h0m2s, with
a literally identical panic message each time ...

The ifp argument to ND_IFINFO always corresponds to some I4B device,
e.g. "isp0" or "ipr0".

A kernel built with DIAGNOSTICS and WITNESS halts after 1h0m8s (yes,
I was warned about reduced performance, but I didn't expect it to
affect the time required to enter the kernel debugger ;-)

lock order reversal
 1st 0xc06d7980 ifnet (ifnet) @ netinet6/nd6.c:1799
 2nd 0xc06a9d24 user map (user map) @ vm/vm_map.c:2997
KDB: stack backtrace:
kdb_backtrace(0,ffffffff,c06b37e8,c06b40a8,c0662dec) at kdb_backtrace+0x29
witness_checkorder(c06a9d24,9,c0644508,bb5) at witness_checkorder+0x570
_sx_xlock(c06a9d24,c06444ff,bb5) at _sx_xlock+0x5c
_vm_map_lock_read(c06a9ce0,c06444ff,bb5,1000046,c0e9677c) at _vm_map_lock_read+0x3b
vm_map_lookup(c748cb6c,0,1,c748cb70,c748cb60) at vm_map_lookup+0x30
vm_fault(c06a9ce0,0,1,0,c0e97000) at vm_fault+0x69
trap_pfault(c748cc34,0,8) at trap_pfault+0xdc
trap(18,10,10,3a00,c0684bc0) at trap+0x331
calltrap() at calltrap+0x5
--- trap 0xc, eip = 0xc0579aa6, esp = 0xc748cc74, ebp = 0xc748cc80 ---
nd6_slowtimo(0,c06af2e0,0,c0630943,101) at nd6_slowtimo+0x66
softclock(0) at softclock+0x1d7
ithread_loop(c0e86500,c748cd48,c0e86500,c049e610,0) at ithread_loop+0x144
fork_exit(c049e610,c0e86500,c748cd48) at fork_exit+0xa8
fork_trampoline() at fork_trampoline+0x8
--- trap 0x1, eip = 0, esp = 0xc748cd7c, ebp = 0 ---

This is exactly the same situation that led to the panic without
WITNESS (frame #17 here corresponds to frame #11 without WITNESS):

#9  0xc04c8beb in kdb_enter (msg=0x0) at cpufunc.h:56
#10 0xc04d2543 in witness_checkorder (lock=0xc06a9d24, flags=9,
    file=0xc0644508 "vm/vm_map.c", line=2997)
    at ../../../kern/subr_witness.c:952
#11 0xc04b63bc in _sx_xlock (sx=0xc06a9d24,
    file=0xc06444ff "../../../vm/vm_map.c", line=2997)
    at ../../../kern/kern_sx.c:169
#12 0xc05ba03b in _vm_map_lock_read (map=0x0, file=0x0, line=0)
    at ../../../vm/vm_map.c:380
#13 0xc05bd850 in vm_map_lookup (var_map=0xc748cb6c, vaddr=0,
    fault_typea=1 '\001', out_entry=0xc748cb70, object=0x0, pindex=0x0,
    out_prot=0x0, wired=0xc748cb48) at ../../../vm/vm_map.c:2997
#14 0xc05b5cb9 in vm_fault (map=0xc06a9ce0, vaddr=0, fault_type=1 '\001',
    fault_flags=0) at ../../../vm/vm_fault.c:234
#15 0xc05fee8c in trap_pfault (frame=0xc748cc34, usermode=0, eva=8)
    at ../../../i386/i386/trap.c:704
#16 0xc05feb61 in trap (frame=
      {tf_fs = 24, tf_es = 16, tf_ds = 16, tf_edi = 14848, tf_esi = -1066906688, tf_ebp = -951530368, tf_isp = -951530400, tf_ebx = -1057768640, tf_edx = 950, tf_ecx = 1, tf_eax = 0, tf_trapno = 12, tf_err = 0, tf_eip = -1068000602, tf_cs = 8, tf_eflags = 66182, tf_esp = 0, tf_ss = -1058511616})
    at ../../../i386/i386/trap.c:417
#17 0xc0579aa6 in nd6_slowtimo (ignored_arg=0x0)
    at ../../../netinet6/nd6.c:1801
#18 0xc04bcde7 in softclock (dummy=0x0) at ../../../kern/kern_timeout.c:259
#19 0xc049e754 in ithread_loop (arg=0xc0e86500)
    at ../../../kern/kern_intr.c:547
#20 0xc049da78 in fork_exit (callout=0xc049e610 <ithread_loop>,
    arg=0xc0e86500, frame=0xc748cd48) at ../../../kern/kern_fork.c:811

Seems that this is a "real" LOR leading to a panic.

A kernel without INET6 but with I4B works flawlessly.

Boot message log:

Copyright (c) 1992-2004 The FreeBSD Project.
Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994
	The Regents of the University of California. All rights reserved.
FreeBSD 5.3-STABLE #1: Mon Oct 25 23:39:43 CEST 2004
WARNING: debug.mpsafenet forced to 0 as i4b_ipr requires Giant
WARNING: MPSAFE network stack disabled, expect reduced performance.
Timecounter "i8254" frequency 1193182 Hz quality 0
CPU: Pentium/P54C (99.72-MHz 586-class CPU)
  Origin = "GenuineIntel"  Id = 0x526  Stepping = 6
real memory  = 125829120 (120 MB)
avail memory = 117673984 (112 MB)
npx0: [FAST]
npx0: <math processor> on motherboard
npx0: INT 16 interface
pcib0: <Host to PCI bridge> pcibus 0 on motherboard
pci0: <PCI bus> on pcib0
isab0: <PCI-ISA bridge> at device 1.0 on pci0
isa0: <ISA bus> on isab0
atapci0: <Intel PIIX3 WDMA2 controller> port 0xffa0-0xffaf,0x376,0x170-0x177,0x3f6,0x1f0-0x1f7 at device 1.1 on pci0
ata0: channel #0 on atapci0
ata1: channel #1 on atapci0
pci0: <serial bus, USB> at device 1.2 (no driver attached)
xl0: <3Com 3c905B-TX Fast Etherlink XL> port 0x5000-0x507f mem 0x80000000-0x8000007f irq 10 at device 6.0 on pci0
miibus0: <MII bus> on xl0
xlphy0: <3Com internal media interface> on miibus0
xlphy0:  10baseT, 10baseT-FDX, 100baseTX, 100baseTX-FDX, auto
xl0: Ethernet address: 00:50:da:48:eb:64
vx0: <3COM 3C590 Etherlink III PCI> port 0x5080-0x509f irq 9 at device 7.0 on pci0
utp/aui/bnc[*utp*]: disable 'auto select' with DOS util!vx0: Ethernet address: 00:a0:24:9e:cc:54
pci0: <display, VGA> at device 8.0 (no driver attached)
cpu0 on motherboard
orm0: <ISA Option ROM> at iomem 0xc0000-0xc7fff on isa0
atkbdc0: <Keyboard controller (i8042)> at port 0x64,0x60 on isa0
atkbd0: <AT Keyboard> flags 0x1 irq 1 on atkbdc0
atkbd0: [GIANT-LOCKED]
fdc0: <Enhanced floppy controller> at port 0x3f0-0x3f5 irq 6 drq 2 on isa0
fdc0: [FAST]
fd0: <1440-KB 3.5" drive> on fdc0 drive 0
isic0 at port 0x580-0x59f,0x180-0x19f,0x980-0x99f,0xd80-0xd9f irq 5 flags 0x3 on isa0
isic0: passive stack unit 0
isic0: Teles S0/16.3
ppc0: <Parallel port> at port 0x3bc-0x3c3 irq 7 flags 0xc on isa0
ppc0: PC87306 chipset (ECP/EPP) in ECP+EPP mode (EPP 1.9)
ppc0: FIFO with 16/16/8 bytes threshold
ppbus0: <Parallel port bus> on ppc0
lpt0: <Printer> on ppbus0
lpt0: Interrupt-driven port
ppi0: <Parallel I/O> on ppbus0
sc0: <System console> at flags 0x100 on isa0
sc0: VGA <16 virtual consoles, flags=0x300>
sio0 at port 0x3f8-0x3ff irq 4 flags 0x10 on isa0
sio0: type 16550A
sio1: configured irq 3 not in bitmap of probed irqs 0
sio1: port may not be enabled
vga0: <Generic ISA VGA> at port 0x3c0-0x3df iomem 0xa0000-0xbffff on isa0
unknown: <PNP0303> can't assign resources (port)
unknown: <PNP0700> can't assign resources (port)
unknown: <PNP0501> can't assign resources (port)
unknown: <PNP0401> can't assign resources (port)
Timecounter "TSC" frequency 99716832 Hz quality 800
Timecounters tick every 10.000 msec
i4bing: 4 i4b NetGraph ISDN B-channel device(s) attached
i4brbch: 4 raw B channel access device(s) attached
i4btel: 4 ISDN telephony interface device(s) attached
i4bisppp: 4 ISDN SyncPPP device(s) attached
i4b: ISDN call control device attached
i4btrc: 4 ISDN trace device(s) attached
i4bipr: 4 IP over raw HDLC ISDN device(s) attached (VJ header compression)
i4bctl: ISDN system control port attached
ad0: 4028MB <IBM-DHEA-34330/HE4IA43B> [8184/16/63] at ata0-master WDMA2
acd0: CDROM <GCD-R540/1.16> at ata1-master PIO3
Mounting root from ufs:/dev/ad0s1a
WARNING: / was not properly dismounted

Kernel config file (simplified for debugging, with DIAGNOSTICS/WITNESS):

# ISDN586 -- Pentium with i4b ISDN driver

machine		i386
cpu		I586_CPU
ident		"ISDN586"

makeoptions	DEBUG="-g"

options         MUTEX_DEBUG
options         WITNESS
options         WITNESS_KDB
options         WITNESS_SKIPSPIN

options		DDB
options		KDB
options		KDB_TRACE

options		SCHED_4BSD

options 	INET
options 	INET6

options 	NETGRAPH

options 	FFS
options		UFS_DIRHASH
options 	COMPAT_43
options         SYSVSHM
options         SYSVMSG
options         SYSVSEM
options		NO_F00F_HACK

device		isa
device		pci

device		fdc

device		ata
device		atadisk
device		atapicd
options 	ATA_STATIC_ID

device		miibus
device		vx
device		xl

device		atkbdc
device		atkbd
device		psm
device		vga
device		sc

device		npx

device		sio
device		ppc
device		ppbus
device		lpt
device		ppi

device		loop
device		bpf
device		ether
device		tun
device		pty
device		md
device		random
device		io
device		mem

device		isic
device		"i4b"
device		"i4bctl"
device		"i4bq921"
device		"i4bq931"
device		"i4btrc"	4
device		"i4brbch"	4
device		"i4btel"	4
device		"i4bipr"	4
device		"i4bisppp"	4
device		"i4bing"	4
options		ELSA_QS1PCI
options		TEL_S0_16_3
options		IPR_VJ
options		IPR_LOG=32

Build a kernel with I4B and INET6, boot and wait.

I've got kernel images and crash dumps with and without DIAGNOSTICS and
WITNESS in case some information is missing from this PR.
Work around: Remove either I4B or INET6 support from your kernel.


Visit your host, monkey.org