Under gdb: Starting program: /usr/sbin/dnsmasq -k [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib64/libthread_db.so.1". Program received signal SIGSEGV, Segmentation fault. allocate_rfd (fdlp=0xaaaaaab31e00, serv=0xaaaaaab20010) at forward.c:2097 2097 return serv->sfd->fd; (gdb) bt #0 allocate_rfd (fdlp=0xaaaaaab31e00, serv=0xaaaaaab20010) at forward.c:2097 #1 0x0000aaaaaaabd528 in forward_query (udpfd=4, udpaddr=udpaddr@entry=0xfffffffff078, dst_addr=dst_addr@entry=0xfffffffff058, dst_iface=dst_iface@entry=1, header=header@entry=0xaaaaaab2b1a0, plen=plen@entry=54, now=now@entry=1681389382, forward=0xaaaaaab31db0, forward@entry=0x0, ad_reqd=ad_reqd@entry=1, do_bit=do_bit@entry=0) at forward.c:500 #2 0x0000aaaaaaabe160 in receive_query (listen=0xaaaaaab24150, now=1681389382) at forward.c:1490 #3 0x0000aaaaaaac24d4 in check_dns_listeners (now=1681389382) at dnsmasq.c:1619 #4 0x0000aaaaaaaa9620 in main (argc=<optimized out>, argv=<optimized out>) at dnsmasq.c:1070
(gdb) bt #0 allocate_rfd (fdlp=0xaaaaaab31e00, serv=0xaaaaaab20010) at forward.c:2097 #1 0x0000aaaaaaabd528 in forward_query (udpfd=4, udpaddr=udpaddr@entry=0xfffffffff078, dst_addr=dst_addr@entry=0xfffffffff058, dst_iface=dst_iface@entry=1, header=header@entry=0xaaaaaab2b1a0, plen=plen@entry=54, now=now@entry=1681389382, forward=0xaaaaaab31db0, forward@entry=0x0, ad_reqd=ad_reqd@entry=1, do_bit=do_bit@entry=0) at forward.c:500 #2 0x0000aaaaaaabe160 in receive_query (listen=0xaaaaaab24150, now=1681389382) at forward.c:1490 #3 0x0000aaaaaaac24d4 in check_dns_listeners (now=1681389382) at dnsmasq.c:1619 #4 0x0000aaaaaaaa9620 in main (argc=<optimized out>, argv=<optimized out>) at dnsmasq.c:1070 (gdb) frame 1 #1 0x0000aaaaaaabd528 in forward_query (udpfd=4, udpaddr=udpaddr@entry=0xfffffffff078, dst_addr=dst_addr@entry=0xfffffffff058, dst_iface=dst_iface@entry=1, header=header@entry=0xaaaaaab2b1a0, plen=plen@entry=54, now=now@entry=1681389382, forward=0xaaaaaab31db0, forward@entry=0x0, ad_reqd=ad_reqd@entry=1, do_bit=do_bit@entry=0) at forward.c:500 500 if (type == (start->flags & SERV_TYPE) && (gdb) info local fd = <optimized out> firstsentto = 0xaaaaaab20010 oph = <optimized out> pheader = 0xaaaaaab2b1c2 "\020" subnet = 0 forwarded = 0 edns0_len = 23 domain = 0x0 type = 0 norebind = 0 addrp = 0x0 flags = 0 fwd_flags = <optimized out> start = 0xaaaaaab20010 sd = 0xaaaaaab21d00 hash = <optimized out> do_dnssec = <optimized out> gotname = 128 oph = <optimized out> (gdb) frame 0 #0 allocate_rfd (fdlp=0xaaaaaab31e00, serv=0xaaaaaab20010) at forward.c:2097 2097 return serv->sfd->fd; (gdb) info local i = <optimized out> rfl = <optimized out> finger = 0 rfl_poll = <optimized out> (gdb) p serv->sfd $10 = (struct serverfd *) 0x1000100000000 (gdb) p *serv->sfd Cannot access memory at address 0x1000100000000 (gdb) p *serv $11 = {addr = {sa = {sa_family = 6, sa_data = "\a\000\006\000\006\000\003\000\a\000\005\000\a"}, in = {sin_family = 6, sin_port = 7, sin_addr = {s_addr = 
393222}, sin_zero = "\003\000\a\000\005\000\a"}, in6 = {sin6_family = 6, sin6_port = 7, sin6_flowinfo = 393222, sin6_addr = {__in6_u = {__u6_addr8 = "\003\000\a\000\005\000\a\000\005\000\005\000\005\000\a", __u6_addr16 = {3, 7, 5, 7, 5, 5, 5, 7}, __u6_addr32 = {458755, 458757, 327685, 458757}}}, sin6_scope_id = 458759}}, source_addr = {sa = {sa_family = 1, sa_data = "\000\000\003\000\002\000\001\000\000\000\000\000\000"}, in = {sin_family = 1, sin_port = 0, sin_addr = {s_addr = 131075}, sin_zero = "\001\000\000\000\000\000\000"}, in6 = {sin6_family = 1, sin6_port = 0, sin6_flowinfo = 131075, sin6_addr = {__in6_u = { __u6_addr8 = "\001", '\000' <repeats 14 times>, __u6_addr16 = {1, 0, 0, 0, 0, 0, 0, 0}, __u6_addr32 = {1, 0, 0, 0}}}, sin6_scope_id = 0}}, interface = "\000\000\001\000\000\000\a\000\001\000\004\000\000\000\001\000\002", ifindex = 65536, sfd = 0x1000100000000, domain = 0x1 <error: Cannot access memory at address 0x1>, flags = 0, tcpfd = 0, edns_pktsz = 65538, pktsz_reduced = 562954248454144, queries = 1, failed_queries = 0, uid = 2863813776, serv_domain = 0xaaaaaab32170, next = 0xaaaaaab32f90}
There were two forwarded queries at the time. The first one seems okay; the other one is obviously corrupted. It is not yet clear how that happened.
(gdb) set $f = $daemon->frec_list (gdb) p *$f $29 = {frec_src = {source = {sa = {sa_family = 0, sa_data = '\000' <repeats 13 times>}, in = {sin_family = 0, sin_port = 0, sin_addr = { s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, in6 = {sin6_family = 0, sin6_port = 0, sin6_flowinfo = 0, sin6_addr = { __in6_u = {__u6_addr8 = '\000' <repeats 15 times>, __u6_addr16 = {0, 0, 0, 0, 0, 0, 0, 0}, __u6_addr32 = {0, 0, 0, 0}}}, sin6_scope_id = 0}}, dest = {addr = {addr4 = {s_addr = 0}, addr6 = {__in6_u = {__u6_addr8 = '\000' <repeats 15 times>, __u6_addr16 = { 0, 0, 0, 0, 0, 0, 0, 0}, __u6_addr32 = {0, 0, 0, 0}}}, log = {keytag = 0, algo = 0, digest = 0}, dnssec = {class = 0, type = 0}}}, iface = 0, log_id = 0, fd = 0, orig_id = 0, next = 0x0}, sentto = 0x0, rfds = 0x0, new_id = 0, forwardall = 0, flags = 0, time = 1681389374, hash = {0x0 <repeats 32 times>}, class = 0, work_counter = 0, stash = 0x0, stash_len = 0, dependent = 0x0, blocking_query = 0x0, next = 0xaaaaaab31db0} (gdb) p *$f->next $30 = {frec_src = {source = {sa = {sa_family = 2, sa_data = "\324\376\177\000\000\001\000\000\000\000\000\000\000"}, in = {sin_family = 2, sin_port = 65236, sin_addr = {s_addr = 16777343}, sin_zero = "\000\000\000\000\000\000\000"}, in6 = {sin6_family = 2, sin6_port = 65236, sin6_flowinfo = 16777343, sin6_addr = {__in6_u = { __u6_addr8 = "\000\000\000\000\000\000\000\000h\362\377\377\377\377\000", __u6_addr16 = {0, 0, 0, 0, 62056, 65535, 65535, 0}, __u6_addr32 = {0, 0, 4294963816, 65535}}}, sin6_scope_id = 1681389374}}, dest = {addr = {addr4 = {s_addr = 16777343}, addr6 = { __in6_u = {__u6_addr8 = "\177\000\000\001\252\252\000\000@\034\261\252\252\252\000", __u6_addr16 = {127, 256, 43690, 0, 7232, 43697, 43690, 0}, __u6_addr32 = {16777343, 43690, 2863733824, 43690}}}, log = {keytag = 127, algo = 256, digest = 43690}, dnssec = { class = 
127, type = 256}}}, iface = 1, log_id = 31, fd = 4, orig_id = 51516, next = 0x0}, sentto = 0x0, rfds = 0x0, new_id = 38919, forwardall = 0, flags = 1568, time = 1681389382, hash = {0x62881b9fffdaaf63 <error: Cannot access memory at address 0x62881b9fffdaaf63>, 0x490cbb3edf6d061c <error: Cannot access memory at address 0x490cbb3edf6d061c>, 0x321ced3370ce1fc8 <error: Cannot access memory at address 0x321ced3370ce1fc8>, 0x678576a3dfa214ec <error: Cannot access memory at address 0x678576a3dfa214ec>, 0x0 <repeats 28 times>}, class = 0, work_counter = 50, stash = 0x0, stash_len = 0, dependent = 0x0, blocking_query = 0x0, next = 0x0}
After analysing the provided core dump, I think this is caused by the change made for bug #1919894. That change added a new structure, which does not seem to behave well in this specific configuration.
It seems the problem is triggered when an existing record's TTL is reduced to 0 and a new record is fetched instead. It happens faster for names with a short TTL.
Minimal reproducer (dnsmasq configuration):
server=/example.com/#
max-cache-ttl=1
address=/#/
Then retry the query a few times, with a delay between attempts:
dig @localhost example.com && sleep 1 && dig @localhost example.com && sleep 1 && dig @localhost example.com
Created a candidate fix in this MR: https://gitlab.com/redhat/centos-stream/rpms/dnsmasq/-/merge_requests/12
The fix improves freeing in the cleanup_servers function. It ensures that even servers not used as last_server are unassigned when their server_domain is released. At the same time, it adds a check to also handle the case where a domain is using the common resolvers, which is marked by the SERV_USE_RESOLV flag in struct server. server_domain_check() is now performed in this case as well, ensuring that the domain in use is not released. An additional check helps avoid creating duplicate server_domain entries.
A backport to RHEL 8.6 was requested and later included. The regression has been present since the change for bug #1919894.
There is nothing strictly internal in the public comments, so I am opening this bug to the public to make it viewable from bug #2182342.
An additional change is needed to pass the DBus part of the test Regression/domain-query-the-last-known-server. This additional change makes sure server_domain records are reused more often and prevents the creation of duplicate entries for the same domain. It also ensures that entries added via DBus do not have to wait until check_servers to have their serv_domain pointer set properly.