Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 145862 Details for
Bug 214212
[RHEL3 RFE] Add Linux Virtual Server SyncID support
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly, please enable it.
Backported ipvs 1.0.10 and syncd patch
linux-2.4.21-ipvs-syncd.patch (text/x-patch), 44.51 KB, created by
Lawrence Walton
on 2007-01-17 21:45:08 UTC
(
hide
)
Description:
Backported ipvs 1.0.10 and syncd patch
Filename:
MIME Type:
Creator:
Lawrence Walton
Created:
2007-01-17 21:45:08 UTC
Size:
44.51 KB
patch
obsolete
>diff -urN linux-2.4.21/net/ipv4/ipvs/Config.in linux-2.4.21-build-01-17/net/ipv4/ipvs/Config.in >--- linux-2.4.21/net/ipv4/ipvs/Config.in 2007-01-17 03:17:30.000000000 -0800 >+++ linux-2.4.21-build-01-17/net/ipv4/ipvs/Config.in 2007-01-15 16:45:53.000000000 -0800 >@@ -1,13 +1,13 @@ > # >-# IPVS configuration >+# IP VS configuration > # > mainmenu_option next_comment > comment ' IP: Virtual Server Configuration' > >-tristate 'IP virtual server support (EXPERIMENTAL)' CONFIG_IP_VS >+tristate 'virtual server support (EXPERIMENTAL)' CONFIG_IP_VS > if [ "$CONFIG_IP_VS" != "n" ]; then > bool ' IP virtual server debugging' CONFIG_IP_VS_DEBUG >- int ' IPVS connection hash table size (the Nth power of 2)' CONFIG_IP_VS_TAB_BITS 12 >+ int ' IPVS connection table size (the Nth power of 2)' CONFIG_IP_VS_TAB_BITS 12 > comment 'IPVS scheduler' > dep_tristate ' round-robin scheduling' CONFIG_IP_VS_RR $CONFIG_IP_VS > dep_tristate ' weighted round-robin scheduling' CONFIG_IP_VS_WRR $CONFIG_IP_VS >@@ -17,6 +17,8 @@ > dep_tristate ' locality-based least-connection with replication scheduling' CONFIG_IP_VS_LBLCR $CONFIG_IP_VS > dep_tristate ' destination hashing scheduling' CONFIG_IP_VS_DH $CONFIG_IP_VS > dep_tristate ' source hashing scheduling' CONFIG_IP_VS_SH $CONFIG_IP_VS >+ dep_tristate ' shortest expected delay scheduling' CONFIG_IP_VS_SED $CONFIG_IP_VS >+ dep_tristate ' never queue scheduling' CONFIG_IP_VS_NQ $CONFIG_IP_VS > comment 'IPVS application helper' > dep_tristate ' FTP protocol helper' CONFIG_IP_VS_FTP $CONFIG_IP_VS > fi >diff -urN linux-2.4.21/net/ipv4/ipvs/ip_vs_conn.c linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_conn.c >--- linux-2.4.21/net/ipv4/ipvs/ip_vs_conn.c 2007-01-17 03:17:41.000000000 -0800 >+++ linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_conn.c 2007-01-15 16:45:54.000000000 -0800 >@@ -5,7 +5,7 @@ > * high-performance and highly available server based on a > * cluster of servers. 
> * >- * Version: $Id: ip_vs_conn.c,v 1.28.2.1 2002/11/14 10:05:23 wensong Exp $ >+ * Version: $Id: ip_vs_conn.c,v 1.28.2.5 2003/08/09 13:27:08 wensong Exp $ > * > * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> > * Peter Kese <peter.kese@ijs.si> >@@ -43,9 +43,8 @@ > #include <net/route.h> /* for ip_route_output_key */ > #include <linux/netfilter.h> > #include <linux/netfilter_ipv4.h> >- >-#include <linux/random.h> > #include <linux/jhash.h> >+#include <linux/random.h> > > #include <net/ip_vs.h> > >@@ -54,19 +53,18 @@ > * Connection hash table: for input and output packets lookups of IPVS > */ > static struct list_head *ip_vs_conn_tab; >-static __u32 ip_vs_conn_hashrnd; > >-/* SLAB cache for IPVS connections */ >+/* SLAB cache for IPVS connections */ > static kmem_cache_t *ip_vs_conn_cachep; > >-/* counter for current IPVS connections */ >+/* counter for current IPVS connections */ > static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); > >-/* >- * No client port connection counter >- */ >+/* counter for no-client-port connections */ > static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); > >+/* random value for IPVS connection hash */ >+static unsigned int ip_vs_conn_rnd; > > /* > * Fine locking granularity for big connection hash table >@@ -124,21 +122,18 @@ > write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); > } > >-void ip_vs_conn_hashrnd_init(void) >-{ >- get_random_bytes(&ip_vs_conn_hashrnd, sizeof(__u32)); >-} > > /* > * Returns hash value for IPVS connection entry > */ >-static __u32 ip_vs_conn_hashkey(__u16 proto, __u32 addr, __u16 port) >+static unsigned >+ip_vs_conn_hashkey(unsigned proto, __u32 addr, __u16 port) > { >- return jhash_3words((__u32) proto, addr, (__u32) port, >- ip_vs_conn_hashrnd) >+ return jhash_3words(addr, port, proto, ip_vs_conn_rnd) > & IP_VS_CONN_TAB_MASK; > } > >+ > /* > * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. > * returns bool success. 
>@@ -301,7 +296,7 @@ > void ip_vs_conn_put(struct ip_vs_conn *cp) > { > /* reset it expire in its timeout */ >- mod_timer(&cp->conn_timer, jiffies+cp->timeout); >+ mod_timer(&cp->timer, jiffies+cp->timeout); > > __ip_vs_conn_put(cp); > } >@@ -479,7 +474,7 @@ > } > > >-int ip_vs_set_state_timeout(struct ip_vs_conn *cp, int state) >+static inline int vs_set_state_timeout(struct ip_vs_conn *cp, int state) > { > struct ip_vs_timeout_table *vstim = cp->timeout_table; > >@@ -561,7 +556,7 @@ > } > } > >- return ip_vs_set_state_timeout(cp, new_state); >+ return vs_set_state_timeout(cp, new_state); > } > > >@@ -579,10 +574,10 @@ > ret = vs_tcp_state(cp, state_off, tp); > break; > case IPPROTO_UDP: >- ret = ip_vs_set_state_timeout(cp, IP_VS_S_UDP); >+ ret = vs_set_state_timeout(cp, IP_VS_S_UDP); > break; > case IPPROTO_ICMP: >- ret = ip_vs_set_state_timeout(cp, IP_VS_S_ICMP); >+ ret = vs_set_state_timeout(cp, IP_VS_S_ICMP); > break; > default: > ret = -1; >@@ -598,7 +593,7 @@ > */ > int ip_vs_conn_listen(struct ip_vs_conn *cp) > { >- ip_vs_set_state_timeout(cp, IP_VS_S_LISTEN); >+ vs_set_state_timeout(cp, IP_VS_S_LISTEN); > return cp->timeout; > } > >@@ -615,20 +610,20 @@ > u8 tos = iph->tos; > int mtu; > >- EnterFunction(10); >+ EnterFunction(10); > >- { >- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->daddr, >- .tos = RT_TOS(tos) } } }; >- if (ip_route_output_key(&rt,&fl)) { >- IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output_key error, " >- "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr)); >- goto tx_error_icmp; >- } >- } >+ { >+ struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->daddr, >+ .tos = RT_TOS(tos) } } }; >+ if (ip_route_output_key(&rt,&fl)) { >+ IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output_key error, " >+ "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr)); >+ goto tx_error_icmp; >+ } >+ } > > /* MTU checking */ >- mtu = dst_pmtu(&rt->u.dst); >+ mtu = dst_pmtu(&rt->u.dst); > if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) { > 
ip_rt_put(rt); > icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); >@@ -636,10 +631,8 @@ > goto tx_error; > } > >-#if 0 >- if (skb_is_nonlinear(skb) && skb->len <= mtu) >-#endif >- ip_send_check(iph); >+ /* update checksum because skb might be defragmented */ >+ ip_send_check(iph); > > if (unlikely(skb_headroom(skb) < rt->u.dst.dev->hard_header_len)) { > if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) { >@@ -751,7 +744,7 @@ > goto tx_error_icmp; > > /* MTU checking */ >- mtu = dst_pmtu(&rt->u.dst); >+ mtu = dst_pmtu(&rt->u.dst); > if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) { > ip_rt_put(rt); > icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); >@@ -898,10 +891,8 @@ > goto tx_error; > } > >-#if 0 >- if (skb_is_nonlinear(skb)) >-#endif >- ip_send_check(old_iph); >+ /* update checksum because skb might be defragmented */ >+ ip_send_check(old_iph); > > skb->h.raw = skb->nh.raw; > >@@ -922,6 +913,7 @@ > } > kfree_skb(skb); > skb = new_skb; >+ old_iph = skb->nh.iph; > } > > skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); >@@ -989,10 +981,8 @@ > goto tx_error; > } > >-#if 0 >- if (skb_is_nonlinear(skb) && skb->len <= mtu) >-#endif >- ip_send_check(iph); >+ /* update checksum because skb might be defragmented */ >+ ip_send_check(iph); > > if (unlikely(skb_headroom(skb) < rt->u.dst.dev->hard_header_len)) { > if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) { >@@ -1068,9 +1058,7 @@ > if (!dest) > return; > >- /* >- * Increase the refcnt counter of the dest. 
>- */ >+ /* Increase the refcnt counter of the dest */ > atomic_inc(&dest->refcnt); > > /* Bind with the destination and its corresponding transmitter */ >@@ -1097,38 +1085,40 @@ > { > struct ip_vs_dest *dest = cp->dest; > >- if (dest) { >- IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d " >- "v:%u.%u.%u.%u:%d d:%u.%u.%u.%u:%d fwd:%c " >- "s:%s flg:%X cnt:%d destcnt:%d", >- ip_vs_proto_name(cp->protocol), >- NIPQUAD(cp->caddr), ntohs(cp->cport), >- NIPQUAD(cp->vaddr), ntohs(cp->vport), >- NIPQUAD(cp->daddr), ntohs(cp->dport), >- ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state), >- cp->flags, atomic_read(&cp->refcnt), >- atomic_read(&dest->refcnt)); >+ /* if dest is NULL, then return directly */ >+ if (!dest) >+ return; > >- /* >- * Decrease the inactconns or activeconns counter >- * if it is not a connection template ((cp->cport!=0) >- * || (cp->flags & IP_VS_CONN_F_NO_CPORT)). >- */ >- if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { >- if (cp->flags & IP_VS_CONN_F_INACTIVE) { >- atomic_dec(&dest->inactconns); >- } else { >- atomic_dec(&dest->activeconns); >- } >- } >+ IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d " >+ "v:%u.%u.%u.%u:%d d:%u.%u.%u.%u:%d fwd:%c " >+ "s:%s flg:%X cnt:%d destcnt:%d", >+ ip_vs_proto_name(cp->protocol), >+ NIPQUAD(cp->caddr), ntohs(cp->cport), >+ NIPQUAD(cp->vaddr), ntohs(cp->vport), >+ NIPQUAD(cp->daddr), ntohs(cp->dport), >+ ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state), >+ cp->flags, atomic_read(&cp->refcnt), >+ atomic_read(&dest->refcnt)); > >- /* >- * Simply decrease the refcnt of the dest, because the >- * dest will be either in service's destination list >- * or in the trash. >- */ >- atomic_dec(&dest->refcnt); >+ /* >+ * Decrease the inactconns or activeconns counter >+ * if it is not a connection template ((cp->cport!=0) >+ * || (cp->flags & IP_VS_CONN_F_NO_CPORT)). 
>+ */ >+ if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { >+ if (cp->flags & IP_VS_CONN_F_INACTIVE) { >+ atomic_dec(&dest->inactconns); >+ } else { >+ atomic_dec(&dest->activeconns); >+ } > } >+ >+ /* >+ * Simply decrease the refcnt of the dest, because the >+ * dest will be either in service's destination list >+ * or in the trash. >+ */ >+ atomic_dec(&dest->refcnt); > } > > >@@ -1222,8 +1212,8 @@ > */ > if (likely(atomic_read(&cp->refcnt) == 1)) { > /* make sure that there is no timer on it now */ >- if (timer_pending(&cp->conn_timer)) >- del_timer(&cp->conn_timer); >+ if (timer_pending(&cp->timer)) >+ del_timer(&cp->timer); > > /* does anybody control me? */ > if (cp->control) >@@ -1255,7 +1245,8 @@ > void ip_vs_conn_expire_now(struct ip_vs_conn *cp) > { > cp->timeout = 0; >- mod_timer(&cp->conn_timer, jiffies); >+ mod_timer(&cp->timer, jiffies); >+ __ip_vs_conn_put(cp); > } > > /* >@@ -1276,9 +1267,9 @@ > > memset(cp, 0, sizeof(*cp)); > INIT_LIST_HEAD(&cp->c_list); >- init_timer(&cp->conn_timer); >- cp->conn_timer.data = (unsigned long)cp; >- cp->conn_timer.function = ip_vs_conn_expire; >+ init_timer(&cp->timer); >+ cp->timer.data = (unsigned long)cp; >+ cp->timer.function = ip_vs_conn_expire; > ip_vs_timeout_attach(cp, ip_vs_timeout_table); > cp->protocol = proto; > cp->caddr = caddr; >@@ -1306,7 +1297,7 @@ > ip_vs_bind_dest(cp, dest); > > /* Set its state and timeout */ >- ip_vs_set_state_timeout(cp, IP_VS_S_NONE); >+ vs_set_state_timeout(cp, IP_VS_S_NONE); > > /* Bind its packet transmitter */ > ip_vs_bind_xmit(cp); >@@ -1363,7 +1354,7 @@ > ntohl(cp->vaddr), ntohs(cp->vport), > ntohl(cp->daddr), ntohs(cp->dport), > ip_vs_state_name(cp->state), >- (cp->conn_timer.expires-jiffies)/HZ); >+ (cp->timer.expires-jiffies)/HZ); > len += sprintf(buffer+len, "%-127s\n", temp); > if (pos >= offset+length) { > ct_read_unlock_bh(idx); >@@ -1400,7 +1391,7 @@ > /* if the conn entry hasn't lasted for 60 seconds, don't drop it. 
> This will leave enough time for normal connection to get > through. */ >- if (cp->timeout+jiffies-cp->conn_timer.expires < 60*HZ) >+ if (cp->timeout+jiffies-cp->timer.expires < 60*HZ) > return 0; > > /* Don't drop the entry if its number of incoming packets is not >@@ -1421,6 +1412,7 @@ > int idx; > struct ip_vs_conn *cp; > struct list_head *l,*e; >+ struct ip_vs_conn *ct; > > /* > * Randomly scan 1/32 of the whole table every second >@@ -1454,12 +1446,21 @@ > continue; > } > >+ /* >+ * Drop the entry, and drop its ct if not referenced >+ */ >+ atomic_inc(&cp->refcnt); >+ ct_write_unlock(hash); >+ >+ if ((ct = cp->control)) >+ atomic_inc(&ct->refcnt); > IP_VS_DBG(4, "del connection\n"); > ip_vs_conn_expire_now(cp); >- if (cp->control) { >+ if (ct) { > IP_VS_DBG(4, "del conn template\n"); >- ip_vs_conn_expire_now(cp->control); >+ ip_vs_conn_expire_now(ct); > } >+ ct_write_lock(hash); > } > ct_write_unlock(hash); > } >@@ -1474,6 +1475,7 @@ > int idx; > struct ip_vs_conn *cp; > struct list_head *l,*e; >+ struct ip_vs_conn *ct; > > flush_again: > for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) { >@@ -1485,12 +1487,18 @@ > l = &ip_vs_conn_tab[idx]; > for (e=l->next; e!=l; e=e->next) { > cp = list_entry(e, struct ip_vs_conn, c_list); >+ atomic_inc(&cp->refcnt); >+ ct_write_unlock(idx); >+ >+ if ((ct = cp->control)) >+ atomic_inc(&ct->refcnt); > IP_VS_DBG(4, "del connection\n"); > ip_vs_conn_expire_now(cp); >- if (cp->control) { >+ if (ct) { > IP_VS_DBG(4, "del conn template\n"); >- ip_vs_conn_expire_now(cp->control); >+ ip_vs_conn_expire_now(ct); > } >+ ct_write_lock(idx); > } > ct_write_unlock_bh(idx); > } >@@ -1515,15 +1523,6 @@ > if (!ip_vs_conn_tab) > return -ENOMEM; > >- /* Allocate ip_vs_conn slab cache */ >- ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn", >- sizeof(struct ip_vs_conn), 0, >- SLAB_HWCACHE_ALIGN, NULL, NULL); >- if (!ip_vs_conn_cachep) { >- vfree(ip_vs_conn_tab); >- return -ENOMEM; >- } >- > IP_VS_INFO("Connection hash table configured " > 
"(size=%d, memory=%ldKbytes)\n", > IP_VS_CONN_TAB_SIZE, >@@ -1539,8 +1538,20 @@ > __ip_vs_conntbl_lock_array[idx].l = RW_LOCK_UNLOCKED; > } > >+ /* Allocate ip_vs_conn slab cache */ >+ ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn", >+ sizeof(struct ip_vs_conn), 0, >+ SLAB_HWCACHE_ALIGN, NULL, NULL); >+ if (!ip_vs_conn_cachep) { >+ vfree(ip_vs_conn_tab); >+ return -ENOMEM; >+ } >+ > proc_net_create("ip_vs_conn", 0, ip_vs_conn_getinfo); > >+ /* calculate the random value for connection hash */ >+ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); >+ > return 0; > } > >diff -urN linux-2.4.21/net/ipv4/ipvs/ip_vs_core.c linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_core.c >--- linux-2.4.21/net/ipv4/ipvs/ip_vs_core.c 2007-01-17 03:17:41.000000000 -0800 >+++ linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_core.c 2007-01-15 16:45:54.000000000 -0800 >@@ -5,7 +5,7 @@ > * high-performance and highly available server based on a > * cluster of servers. > * >- * Version: $Id: ip_vs_core.c,v 1.31.2.1 2002/11/14 10:05:23 wensong Exp $ >+ * Version: $Id: ip_vs_core.c,v 1.31.2.5 2003/07/29 14:37:12 wensong Exp $ > * > * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> > * Peter Kese <peter.kese@ijs.si> >@@ -506,7 +506,7 @@ > > /* reassemble IP fragments, but will it happen in ICMP packets?? */ > if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { >- skb = ip_defrag(skb, IP_DEFRAG_VS_OUT); >+ skb = ip_defrag(skb, IP_DEFRAG_VS_OUT); > if (!skb) > return NF_STOLEN; > *skb_p = skb; >@@ -843,10 +843,11 @@ > > > /* >- * Handle ICMP messages in the outside-to-inside direction (incoming). >+ * Handle ICMP messages in the outside-to-inside direction (incoming) >+ * and sometimes in outgoing direction from ip_vs_forward_icmp. > * Find any that might be relevant, check against existing connections, > * forward to the right destination host if relevant. 
>- * Currently handles error types - unreachable, quench, ttl exceeded >+ * Currently handles error types - unreachable, quench, ttl exceeded. > */ > static int ip_vs_in_icmp(struct sk_buff **skb_p) > { >@@ -864,14 +865,11 @@ > if (skb_is_nonlinear(skb)) { > if (skb_linearize(skb, GFP_ATOMIC) != 0) > return NF_DROP; >-#if 0 >- ip_send_check(skb->nh.iph); >-#endif > } > > iph = skb->nh.iph; > ip_send_check(iph); >- icmph = (struct icmphdr *)((char *)iph+(iph->ihl<<2)); >+ icmph = (struct icmphdr *)((char *)iph + (iph->ihl << 2)); > len = ntohs(iph->tot_len) - (iph->ihl<<2); > if (len < sizeof(struct icmphdr)) > return NF_DROP; >@@ -1166,7 +1164,7 @@ > return NF_ACCEPT; > > if (iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { >- skb = ip_defrag(skb, IP_DEFRAG_VS_FWD); >+ skb = ip_defrag(skb, IP_DEFRAG_VS_OUT); > if (!skb) > return NF_STOLEN; > *skb_p = skb; >@@ -1220,7 +1218,7 @@ > ret = ip_vs_conn_init(); > if (ret < 0) { > IP_VS_ERR("can't setup connection table.\n"); >- goto cleanup_sltimer; >+ goto cleanup_control; > } > > ret = ip_vs_app_init(); >@@ -1263,7 +1261,7 @@ > ip_vs_app_cleanup(); > cleanup_conn: > ip_vs_conn_cleanup(); >- cleanup_sltimer: >+ cleanup_control: > ip_vs_control_cleanup(); > cleanup_nothing: > return ret; >diff -urN linux-2.4.21/net/ipv4/ipvs/ip_vs_ctl.c linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_ctl.c >--- linux-2.4.21/net/ipv4/ipvs/ip_vs_ctl.c 2007-01-17 03:17:30.000000000 -0800 >+++ linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_ctl.c 2007-01-15 16:45:54.000000000 -0800 >@@ -5,7 +5,7 @@ > * high-performance and highly available server based on a > * cluster of servers. > * >- * Version: $Id: ip_vs_ctl.c,v 1.30.2.1 2002/11/14 10:05:23 wensong Exp $ >+ * Version: $Id: ip_vs_ctl.c,v 1.30.2.3 2003/07/29 14:37:12 wensong Exp $ > * > * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> > * Peter Kese <peter.kese@ijs.si> >@@ -92,7 +92,7 @@ > /* > * update_defense_level is called from timer bh and from sysctl. 
> */ >-void update_defense_level(void) >+static void update_defense_level(void) > { > int ip_vs_amem = nr_free_pages() + atomic_read(&page_cache_size) + > atomic_read(&buffermem_pages); >@@ -189,6 +189,22 @@ > > > /* >+ * Timer for checking the defense >+ */ >+static struct timer_list defense_timer; >+#define DEFENSE_TIMER_PERIOD 1*HZ >+ >+static void defense_timer_handler(unsigned long data) >+{ >+ update_defense_level(); >+ if (atomic_read(&ip_vs_dropentry)) >+ ip_vs_random_dropentry(); >+ >+ mod_timer(&defense_timer, jiffies + DEFENSE_TIMER_PERIOD); >+} >+ >+ >+/* > * Hash table: for virtual service lookups > */ > #define IP_VS_SVC_TAB_BITS 8 >@@ -845,7 +861,7 @@ > EnterFunction(2); > > if (ur->weight < 0) { >- IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n"); >+ IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n"); > return -ERANGE; > } > >@@ -1655,8 +1671,6 @@ > return 0; > } > >-static int hashrnds_initted; >-extern void ip_vs_conn_hashrnd_init(void); > > static int > do_ip_vs_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len) >@@ -1668,11 +1682,6 @@ > if (!capable(CAP_NET_ADMIN)) > return -EPERM; > >- if (!hashrnds_initted) { >- ip_vs_conn_hashrnd_init(); >- hashrnds_initted = 1; >- } >- > /* > * Check the size of mm, no overflow... > * len > 128000 is a sanity check. 
>@@ -1707,10 +1716,11 @@ > ret = ip_vs_set_timeouts(urule); > goto out_unlock; > } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { >- ret = start_sync_thread(urule->state, urule->mcast_ifn); >+ ret = start_sync_thread(urule->state, urule->mcast_ifn, >+ urule->syncid); > goto out_unlock; > } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { >- ret = stop_sync_thread(); >+ ret = stop_sync_thread(urule->state); > goto out_unlock; > } else if (cmd == IP_VS_SO_SET_ZERO) { > /* if no service address is set, zero counters in all */ >@@ -2058,7 +2068,10 @@ > goto out; > } > u.state = ip_vs_sync_state; >- strcpy(u.mcast_ifn, ip_vs_mcast_ifn); >+ if (ip_vs_sync_state & IP_VS_STATE_MASTER) >+ strcpy(u.mcast_master_ifn, ip_vs_mcast_master_ifn); >+ if (ip_vs_sync_state & IP_VS_STATE_BACKUP) >+ strcpy(u.mcast_backup_ifn, ip_vs_mcast_backup_ifn); > if (copy_to_user(user, &u, sizeof(u)) != 0) > ret = -EFAULT; > } >@@ -2115,6 +2128,12 @@ > ip_vs_stats.lock = SPIN_LOCK_UNLOCKED; > ip_vs_new_estimator(&ip_vs_stats); > >+ /* Hook the defense timer */ >+ init_timer(&defense_timer); >+ defense_timer.function = defense_timer_handler; >+ defense_timer.expires = jiffies + DEFENSE_TIMER_PERIOD; >+ add_timer(&defense_timer); >+ > LeaveFunction(2); > return 0; > } >@@ -2123,6 +2142,7 @@ > { > EnterFunction(2); > ip_vs_trash_cleanup(); >+ del_timer_sync(&defense_timer); > ip_vs_kill_estimator(&ip_vs_stats); > unregister_sysctl_table(ipv4_vs_table.sysctl_header); > proc_net_remove("ip_vs_stats"); >diff -urN linux-2.4.21/net/ipv4/ipvs/ip_vs_est.c linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_est.c >--- linux-2.4.21/net/ipv4/ipvs/ip_vs_est.c 2007-01-17 03:17:30.000000000 -0800 >+++ linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_est.c 2007-01-15 16:45:53.000000000 -0800 >@@ -1,7 +1,7 @@ > /* > * ip_vs_est.c Simple rate estimator for IPVS > * >- * Version: $Id: ip_vs_est.c,v 1.3 2002/07/11 14:26:41 wensong Exp $ >+ * Version: $Id: ip_vs_est.c,v 1.3.2.1 2003/07/29 14:37:13 wensong Exp $ > * > * Authors: Wensong 
Zhang <wensong@linuxvirtualserver.org> > * >@@ -77,6 +77,8 @@ > read_lock(&est_lock); > for (e = est_list; e; e = e->next) { > s = e->stats; >+ >+ spin_lock(&s->lock); > n_conns = s->conns; > n_inpkts = s->inpkts; > n_outpkts = s->outpkts; >@@ -108,6 +110,7 @@ > e->last_outbytes = n_outbytes; > e->outbps += ((long)rate - (long)e->outbps)>>2; > s->outbps = (e->outbps+0xF)>>5; >+ spin_unlock(&s->lock); > } > read_unlock(&est_lock); > mod_timer(&est_timer, jiffies + 2*HZ); >diff -urN linux-2.4.21/net/ipv4/ipvs/ip_vs_lc.c linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_lc.c >--- linux-2.4.21/net/ipv4/ipvs/ip_vs_lc.c 2007-01-17 03:17:30.000000000 -0800 >+++ linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_lc.c 2007-01-15 16:45:53.000000000 -0800 >@@ -1,7 +1,7 @@ > /* > * IPVS: Least-Connection Scheduling module > * >- * Version: $Id: ip_vs_lc.c,v 1.8 2001/10/19 15:05:17 wensong Exp $ >+ * Version: $Id: ip_vs_lc.c,v 1.8.2.1 2003/04/11 14:02:35 wensong Exp $ > * > * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> > * >diff -urN linux-2.4.21/net/ipv4/ipvs/ip_vs_nq.c linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_nq.c >--- linux-2.4.21/net/ipv4/ipvs/ip_vs_nq.c 1969-12-31 16:00:00.000000000 -0800 >+++ linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_nq.c 2007-01-15 16:45:53.000000000 -0800 >@@ -0,0 +1,177 @@ >+/* >+ * IPVS: Never Queue scheduling module >+ * >+ * Version: $Id: ip_vs_nq.c,v 1.1.2.1 2003/05/20 17:05:02 wensong Exp $ >+ * >+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> >+ * >+ * This program is free software; you can redistribute it and/or >+ * modify it under the terms of the GNU General Public License >+ * as published by the Free Software Foundation; either version >+ * 2 of the License, or (at your option) any later version. >+ * >+ * Changes: >+ * >+ */ >+ >+/* >+ * The NQ algorithm adopts a two-speed model. When there is an idle server >+ * available, the job will be sent to the idle server, instead of waiting >+ * for a fast one. 
When there is no idle server available, the job will be >+ * sent to the server that minimize its expected delay (The Shortest >+ * Expected Delay scheduling algorithm). >+ * >+ * See the following paper for more information: >+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing >+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88, >+ * pages 986-994, 1988. >+ * >+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking NQ to me. >+ * >+ * The difference between NQ and SED is that NQ can improve overall >+ * system utilization. >+ * >+ */ >+ >+#include <linux/config.h> >+#include <linux/module.h> >+#include <linux/init.h> >+#include <linux/types.h> >+#include <linux/kernel.h> >+#include <linux/errno.h> >+ >+#include <net/ip_vs.h> >+ >+ >+static int >+ip_vs_nq_init_svc(struct ip_vs_service *svc) >+{ >+ return 0; >+} >+ >+ >+static int >+ip_vs_nq_done_svc(struct ip_vs_service *svc) >+{ >+ return 0; >+} >+ >+ >+static int >+ip_vs_nq_update_svc(struct ip_vs_service *svc) >+{ >+ return 0; >+} >+ >+ >+static inline unsigned int >+ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) >+{ >+ /* >+ * We only use the active connection number in the cost >+ * calculation here. >+ */ >+ return atomic_read(&dest->activeconns) + 1; >+} >+ >+ >+/* >+ * Weighted Least Connection scheduling >+ */ >+static struct ip_vs_dest * >+ip_vs_nq_schedule(struct ip_vs_service *svc, struct iphdr *iph) >+{ >+ register struct list_head *l, *e; >+ struct ip_vs_dest *dest, *least; >+ unsigned int loh, doh; >+ >+ IP_VS_DBG(6, "ip_vs_nq_schedule(): Scheduling...\n"); >+ >+ /* >+ * We calculate the load of each dest server as follows: >+ * (server expected overhead) / dest->weight >+ * >+ * Remember -- no floats in kernel mode!!! >+ * The comparison of h1*w2 > h2*w1 is equivalent to that of >+ * h1/w1 > h2/w2 >+ * if every weight is larger than zero. >+ * >+ * The server with weight=0 is quiesced and will not receive any >+ * new connections. 
>+ */ >+ >+ l = &svc->destinations; >+ for (e=l->next; e!=l; e=e->next) { >+ least = list_entry(e, struct ip_vs_dest, n_list); >+ if (atomic_read(&least->weight) > 0) { >+ loh = ip_vs_nq_dest_overhead(least); >+ >+ /* return the server directly if it is idle */ >+ if (atomic_read(&least->activeconns) == 0) >+ goto out; >+ >+ goto nextstage; >+ } >+ } >+ return NULL; >+ >+ /* >+ * Find the destination with the least load. >+ */ >+ nextstage: >+ for (e=e->next; e!=l; e=e->next) { >+ dest = list_entry(e, struct ip_vs_dest, n_list); >+ doh = ip_vs_nq_dest_overhead(dest); >+ >+ /* return the server directly if it is idle */ >+ if (atomic_read(&dest->activeconns) == 0) { >+ least = dest; >+ loh = doh; >+ goto out; >+ } >+ >+ if (loh * atomic_read(&dest->weight) > >+ doh * atomic_read(&least->weight)) { >+ least = dest; >+ loh = doh; >+ } >+ } >+ >+ out: >+ IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u " >+ "activeconns %d refcnt %d weight %d overhead %d\n", >+ NIPQUAD(least->addr), ntohs(least->port), >+ atomic_read(&least->activeconns), >+ atomic_read(&least->refcnt), >+ atomic_read(&least->weight), loh); >+ >+ return least; >+} >+ >+ >+static struct ip_vs_scheduler ip_vs_nq_scheduler = >+{ >+ .name = "nq", >+ .refcnt = ATOMIC_INIT(0), >+ .module = THIS_MODULE, >+ .init_service = ip_vs_nq_init_svc, >+ .done_service = ip_vs_nq_done_svc, >+ .update_service = ip_vs_nq_update_svc, >+ .schedule = ip_vs_nq_schedule, >+}; >+ >+ >+static int __init ip_vs_nq_init(void) >+{ >+ INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list); >+ return register_ip_vs_scheduler(&ip_vs_nq_scheduler); >+} >+ >+static void __exit ip_vs_nq_cleanup(void) >+{ >+ unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); >+} >+ >+module_init(ip_vs_nq_init); >+module_exit(ip_vs_nq_cleanup); >+MODULE_LICENSE("GPL"); >diff -urN linux-2.4.21/net/ipv4/ipvs/ip_vs_sed.c linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_sed.c >--- linux-2.4.21/net/ipv4/ipvs/ip_vs_sed.c 1969-12-31 16:00:00.000000000 -0800 >+++ 
linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_sed.c 2007-01-15 16:45:53.000000000 -0800 >@@ -0,0 +1,167 @@ >+/* >+ * IPVS: Shortest Expected Delay scheduling module >+ * >+ * Version: $Id: ip_vs_sed.c,v 1.1.2.1 2003/05/20 17:05:02 wensong Exp $ >+ * >+ * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> >+ * >+ * This program is free software; you can redistribute it and/or >+ * modify it under the terms of the GNU General Public License >+ * as published by the Free Software Foundation; either version >+ * 2 of the License, or (at your option) any later version. >+ * >+ * Changes: >+ * >+ */ >+ >+/* >+ * The SED algorithm attempts to minimize each job's expected delay until >+ * completion. The expected delay that the job will experience is >+ * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of >+ * jobs on the the ith server and Ui is the fixed service rate (weight) of >+ * the ith server. The SED algorithm adopts a greedy policy that each does >+ * what is in its own best interest, i.e. to join the queue which would >+ * minimize its expected delay of completion. >+ * >+ * See the following paper for more information: >+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing >+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88, >+ * pages 986-994, 1988. >+ * >+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me. >+ * >+ * The difference between SED and WLC is that SED includes the incoming >+ * job in the cost function (the increment of 1). SED may outperform >+ * WLC, while scheduling big jobs under larger heterogeneous systems >+ * (the server weight varies a lot). 
>+ * >+ */ >+ >+#include <linux/config.h> >+#include <linux/module.h> >+#include <linux/init.h> >+#include <linux/types.h> >+#include <linux/kernel.h> >+#include <linux/errno.h> >+ >+#include <net/ip_vs.h> >+ >+ >+static int >+ip_vs_sed_init_svc(struct ip_vs_service *svc) >+{ >+ return 0; >+} >+ >+ >+static int >+ip_vs_sed_done_svc(struct ip_vs_service *svc) >+{ >+ return 0; >+} >+ >+ >+static int >+ip_vs_sed_update_svc(struct ip_vs_service *svc) >+{ >+ return 0; >+} >+ >+ >+static inline unsigned int >+ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) >+{ >+ /* >+ * We only use the active connection number in the cost >+ * calculation here. >+ */ >+ return atomic_read(&dest->activeconns) + 1; >+} >+ >+ >+/* >+ * Weighted Least Connection scheduling >+ */ >+static struct ip_vs_dest * >+ip_vs_sed_schedule(struct ip_vs_service *svc, struct iphdr *iph) >+{ >+ register struct list_head *l, *e; >+ struct ip_vs_dest *dest, *least; >+ unsigned int loh, doh; >+ >+ IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n"); >+ >+ /* >+ * We calculate the load of each dest server as follows: >+ * (server expected overhead) / dest->weight >+ * >+ * Remember -- no floats in kernel mode!!! >+ * The comparison of h1*w2 > h2*w1 is equivalent to that of >+ * h1/w1 > h2/w2 >+ * if every weight is larger than zero. >+ * >+ * The server with weight=0 is quiesced and will not receive any >+ * new connections. >+ */ >+ >+ l = &svc->destinations; >+ for (e=l->next; e!=l; e=e->next) { >+ least = list_entry(e, struct ip_vs_dest, n_list); >+ if (atomic_read(&least->weight) > 0) { >+ loh = ip_vs_sed_dest_overhead(least); >+ goto nextstage; >+ } >+ } >+ return NULL; >+ >+ /* >+ * Find the destination with the least load. 
>+ */ >+ nextstage: >+ for (e=e->next; e!=l; e=e->next) { >+ dest = list_entry(e, struct ip_vs_dest, n_list); >+ doh = ip_vs_sed_dest_overhead(dest); >+ if (loh * atomic_read(&dest->weight) > >+ doh * atomic_read(&least->weight)) { >+ least = dest; >+ loh = doh; >+ } >+ } >+ >+ IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u " >+ "activeconns %d refcnt %d weight %d overhead %d\n", >+ NIPQUAD(least->addr), ntohs(least->port), >+ atomic_read(&least->activeconns), >+ atomic_read(&least->refcnt), >+ atomic_read(&least->weight), loh); >+ >+ return least; >+} >+ >+ >+static struct ip_vs_scheduler ip_vs_sed_scheduler = >+{ >+ .name = "sed", >+ .refcnt = ATOMIC_INIT(0), >+ .module = THIS_MODULE, >+ .init_service = ip_vs_sed_init_svc, >+ .done_service = ip_vs_sed_done_svc, >+ .update_service = ip_vs_sed_update_svc, >+ .schedule = ip_vs_sed_schedule, >+}; >+ >+ >+static int __init ip_vs_sed_init(void) >+{ >+ INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list); >+ return register_ip_vs_scheduler(&ip_vs_sed_scheduler); >+} >+ >+static void __exit ip_vs_sed_cleanup(void) >+{ >+ unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); >+} >+ >+module_init(ip_vs_sed_init); >+module_exit(ip_vs_sed_cleanup); >+MODULE_LICENSE("GPL"); >diff -urN linux-2.4.21/net/ipv4/ipvs/ip_vs_sync.c linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_sync.c >--- linux-2.4.21/net/ipv4/ipvs/ip_vs_sync.c 2007-01-17 03:17:30.000000000 -0800 >+++ linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_sync.c 2007-01-15 16:45:54.000000000 -0800 >@@ -11,6 +11,11 @@ > * > * ip_vs_sync: sync connection info from master load balancer to backups > * through multicast >+ * >+ * Changes: >+ * Alexandre Cassen : Added master & backup support at a time. >+ * Alexandre Cassen : Added SyncID support for incoming sync >+ * messages filtering. > */ > > #define __KERNEL_SYSCALLS__ /* for waitpid */ >@@ -66,6 +71,7 @@ > struct ip_vs_seq out_seq; /* outgoing seq. 
struct */ > }; > >+#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ) > #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) > #define FULL_CONN_SIZE \ > (sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) >@@ -79,6 +85,7 @@ > 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > | Count Conns | Reserved | Size | >+ | Count Conns | Sync ID | Size | > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > | | > | IPVS Sync Connection (1) | >@@ -90,11 +97,16 @@ > | | > | IPVS Sync Connection (n) | > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ >+ >+ Count Conns : Number of IPVS sync Connection entries. >+ Sync ID : IPVS sync group we belong to. >+ Size : Size of packet. >+ > */ > #define SYNC_MESG_MAX_SIZE (24*50+4) > struct ip_vs_sync_mesg { > __u8 nr_conns; >- __u8 reserved; >+ __u8 syncid; > __u16 size; > > /* ip_vs_sync_conn entries start here */ >@@ -120,6 +132,18 @@ > static struct ip_vs_sync_buff *curr_sb = NULL; > static spinlock_t curr_sb_lock = SPIN_LOCK_UNLOCKED; > >+/* ipvs sync daemon state */ >+volatile int ip_vs_sync_state = IP_VS_STATE_NONE; >+volatile int ip_vs_master_syncid = 0; >+volatile int ip_vs_backup_syncid = 0; >+ >+/* multicast interface name */ >+char ip_vs_mcast_master_ifn[IP_VS_IFNAME_MAXLEN]; >+char ip_vs_mcast_backup_ifn[IP_VS_IFNAME_MAXLEN]; >+ >+/* multicast addr */ >+static struct sockaddr_in mcast_addr; >+ > static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) > { > spin_lock(&ip_vs_sync_lock); >@@ -157,6 +181,7 @@ > return NULL; > } > sb->mesg->nr_conns = 0; >+ sb->mesg->syncid = ip_vs_master_syncid; > sb->mesg->size = 4; > sb->head = (unsigned char *)sb->mesg + 4; > sb->end = (unsigned char *)sb->mesg + SYNC_MESG_MAX_SIZE; >@@ -266,6 +291,13 @@ > return; > } > >+ /* SyncID sanity check */ >+ if (ip_vs_backup_syncid != 255 && m->syncid != ip_vs_backup_syncid) { >+ IP_VS_DBG(7, "Ignoring 
incoming msg with syncid = %d\n", >+ m->syncid); >+ return; >+ } >+ > p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); > for (i=0; i<m->nr_conns; i++) { > s = (struct ip_vs_sync_conn *)p; >@@ -298,7 +330,7 @@ > p += SIMPLE_CONN_SIZE; > > atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold); >- ip_vs_set_state_timeout(cp, cp->state); >+ cp->timeout = IP_VS_SYNC_CONN_TIMEOUT; > ip_vs_conn_put(cp); > > if (p > buffer+buflen) { >@@ -308,17 +340,6 @@ > } > } > >- >-/* ipvs sync daemon state */ >-volatile int ip_vs_sync_state = IP_VS_STATE_NONE; >- >-/* multicast interface name */ >-char ip_vs_mcast_ifn[IP_VS_IFNAME_MAXLEN]; >- >-/* multicast addr */ >-static struct sockaddr_in mcast_addr; >- >- > /* > * Setup loopback of outgoing multicasts on a sending socket > */ >@@ -430,7 +451,7 @@ > return NULL; > } > >- if (set_mcast_if(sock->sk, ip_vs_mcast_ifn) < 0) { >+ if (set_mcast_if(sock->sk, ip_vs_mcast_master_ifn) < 0) { > IP_VS_ERR("Error setting outbound mcast interface\n"); > goto error; > } >@@ -438,7 +459,7 @@ > set_mcast_loop(sock->sk, 0); > set_mcast_ttl(sock->sk, 1); > >- if (bind_mcastif_addr(sock, ip_vs_mcast_ifn) < 0) { >+ if (bind_mcastif_addr(sock, ip_vs_mcast_master_ifn) < 0) { > IP_VS_ERR("Error binding address of the mcast interface\n"); > goto error; > } >@@ -484,7 +505,7 @@ > /* join the multicast group */ > if (join_mcast_group(sock->sk, > (struct in_addr*)&mcast_addr.sin_addr, >- ip_vs_mcast_ifn) < 0) { >+ ip_vs_mcast_backup_ifn) < 0) { > IP_VS_ERR("Error joining to the multicast group\n"); > goto error; > } >@@ -561,6 +582,8 @@ > static int errno; > > static DECLARE_WAIT_QUEUE_HEAD(sync_wait); >+static pid_t sync_master_pid = 0; >+static pid_t sync_backup_pid = 0; > static pid_t sync_pid = 0; > > static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait); >@@ -677,11 +700,13 @@ > oldmm = get_fs(); > set_fs(KERNEL_DS); > >- if (ip_vs_sync_state == IP_VS_STATE_MASTER) >- sprintf(current->comm, "ipvs syncmaster"); >- else if (ip_vs_sync_state == 
IP_VS_STATE_BACKUP) >- sprintf(current->comm, "ipvs syncbackup"); >- else IP_VS_BUG(); >+ if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) { >+ state = IP_VS_STATE_MASTER; >+ sprintf(current->comm, "ipvs_syncmaster"); >+ } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { >+ state = IP_VS_STATE_BACKUP; >+ sprintf(current->comm, "ipvs_syncbackup"); >+ } else IP_VS_BUG(); > > spin_lock_irq(¤t->sighand->siglock); > siginitsetinv(¤t->blocked, 0); >@@ -733,7 +758,7 @@ > } > > >-int start_sync_thread(int state, char *mcast_ifn) >+int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) > { > DECLARE_COMPLETION(startup); > pid_t pid; >@@ -746,8 +771,17 @@ > IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %d bytes\n", > sizeof(struct ip_vs_sync_conn)); > >- ip_vs_sync_state = state; >- strcpy(ip_vs_mcast_ifn, mcast_ifn); >+ ip_vs_sync_state |= state; >+ if (state == IP_VS_STATE_MASTER) { >+ strncpy(ip_vs_mcast_master_ifn, mcast_ifn, sizeof(ip_vs_mcast_master_ifn)); >+ ip_vs_mcast_master_ifn[sizeof(ip_vs_mcast_master_ifn) - 1] = 0; >+ ip_vs_master_syncid = syncid; >+ } else { >+ strncpy(ip_vs_mcast_backup_ifn, mcast_ifn, sizeof(ip_vs_mcast_backup_ifn)); >+ ip_vs_mcast_backup_ifn[sizeof(ip_vs_mcast_backup_ifn) - 1] = 0; >+ ip_vs_backup_syncid = syncid; >+ } >+ > > if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) > IP_VS_BUG(); >@@ -763,7 +797,7 @@ > } > > >-int stop_sync_thread(void) >+int stop_sync_thread(int state) > { > DECLARE_WAITQUEUE(wait, current); > >diff -urN linux-2.4.21/net/ipv4/ipvs/ip_vs_wlc.c linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_wlc.c >--- linux-2.4.21/net/ipv4/ipvs/ip_vs_wlc.c 2007-01-17 03:17:30.000000000 -0800 >+++ linux-2.4.21-build-01-17/net/ipv4/ipvs/ip_vs_wlc.c 2007-01-15 16:45:53.000000000 -0800 >@@ -1,7 +1,7 @@ > /* > * IPVS: Weighted Least-Connection Scheduling module > * >- * Version: $Id: ip_vs_wlc.c,v 1.10 2002/03/25 12:44:35 wensong Exp $ >+ * Version: $Id: ip_vs_wlc.c,v 1.10.2.1 2003/04/11 14:02:35 wensong 
Exp $ > * > * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> > * Peter Kese <peter.kese@ijs.si> >diff -urN linux-2.4.21/net/ipv4/ipvs/Makefile linux-2.4.21-build-01-17/net/ipv4/ipvs/Makefile >--- linux-2.4.21/net/ipv4/ipvs/Makefile 2007-01-17 03:17:41.000000000 -0800 >+++ linux-2.4.21-build-01-17/net/ipv4/ipvs/Makefile 2007-01-15 16:45:53.000000000 -0800 >@@ -31,6 +31,8 @@ > obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o > obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o > obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o >+obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o >+obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o > > # IPVS application helpers > obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o >diff -urN linux-2.4.21/net/ipv4/netfilter/ip_fw_compat.c linux-2.4.21-build-01-17/net/ipv4/netfilter/ip_fw_compat.c >--- linux-2.4.21/net/ipv4/netfilter/ip_fw_compat.c 2007-01-17 03:17:31.000000000 -0800 >+++ linux-2.4.21-build-01-17/net/ipv4/netfilter/ip_fw_compat.c 2007-01-15 16:45:53.000000000 -0800 >@@ -47,9 +47,11 @@ > extern int __init masq_init(void); > extern void masq_cleanup(void); > >+#ifdef CONFIG_IP_VS > /* From ip_vs_core.c */ > extern unsigned int > check_for_ip_vs_out(struct sk_buff **skb_p, int (*okfn)(struct sk_buff *)); >+#endif > > /* They call these; we do what they want. 
*/ > int register_firewall(int pf, struct firewall_ops *fw) >diff -urN linux-2.4.21/include/net/ip_vs.h linux-2.4.21-build-01-17/include/net/ip_vs.h >--- linux-2.4.21/include/net/ip_vs.h 2007-01-17 03:17:41.000000000 -0800 >+++ linux-2.4.21-build-01-17/include/net/ip_vs.h 2007-01-15 16:48:38.000000000 -0800 >@@ -8,7 +8,7 @@ > > #include <asm/types.h> /* For __uXX types */ > >-#define IP_VS_VERSION_CODE 0x010008 >+#define IP_VS_VERSION_CODE 0x01000A > #define NVERSION(version) \ > (version >> 16) & 0xFF, \ > (version >> 8) & 0xFF, \ >@@ -82,6 +82,7 @@ > #define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */ > #define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ > #define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ >+#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ > > /* Move it to better place one day, for now keep it unique */ > #define NFC_IPVS_PROPERTY 0x10000 >@@ -97,6 +98,7 @@ > int state; /* sync daemon state */ > char mcast_ifn[IP_VS_IFNAME_MAXLEN]; > /* multicast interface name */ >+ int syncid; /* sync daemon id */ > > /* virtual service options */ > u_int16_t protocol; >@@ -213,8 +215,9 @@ > > /* The argument to IP_VS_SO_GET_DAEMON */ > struct ip_vs_daemon_user { >- int state; /* sync daemon state */ >- char mcast_ifn[IP_VS_IFNAME_MAXLEN]; /* multicast interface name */ >+ int state; /* sync daemon state */ >+ char mcast_master_ifn[IP_VS_IFNAME_MAXLEN]; /* mcast master interface name */ >+ char mcast_backup_ifn[IP_VS_IFNAME_MAXLEN]; /* mcast backup interface name */ > }; > > >@@ -317,6 +320,7 @@ > NET_IPV4_VS_EXPIRE_NODEST_CONN=23, > NET_IPV4_VS_SYNC_THRESHOLD=24, > NET_IPV4_VS_NAT_ICMP_SEND=25, >+ NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE=26, > NET_IPV4_VS_LAST > }; > >@@ -412,7 +416,7 @@ > > /* counter and timer */ > atomic_t refcnt; /* reference count */ >- struct timer_list conn_timer; /* Expiration timer */ >+ struct timer_list timer; /* Expiration timer */ > volatile unsigned long timeout; /* 
timeout */ > struct ip_vs_timeout_table *timeout_table; > >@@ -614,8 +618,6 @@ > extern void ip_vs_random_dropentry(void); > extern int ip_vs_conn_init(void); > extern void ip_vs_conn_cleanup(void); >-extern int ip_vs_set_state_timeout(struct ip_vs_conn *cp, int state); >- > > static inline void ip_vs_control_del(struct ip_vs_conn *cp) > { >@@ -704,7 +706,6 @@ > extern int sysctl_ip_vs_expire_nodest_conn; > extern int sysctl_ip_vs_sync_threshold; > extern int sysctl_ip_vs_nat_icmp_send; >-extern atomic_t ip_vs_dropentry; > extern struct ip_vs_stats ip_vs_stats; > > extern struct ip_vs_service *ip_vs_service_get(__u32 fwmark, >@@ -717,7 +718,6 @@ > > extern struct ip_vs_dest * > ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport); >-extern void update_defense_level(void); > extern void ip_vs_random_dropentry(void); > extern int ip_vs_control_init(void); > extern void ip_vs_control_cleanup(void); >@@ -728,9 +728,10 @@ > * (from ip_vs_sync.c) > */ > extern volatile int ip_vs_sync_state; >-extern char ip_vs_mcast_ifn[IP_VS_IFNAME_MAXLEN]; >-extern int start_sync_thread(int state, char *mcast_ifn); >-extern int stop_sync_thread(void); >+extern char ip_vs_mcast_master_ifn[IP_VS_IFNAME_MAXLEN]; >+extern char ip_vs_mcast_backup_ifn[IP_VS_IFNAME_MAXLEN]; >+extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); >+extern int stop_sync_thread(int state); > extern void ip_vs_sync_conn(struct ip_vs_conn *cp); > > >@@ -760,6 +761,17 @@ > > > /* >+ * Slow timer functions for IPVS >+ * (from ip_vs_timer.c) >+ */ >+extern void add_sltimer(struct timer_list * timer); >+extern int del_sltimer(struct timer_list * timer); >+extern void mod_sltimer(struct timer_list *timer, unsigned long expires); >+extern void ip_vs_sltimer_init(void); >+extern void ip_vs_sltimer_cleanup(void); >+ >+ >+/* > * ip_vs_fwd_tag returns the forwarding tag of the connection > */ > #define IP_VS_FWD_METHOD(cp) (cp->flags & IP_VS_CONN_F_FWD_MASK) >@@ -873,6 +885,7 @@ > "dest: 
%u.%u.%u.%u\n", > NIPQUAD(dest->addr)); > return NULL; >+ > } > __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); > IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n", >@@ -912,7 +925,8 @@ > else > checkp = &h->uh->check; > *checkp = ip_vs_check_diff(~oldip, newip, >- ip_vs_check_diff(oldport ^ 0xFFFF, newport, *checkp)); >+ ip_vs_check_diff(oldport ^ 0xFFFF, >+ newport, *checkp)); > if (!*checkp && protocol == IPPROTO_UDP) > *checkp = 0xFFFF; > } >@@ -926,7 +940,7 @@ > if (delta < 0) > delta = 0; > >- if (delta ||skb_cloned(skb)) { >+ if (delta || skb_cloned(skb)) { > if (pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC)) > return -ENOMEM; > >diff -urN linux-2.4.21/include/net/route.h linux-2.4.21-build-01-17/include/net/route.h >--- linux-2.4.21/include/net/route.h 2007-01-17 03:17:30.000000000 -0800 >+++ linux-2.4.21-build-01-17/include/net/route.h 2007-01-15 16:48:36.000000000 -0800 >@@ -119,6 +119,7 @@ > extern int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); > extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin); > extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); >+extern void ip_rt_update_pmtu(struct dst_entry *dst, unsigned mtu); > extern void ip_rt_send_redirect(struct sk_buff *skb); > > extern unsigned inet_addr_type(u32 addr);
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page.
View Attachment As Raw
Actions:
View
Attachments on
bug 214212
:
140487
| 145862