Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 1452875 Details for
Bug 1526306
ovs-vswitchd service hangs with Error too many open files
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly; please enable it.
[patch]
patch for the kernel data path
0001-openvswitch-send-upcalls-in-round-robin.patch (text/plain), 10.90 KB, created by
Matteo Croce
on 2018-06-19 10:31:15 UTC
(
hide
)
Description:
patch for the kernel data path
Filename:
MIME Type:
Creator:
Matteo Croce
Created:
2018-06-19 10:31:15 UTC
Size:
10.90 KB
patch
obsolete
>From 9cd99e24d5c23c91f524f8e6581f6b967acc38a6 Mon Sep 17 00:00:00 2001 >From: Matteo Croce <mcroce@redhat.com> >Date: Fri, 1 Jun 2018 16:37:12 +0200 >Subject: [PATCH] openvswitch: send upcalls in round robin > >Open vSwitch sends packets with no associated flows to userspace to >create a new flow and determine the actions to apply. >When a single port is generating a lot of upcalls, it can prevent other >ports from generating upcalls. vswitchd prevents this by creating many >netlink sockets per port, but this creates problems when dealing with >huge port numbers. > >This patch queues all the upcall packets into a per port list, and runs a >task which sends upcalls one per port. This ensures fairness among ports >even with a few netlink sockets. > >Signed-off-by: Matteo Croce <mcroce@redhat.com> >--- > net/openvswitch/datapath.c | 192 ++++++++++++++++++++++++++++--------- > net/openvswitch/datapath.h | 23 +++++ > net/openvswitch/vport.c | 15 +++ > net/openvswitch/vport.h | 7 ++ > 4 files changed, 193 insertions(+), 44 deletions(-) > >diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c >index 0f5ce77460d4..1ce8b874d556 100644 >--- a/net/openvswitch/datapath.c >+++ b/net/openvswitch/datapath.c >@@ -59,6 +59,8 @@ > #include "vport-internal_dev.h" > #include "vport-netdev.h" > >+#define UPCALL_QUEUE_LIMIT 16 >+ > unsigned int ovs_net_id __read_mostly; > > static struct genl_family dp_packet_genl_family; >@@ -225,6 +227,75 @@ void ovs_dp_detach_port(struct vport *p) > ovs_vport_del(p); > } > >+int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, >+ const struct sw_flow_key *key, >+ const struct dp_upcall_info *upcall_info, >+ uint32_t cutlen) >+{ >+ struct dp_stats_percpu *stats; >+ int ret = 0; >+ >+ if (upcall_info->portid == 0) { >+ ret = -ENOTCONN; >+ goto err; >+ } >+ >+ if (!skb_is_gso(skb)) >+ ret = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); >+ else >+ ret = queue_gso_packets(dp, skb, key, upcall_info, cutlen); >+ if (!ret) >+ 
return 0; >+ >+err: >+ stats = this_cpu_ptr(dp->stats_percpu); >+ >+ u64_stats_update_begin(&stats->syncp); >+ stats->n_lost++; >+ u64_stats_update_end(&stats->syncp); >+ >+ return ret; >+} >+ >+static int ovs_dp_deferred_upcall(struct upcall_packet *upcall_pkt) >+{ >+ struct dp_stats_percpu *stats; >+ struct vport *vport = OVS_CB(upcall_pkt->skb)->input_vport; >+ int ret = 0; >+ >+ if (upcall_pkt->upcall_info.portid == 0) { >+ ret = -ENOTCONN; >+ goto err; >+ } >+ >+ spin_lock_bh(&vport->upcall_lock); >+ if (vport->upcall_len >= UPCALL_QUEUE_LIMIT) { >+ spin_unlock_bh(&vport->upcall_lock); >+ pr_debug("upcall queue full, dropping packet\n"); >+ ret = -ENOMEM; >+ goto err; >+ } >+ list_add_tail(&upcall_pkt->list, &vport->upcall_list); >+ vport->upcall_len++; >+ spin_unlock_bh(&vport->upcall_lock); >+ >+ schedule_work(&upcall_pkt->dp->upcall_rr); >+ >+ return 0; >+ >+err: >+ kfree_skb(upcall_pkt->skb); >+ kfree(upcall_pkt); >+ >+ stats = this_cpu_ptr(vport->dp->stats_percpu); >+ >+ u64_stats_update_begin(&stats->syncp); >+ stats->n_lost++; >+ u64_stats_update_end(&stats->syncp); >+ >+ return ret; >+} >+ > /* Must be called with rcu_read_lock. */ > void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) > { >@@ -241,27 +312,32 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) > /* Look up flow. 
*/ > flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); > if (unlikely(!flow)) { >- struct dp_upcall_info upcall; >- int error; >- >- memset(&upcall, 0, sizeof(upcall)); >- upcall.cmd = OVS_PACKET_CMD_MISS; >- upcall.portid = ovs_vport_find_upcall_portid(p, skb); >- upcall.mru = OVS_CB(skb)->mru; >- error = ovs_dp_upcall(dp, skb, key, &upcall, 0); >- if (unlikely(error)) >- kfree_skb(skb); >- else >- consume_skb(skb); >+ struct upcall_packet *upcall_pkt; >+ > stats_counter = &stats->n_missed; >- goto out; >- } > >- ovs_flow_stats_update(flow, key->tp.flags, skb); >- sf_acts = rcu_dereference(flow->sf_acts); >- ovs_execute_actions(dp, skb, sf_acts, key); >+ upcall_pkt = kzalloc(sizeof(*upcall_pkt), GFP_ATOMIC); >+ if (!upcall_pkt) { >+ kfree_skb(skb); >+ goto out; >+ } >+ >+ upcall_pkt->upcall_info.cmd = OVS_PACKET_CMD_MISS; >+ upcall_pkt->upcall_info.portid = >+ ovs_vport_find_upcall_portid(p, skb); >+ upcall_pkt->upcall_info.mru = OVS_CB(skb)->mru; >+ upcall_pkt->dp = dp; >+ upcall_pkt->skb = skb; >+ upcall_pkt->key = *key; > >- stats_counter = &stats->n_hit; >+ ovs_dp_deferred_upcall(upcall_pkt); >+ } else { >+ stats_counter = &stats->n_hit; >+ >+ ovs_flow_stats_update(flow, key->tp.flags, skb); >+ sf_acts = rcu_dereference(flow->sf_acts); >+ ovs_execute_actions(dp, skb, sf_acts, key); >+ } > > out: > /* Update datapath statistics. 
*/ >@@ -271,42 +347,66 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) > u64_stats_update_end(&stats->syncp); > } > >-int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, >- const struct sw_flow_key *key, >- const struct dp_upcall_info *upcall_info, >- uint32_t cutlen) >+static int send_upcall(struct upcall_packet *upcall) > { >- struct dp_stats_percpu *stats; >- int err; >- >- if (upcall_info->portid == 0) { >- err = -ENOTCONN; >- goto err; >- } >- >- if (!skb_is_gso(skb)) >- err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); >+ if (!skb_is_gso(upcall->skb)) >+ return queue_userspace_packet(upcall->dp, upcall->skb, >+ &upcall->key, >+ &upcall->upcall_info, >+ upcall->cutlen); > else >- err = queue_gso_packets(dp, skb, key, upcall_info, cutlen); >- if (err) >- goto err; >- >- return 0; >+ return queue_gso_packets(upcall->dp, upcall->skb, &upcall->key, >+ &upcall->upcall_info, upcall->cutlen); >+} > >-err: >- stats = this_cpu_ptr(dp->stats_percpu); >+static void upcall_rr(struct work_struct *work) >+{ >+ struct datapath *dp = container_of(work, struct datapath, upcall_rr); >+ struct vport *vport; >+ int i, sent = 0; > >- u64_stats_update_begin(&stats->syncp); >- stats->n_lost++; >- u64_stats_update_end(&stats->syncp); >+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { >+ rcu_read_lock(); >+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { >+ struct upcall_packet *upcall; >+ >+ spin_lock_bh(&vport->upcall_lock); >+ upcall = list_first_entry_or_null(&vport->upcall_list, >+ struct upcall_packet, >+ list); >+ if (!upcall) { >+ spin_unlock_bh(&vport->upcall_lock); >+ continue; >+ } >+ list_del(&upcall->list); >+ vport->upcall_len--; >+ spin_unlock_bh(&vport->upcall_lock); >+ >+ if (unlikely(send_upcall(upcall))) { >+ struct dp_stats_percpu *stats = >+ this_cpu_ptr(upcall->dp->stats_percpu); >+ >+ u64_stats_update_begin(&stats->syncp); >+ stats->n_lost++; >+ u64_stats_update_end(&stats->syncp); >+ 
kfree_skb(upcall->skb); >+ } else { >+ consume_skb(upcall->skb); >+ } > >- return err; >+ kfree(upcall); >+ sent = 1; >+ } >+ rcu_read_unlock(); >+ } >+ if (sent) >+ schedule_work(&dp->upcall_rr); > } > > static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, > const struct sw_flow_key *key, > const struct dp_upcall_info *upcall_info, >- uint32_t cutlen) >+ u32 cutlen) > { > unsigned int gso_type = skb_shinfo(skb)->gso_type; > struct sw_flow_key later_key; >@@ -1628,6 +1728,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) > info->snd_seq, 0, OVS_DP_CMD_NEW); > BUG_ON(err < 0); > >+ INIT_WORK(&dp->upcall_rr, upcall_rr); >+ > ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); > list_add_tail_rcu(&dp->list_node, &ovs_net->dps); > >@@ -1694,6 +1796,8 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) > if (IS_ERR(dp)) > goto err_unlock_free; > >+ cancel_work_sync(&dp->upcall_rr); >+ > err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, > info->snd_seq, 0, OVS_DP_CMD_DEL); > BUG_ON(err < 0); >diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h >index c9eb267c6f7e..aca32a23ee78 100644 >--- a/net/openvswitch/datapath.h >+++ b/net/openvswitch/datapath.h >@@ -24,6 +24,7 @@ > #include <linux/mutex.h> > #include <linux/netdevice.h> > #include <linux/skbuff.h> >+#include <linux/workqueue.h> > #include <linux/u64_stats_sync.h> > #include <net/ip_tunnels.h> > >@@ -70,6 +71,7 @@ struct dp_stats_percpu { > * @net: Reference to net namespace. > * @max_headroom: the maximum headroom of all vports in this datapath; it will > * be used by all the internal vports in this dp. >+ * @upcall_rr: task which sends deferred upcalls to userspace. > * > * Context: See the comment on locking at the top of datapath.c for additional > * locking information. >@@ -96,6 +98,9 @@ struct datapath { > > /* Switch meters. 
*/ > struct hlist_head *meters; >+ >+ /* upcalls round robins sender */ >+ struct work_struct upcall_rr; > }; > > /** >@@ -136,6 +141,24 @@ struct dp_upcall_info { > u16 mru; > }; > >+/** >+ * struct upcall_packet - represent an upcall to be sent >+ * @list: list within vport. >+ * @dp: Datapath the upcall belongs to. >+ * @skb: The sk_buff which generated the upcall. >+ * @key: Flow key. >+ * @upcall_info: struct dp_upcall with metadata to send to userspace. >+ * @cutlen: needed by queue_userspace_packet(). >+ */ >+struct upcall_packet { >+ struct list_head list; >+ struct datapath *dp; >+ struct sk_buff *skb; >+ struct sw_flow_key key; >+ struct dp_upcall_info upcall_info; >+ u32 cutlen; >+}; >+ > /** > * struct ovs_net - Per net-namespace data for ovs. > * @dps: List of datapaths to enable dumping them all out. >diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c >index 19f6765566e7..fc240312295b 100644 >--- a/net/openvswitch/vport.c >+++ b/net/openvswitch/vport.c >@@ -148,6 +148,8 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, > vport->port_no = parms->port_no; > vport->ops = ops; > INIT_HLIST_NODE(&vport->dp_hash_node); >+ INIT_LIST_HEAD(&vport->upcall_list); >+ spin_lock_init(&vport->upcall_lock); > > if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) { > kfree(vport); >@@ -170,6 +172,19 @@ EXPORT_SYMBOL_GPL(ovs_vport_alloc); > */ > void ovs_vport_free(struct vport *vport) > { >+ struct list_head *q; >+ >+ spin_lock_bh(&vport->upcall_lock); >+ list_for_each(q, &vport->upcall_list) { >+ struct upcall_packet *upcall; >+ >+ upcall = list_entry(q, struct upcall_packet, list); >+ kfree_skb(upcall->skb); >+ kfree(upcall); >+ } >+ >+ spin_unlock_bh(&vport->upcall_lock); >+ > /* vport is freed from RCU callback or error path, Therefore > * it is safe to use raw dereference. 
> */ >diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h >index cda66c26ad08..733239322b3c 100644 >--- a/net/openvswitch/vport.h >+++ b/net/openvswitch/vport.h >@@ -79,6 +79,9 @@ struct vport_portids { > * @ops: Class structure. > * @detach_list: list used for detaching vport in net-exit call. > * @rcu: RCU callback head for deferred destruction. >+ * @upcall_list: list of queued upcalls. >+ * @upcall_lock: lock for the upcall list. >+ * @upcall_len: number of the queued upcalls. > */ > struct vport { > struct net_device *dev; >@@ -92,6 +95,10 @@ struct vport { > > struct list_head detach_list; > struct rcu_head rcu; >+ >+ struct list_head upcall_list; >+ spinlock_t upcall_lock; /* protects the upcall list */ >+ int upcall_len; > }; > > /** >-- >2.17.1 >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 1526306
:
1383546
|
1452875
|
1454715
|
1477839
|
1477840
|
1483331