Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 1477840 Details for
Bug 1526306
ovs-vswitchd service hangs with Error too many open files
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly; please enable it.
[patch]
beta patch for the kernel data path
openvswitch-queue-upcalls-to-userspace-in-per-por.patch (text/plain), 13.28 KB, created by
Matteo Croce
on 2018-08-22 10:43:59 UTC
(
hide
)
Description:
beta patch for the kernel data path
Filename:
MIME Type:
Creator:
Matteo Croce
Created:
2018-08-22 10:43:59 UTC
Size:
13.28 KB
patch
obsolete
>diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h >index dbe0cbe4f1b7..d8d811ec0b2b 100644 >--- a/include/uapi/linux/openvswitch.h >+++ b/include/uapi/linux/openvswitch.h >@@ -86,6 +86,7 @@ enum ovs_datapath_attr { > OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ > OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ > OVS_DP_ATTR_PAD, >+ OVS_DP_ATTR_RR_ALGO, /* round-robin algorithm */ > __OVS_DP_ATTR_MAX > }; > >diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c >index 0f5ce77460d4..edad75b0d31f 100644 >--- a/net/openvswitch/datapath.c >+++ b/net/openvswitch/datapath.c >@@ -59,6 +59,10 @@ > #include "vport-internal_dev.h" > #include "vport-netdev.h" > >+#define UPCALL_QUEUE_TIMEOUT msecs_to_jiffies(10) >+#define UPCALL_QUEUE_MAX_DELAY msecs_to_jiffies(10) >+#define UPCALL_QUEUE_MAX_LEN 512 >+ > unsigned int ovs_net_id __read_mostly; > > static struct genl_family dp_packet_genl_family; >@@ -79,6 +83,8 @@ static const struct genl_multicast_group ovs_dp_vport_multicast_group = { > .name = OVS_VPORT_MCGROUP, > }; > >+static struct kmem_cache *upcalls_slab; >+ > /* Check if need to build a reply message. > * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. 
*/ > static bool ovs_must_notify(struct genl_family *family, struct genl_info *info, >@@ -225,45 +231,223 @@ void ovs_dp_detach_port(struct vport *p) > ovs_vport_del(p); > } > >+static void ovs_flush_queue(struct datapath *dp, int idx) >+{ >+ struct dp_upcall_info *u, *n; >+ >+ list_for_each_entry_safe(u, n, &dp->upcalls.list[idx], list) { >+ if (unlikely(ovs_dp_upcall(dp, u->skb, &u->key, u, 0))) >+ kfree_skb(u->skb); >+ else >+ consume_skb(u->skb); >+ kmem_cache_free(upcalls_slab, u); >+ } >+ dp->upcalls.len[idx] = 0; >+ INIT_LIST_HEAD(&dp->upcalls.list[idx]); >+} >+ >+static void ovs_dp_upcall_dequeue(struct work_struct *work) >+{ >+ struct datapath *dp = container_of(work, struct datapath, >+ upcalls.work.work); >+ >+ spin_lock_bh(&dp->upcalls.lock); >+ ovs_flush_queue(dp, atomic_read(&dp->upcalls.listidx)); >+ atomic_xor(1, &dp->upcalls.listidx); >+ spin_unlock_bh(&dp->upcalls.lock); >+} >+ >+/* Calculate the delay of the deferred work which sends the upcalls. If it ran >+ * more than UPCALL_QUEUE_TIMEOUT ago, schedule the work immediately. Otherwise >+ * return a time between 0 and UPCALL_QUEUE_MAX_DELAY, depending linearly on the >+ * queue utilisation. 
>+ */ >+static unsigned long ovs_dp_upcall_delay(int queue_len, unsigned long last_run) >+{ >+ if (jiffies - last_run >= UPCALL_QUEUE_TIMEOUT) >+ return 0; >+ >+ return UPCALL_QUEUE_MAX_DELAY - >+ UPCALL_QUEUE_MAX_DELAY * queue_len / UPCALL_QUEUE_MAX_LEN; >+} >+ >+/* Compare upcall depending on the selected algorithm */ >+static u8 upcall_bucket_calc(struct datapath *dp, struct dp_upcall_info *upcall) >+{ >+ switch (dp->upcalls.rr_algo) { >+ case RR_VPORT: >+ return upcall->port_no; >+ default: >+ return 0; >+ } >+} >+ >+static int ovs_dp_upcall_queue_roundrobin(struct datapath *dp, >+ struct dp_upcall_info *upcall) >+{ >+ struct dp_upcall_info *here = NULL, *pos; >+ int b1 = upcall_bucket_calc(dp, upcall); >+ struct list_head *head; >+ bool find_next = true; >+ int err = 0; >+ u8 count; >+ int idx; >+ >+ spin_lock_bh(&dp->upcalls.lock); >+ >+ idx = atomic_read(&dp->upcalls.listidx); >+ head = &dp->upcalls.list[idx]; >+ >+ if (dp->upcalls.len[idx] >= UPCALL_QUEUE_MAX_LEN) { >+ err = -ENOSPC; >+ goto out; >+ } >+ >+ /* rr_algo changed between ovs_dp_process_packet() and here. */ >+ if (dp->upcalls.rr_algo == RR_NONE) { >+ list_add_tail(&upcall->list, head); >+ ovs_flush_queue(dp, idx); >+ goto out; >+ } >+ >+ /* Insert upcalls in the list in a per-port round-robin fashion, look >+ * for insertion point: >+ * - to avoid out-of-order per-port upcalls, we can insert only after >+ * the last occurrence of upcalls for the same port >+ * - insert upcall only after we reach a count of occurrences for a >+ * given port greater than the one we're inserting this upcall for >+ */ >+ list_for_each_entry(pos, head, list) { >+ int b2 = upcall_bucket_calc(dp, pos); >+ >+ /* Count per-port upcalls. 
*/ >+ if (UPCALL_QUEUE_MAX_LEN > U8_MAX && >+ dp->upcalls.count[b2] == U8_MAX) { >+ err = -ENOSPC; >+ goto out_clear; >+ } >+ dp->upcalls.count[b2]++; >+ >+ if (b2 == b1) { >+ /* Another upcall for the same port: move insertion >+ * point here, keep looking for insertion condition to >+ * be still met further on. >+ */ >+ find_next = true; >+ here = pos; >+ continue; >+ } >+ >+ count = dp->upcalls.count[b1]; >+ if (find_next && dp->upcalls.count[b2] >= count) { >+ /* Insertion condition met: no need to look further, >+ * unless another upcall for the same port occurs later. >+ */ >+ find_next = false; >+ here = pos; >+ } >+ } >+ >+ if (here) >+ list_add(&upcall->list, &here->list); >+ else >+ list_add_tail(&upcall->list, head); >+ >+ dp->upcalls.len[idx]++; >+ >+out_clear: >+ /* Clear the per-port counters we used, so that we don't need to zero >+ * out the counters array on every insertion. >+ */ >+ list_for_each_entry_reverse(pos, head, list) >+ dp->upcalls.count[pos->port_no] = 0; >+ >+out: >+ spin_unlock_bh(&dp->upcalls.lock); >+ >+ if (!err) >+ mod_delayed_work(system_wq, &dp->upcalls.work, >+ ovs_dp_upcall_delay(dp->upcalls.len[idx], >+ dp->upcalls.last_run)); >+ return err; >+} >+ >+static void ovs_change_dp_rr(struct datapath *dp, u8 algo) >+{ >+ if (algo >= _RR_MAX) { >+ pr_warn("%s: unsupported round-robin algorithm: %d\n", >+ ovs_dp_name(dp), algo); >+ return; >+ } >+ >+ if (dp->upcalls.rr_algo == algo) >+ return; >+ >+ /* Need to flush the queue before changing algorithm */ >+ cancel_delayed_work_sync(&dp->upcalls.work); >+ spin_lock_bh(&dp->upcalls.lock); >+ ovs_flush_queue(dp, 0); >+ ovs_flush_queue(dp, 1); >+ dp->upcalls.rr_algo = algo; >+ spin_unlock_bh(&dp->upcalls.lock); >+} >+ > /* Must be called with rcu_read_lock. 
*/ > void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) > { > const struct vport *p = OVS_CB(skb)->input_vport; > struct datapath *dp = p->dp; >- struct sw_flow *flow; >+ struct dp_stats_percpu *stats = this_cpu_ptr(dp->stats_percpu); >+ u64 *stats_counter = &stats->n_hit; > struct sw_flow_actions *sf_acts; >- struct dp_stats_percpu *stats; >- u64 *stats_counter; >+ struct sw_flow *flow; > u32 n_mask_hit; >- >- stats = this_cpu_ptr(dp->stats_percpu); >+ int error; > > /* Look up flow. */ > flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); > if (unlikely(!flow)) { >- struct dp_upcall_info upcall; >- int error; >- >- memset(&upcall, 0, sizeof(upcall)); >- upcall.cmd = OVS_PACKET_CMD_MISS; >- upcall.portid = ovs_vport_find_upcall_portid(p, skb); >- upcall.mru = OVS_CB(skb)->mru; >- error = ovs_dp_upcall(dp, skb, key, &upcall, 0); >- if (unlikely(error)) >- kfree_skb(skb); >- else >- consume_skb(skb); > stats_counter = &stats->n_missed; >- goto out; >+ if (dp->upcalls.rr_algo == RR_NONE) { >+ struct dp_upcall_info upcall = { >+ .cmd = OVS_PACKET_CMD_MISS, >+ .portid = ovs_vport_find_upcall_portid(p, skb), >+ .mru = OVS_CB(skb)->mru, >+ }; >+ >+ error = ovs_dp_upcall(dp, skb, key, &upcall, 0); >+ if (unlikely(error)) >+ kfree_skb(skb); >+ else >+ consume_skb(skb); >+ } else { >+ struct dp_upcall_info *upcall; >+ >+ upcall = kmem_cache_zalloc(upcalls_slab, GFP_ATOMIC); >+ if (upcall) { >+ upcall->cmd = OVS_PACKET_CMD_MISS; >+ upcall->portid = >+ ovs_vport_find_upcall_portid(p, skb); >+ upcall->port_no = p->port_no; >+ upcall->mru = OVS_CB(skb)->mru; >+ upcall->skb = skb; >+ upcall->key = *key; >+ error = ovs_dp_upcall_queue_roundrobin(dp, >+ upcall); >+ if (unlikely(error)) { >+ kfree_skb(skb); >+ kmem_cache_free(upcalls_slab, upcall); >+ } >+ } else { >+ kfree_skb(skb); >+ } >+ } >+ } else { >+ ovs_flow_stats_update(flow, key->tp.flags, skb); >+ sf_acts = rcu_dereference(flow->sf_acts); >+ ovs_execute_actions(dp, skb, sf_acts, key); > 
} > >- ovs_flow_stats_update(flow, key->tp.flags, skb); >- sf_acts = rcu_dereference(flow->sf_acts); >- ovs_execute_actions(dp, skb, sf_acts, key); >- >- stats_counter = &stats->n_hit; >- >-out: > /* Update datapath statistics. */ > u64_stats_update_begin(&stats->syncp); > (*stats_counter)++; >@@ -288,10 +472,8 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, > err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); > else > err = queue_gso_packets(dp, skb, key, upcall_info, cutlen); >- if (err) >- goto err; >- >- return 0; >+ if (!err) >+ return 0; > > err: > stats = this_cpu_ptr(dp->stats_percpu); >@@ -1540,6 +1722,8 @@ static void ovs_dp_change(struct datapath *dp, struct nlattr *a[]) > { > if (a[OVS_DP_ATTR_USER_FEATURES]) > dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); >+ if (a[OVS_DP_ATTR_RR_ALGO]) >+ ovs_change_dp_rr(dp, nla_get_u8(a[OVS_DP_ATTR_RR_ALGO])); > } > > static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) >@@ -1589,6 +1773,11 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) > for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) > INIT_HLIST_HEAD(&dp->ports[i]); > >+ INIT_LIST_HEAD(&dp->upcalls.list[0]); >+ INIT_LIST_HEAD(&dp->upcalls.list[1]); >+ spin_lock_init(&dp->upcalls.lock); >+ INIT_DELAYED_WORK(&dp->upcalls.work, ovs_dp_upcall_dequeue); >+ > err = ovs_meters_init(dp); > if (err) > goto err_destroy_ports_array; >@@ -1658,6 +1847,8 @@ static void __dp_destroy(struct datapath *dp) > { > int i; > >+ cancel_delayed_work_sync(&dp->upcalls.work); >+ > for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { > struct vport *vport; > struct hlist_node *n; >@@ -1800,6 +1991,7 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { > [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, > [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, > [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, >+ [OVS_DP_ATTR_RR_ALGO] = { .type = NLA_U8 }, > }; > > static 
const struct genl_ops dp_datapath_genl_ops[] = { >@@ -2397,9 +2589,17 @@ static int __init dp_init(void) > > pr_info("Open vSwitch switching datapath\n"); > >+ upcalls_slab = kmem_cache_create("ovs_upcalls_slab", >+ sizeof(struct dp_upcall_info), >+ 0, 0, NULL); >+ if (!upcalls_slab) { >+ err = -ENOMEM; >+ goto error_kmem_cache; >+ } >+ > err = action_fifos_init(); > if (err) >- goto error; >+ goto error_fifo; > > err = ovs_internal_dev_rtnl_link_register(); > if (err) >@@ -2445,7 +2645,9 @@ static int __init dp_init(void) > ovs_internal_dev_rtnl_link_unregister(); > error_action_fifos_exit: > action_fifos_exit(); >-error: >+error_fifo: >+ kmem_cache_destroy(upcalls_slab); >+error_kmem_cache: > return err; > } > >@@ -2460,6 +2662,7 @@ static void dp_cleanup(void) > ovs_flow_exit(); > ovs_internal_dev_rtnl_link_unregister(); > action_fifos_exit(); >+ kmem_cache_destroy(upcalls_slab); > } > > module_init(dp_init); >diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h >index c9eb267c6f7e..49194d2b31d3 100644 >--- a/net/openvswitch/datapath.h >+++ b/net/openvswitch/datapath.h >@@ -24,6 +24,7 @@ > #include <linux/mutex.h> > #include <linux/netdevice.h> > #include <linux/skbuff.h> >+#include <linux/workqueue.h> > #include <linux/u64_stats_sync.h> > #include <net/ip_tunnels.h> > >@@ -59,6 +60,12 @@ struct dp_stats_percpu { > struct u64_stats_sync syncp; > }; > >+enum rr_algo { >+ RR_NONE, /* No queue, upcalls are delivered immediately. */ >+ RR_VPORT, /* Upcalls are delivered one per port. */ >+ _RR_MAX >+}; >+ > /** > * struct datapath - datapath for flow-based packet switching > * @rcu: RCU callback head for deferred destruction. >@@ -70,6 +77,13 @@ struct dp_stats_percpu { > * @net: Reference to net namespace. > * @max_headroom: the maximum headroom of all vports in this datapath; it will > * be used by all the internal vports in this dp. >+ * @upcalls.work: sends queued upcalls to userspace. >+ * @upcalls.list: list of queued upcalls. 
>+ * @upcalls.len: elements in upcall_list. >+ * @upcalls.lock: lock for the upcall list. >+ * @upcalls.count: array used to sort the upcalls delivered to userspace. >+ * @upcalls.last_run: timestamp of last work run. >+ * @upcalls.rr_algo: algorithm used by the round-robin scheduler. > * > * Context: See the comment on locking at the top of datapath.c for additional > * locking information. >@@ -96,6 +110,18 @@ struct datapath { > > /* Switch meters. */ > struct hlist_head *meters; >+ >+ /* Upcalls queue handling. */ >+ struct { >+ struct delayed_work work; >+ struct list_head list[2]; >+ int len[2]; >+ atomic_t listidx; >+ spinlock_t lock; /* Protects len and upcall list. */ >+ u8 count[DP_MAX_PORTS]; >+ unsigned long last_run; >+ enum rr_algo rr_algo; >+ } upcalls; > }; > > /** >@@ -116,7 +142,7 @@ struct ovs_skb_cb { > #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) > > /** >- * struct dp_upcall - metadata to include with a packet to send to userspace >+ * struct dp_upcall_info - Upcall for userspace, including metadata to send > * @cmd: One of %OVS_PACKET_CMD_*. > * @userdata: If nonnull, its variable-length value is passed to userspace as > * %OVS_PACKET_ATTR_USERDATA. >@@ -125,6 +151,10 @@ struct ovs_skb_cb { > * counter. > * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY. > * @mru: If not zero, Maximum received IP fragment size. >+ * @list: list within vport for upcall queue handling. >+ * @skb: the socket buffer that generated the upcall. >+ * @key: flow key. >+ * @port_no: port number within the datapath. > */ > struct dp_upcall_info { > struct ip_tunnel_info *egress_tun_info; >@@ -134,6 +164,10 @@ struct dp_upcall_info { > u32 portid; > u8 cmd; > u16 mru; >+ struct list_head list; >+ struct sk_buff *skb; >+ struct sw_flow_key key; >+ u16 port_no; > }; > > /**
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 1526306
:
1383546
|
1452875
|
1454715
|
1477839
| 1477840 |
1483331