
More traffic control fixes



Hi Dave,

here's another bundle of traffic control fixes for 2.4.3-pre3. The
changes to CBQ and TBF have been okay'ed by Alexey. Detailed list
below.

Some of the changes affect user-visible behaviour: some tc commands
that appeared to work now fail (and, in some cases, vice versa). All
these cases were nonsensical configurations, and some of them could
even cause an oops. Since some of that code is already pointy hat
stuff, I didn't bother to add bug emulation, too ;-)

Note: you may have already seen the dsmark_enqueue fix. I sent you
that patch on Feb 11, but you never acknowledged it, and later patches
are now in Linus' tree, so don't get confused ;-) All other patches
are new.

Cheers, Werner

---------------------------------- cut here -----------------------------------

cls_tcindex:
 - fix inconsistent use of the classid field as a null indicator for
   perfect hashing (now uses res.class and checks for 0 where needed)
 - added range check in tcindex_get (fixes potential oops) *
 - disallow hash changes when using an imperfect hash (they yielded an
   incorrect hash table) *
 - pick the smallest possible default hash, i.e. make proper use of
   mask-then-shift instead of incorrectly assuming shift-then-mask
   (see the sketch after this list)
 - only allow handles compatible with maximum hash size (see also comment
   in patch) *
 - didn't release the old policer when setting a different one

 * These changes may cause scripts to fail that used to appear to
   work, but that were either broken or depended on broken behaviour
   of cls_tcindex.
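
For illustration (not part of the patch), here is the default hash
sizing in isolation. This is a standalone sketch: the threshold
constants and the key computation (tc_index & mask) >> shift are
assumed from cls_tcindex.c, so the largest possible key is
mask >> shift, not mask itself.

	/* sketch only -- mirrors the sizing logic in tcindex_change */
	#define PERFECT_HASH_THRESHOLD	64	/* assumed value */
	#define DEFAULT_HASH_SIZE	64	/* assumed value */

	static int default_hash(__u16 mask, int shift)
	{
		if ((mask >> shift) < PERFECT_HASH_THRESHOLD)
			return (mask >> shift)+1; /* perfect hash fits */
		return DEFAULT_HASH_SIZE;	/* hash with chaining */
	}

E.g. with mask 0xf0 and shift 4, this yields a 16-entry perfect hash
instead of sizing the table for mask+1 = 241 entries.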

sch_cbq:
 - dereferenced a pointer after kfree'ing the structure (see the
   sketch below)
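
A minimal sketch of the fixed walk-and-free pattern, outside the
kernel context ("destroy" stands in for cbq_destroy_class, which
kfree's the class):

	struct cbq_class *cl,*next;

	for (cl = head; cl; cl = next) {
		next = cl->next;	/* read before cl is freed */
		destroy(cl);		/* kfree's cl, so cl->next
					   must not be touched later */
	}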

sch_dsmark:
 - didn't implement the "leaf" function, so child qdiscs couldn't be
   explicitly accessed by the parent
 - the enqueue function always returned zero, confusing congestion
   control and possibly causing other, esoteric problems (sketch of
   the corrected pattern below)
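
The corrected pattern, sketched outside the kernel context (names
assumed): the parent has to pass the child qdisc's verdict up instead
of claiming success, so callers actually see the drop.

	ret = p->q->enqueue(skb,p->q);
	if (ret != 0) {		/* child dropped or policed the skb */
		sch->stats.drops++;
		return ret;	/* was "return 0", which hid the
				   drop from congestion control */
	}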

sch_tbf:
 - the maximum packet size no longer depends on psched_mtu, but only
   on the constraints imposed by the bucket parameters (see the sketch
   below). This eliminates an array overflow problem. *

 * This change may cause scripts to fail that used to appear to
   work, but that were broken. It's also a little more permissive in
   other, typically useless cases.
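
A standalone sketch of the new bound (rate table semantics assumed:
rtab->data[n] is the transmission time for packets whose size maps to
cell n, i.e. up to ((n+1) << cell_log)-1 bytes): scan all 256 cells
for the first one that doesn't fit into the bucket, and derive the
byte limit from its index.

	for (n = 0; n < 256; n++)
		if (rtab->data[n] > qopt->buffer) break;
	max_size = (n << qopt->rate.cell_log)-1;
	/* max_size < 0 means not even the smallest packet fits into
	   the bucket; reject such a configuration instead of indexing
	   rtab->data out of bounds */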


--- linux.orig/net/sched/cls_tcindex.c	Wed Mar  7 14:26:36 2001
+++ linux/net/sched/cls_tcindex.c	Wed Mar  7 14:38:02 2001
@@ -76,7 +76,7 @@
 	struct tcindex_filter *f;
 
 	if (p->perfect)
-		return p->perfect[key].res.classid ? p->perfect+key : NULL;
+		return p->perfect[key].res.class ? p->perfect+key : NULL;
 	if (!p->h)
 		return NULL;
 	for (f = p->h[key % p->hash]; f; f = f->next) {
@@ -122,8 +122,14 @@
 
 static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
 {
+	struct tcindex_data *p = PRIV(tp);
+	struct tcindex_filter_result *r;
+
 	DPRINTK("tcindex_get(tp %p,handle 0x%08x)\n",tp,handle);
-	return (unsigned long) lookup(PRIV(tp),handle);
+	if (p->perfect && handle >= p->alloc_hash)
+		return 0;
+	r = lookup(PRIV(tp),handle);
+	return r && r->res.class ? (unsigned long) r : 0;
 }
 
 
@@ -164,7 +170,7 @@
 
 	DPRINTK("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n",tp,arg,p,f);
 	if (p->perfect) {
-		if (!r->res.classid)
+		if (!r->res.class)
 			return -ENOENT;
 	} else {
 		int i;
@@ -212,7 +218,7 @@
 	struct tcindex_filter *f;
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
 	struct tcindex_filter **walk;
-	int hash;
+	int hash,shift;
 	__u16 mask;
 
 	DPRINTK("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
@@ -237,17 +243,22 @@
 			return -EINVAL;
 		mask = *(__u16 *) RTA_DATA(tb[TCA_TCINDEX_MASK-1]);
 	}
-	if (p->perfect && hash <= mask)
+	if (!tb[TCA_TCINDEX_SHIFT-1])
+		shift = p->shift;
+	else {
+		if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(__u16))
+			return -EINVAL;
+		shift = *(int *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
+	}
+	if (p->perfect && hash <= (mask >> shift))
+		return -EBUSY;
+	if (p->perfect && hash > p->alloc_hash)
 		return -EBUSY;
-	if ((p->perfect || p->h) && hash > p->alloc_hash)
+	if (p->h && hash != p->alloc_hash)
 		return -EBUSY;
 	p->hash = hash;
 	p->mask = mask;
-	if (tb[TCA_TCINDEX_SHIFT-1]) {
-		if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(__u16))
-			return -EINVAL;
-		p->shift = *(int *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
-	}
+	p->shift = shift;
 	if (tb[TCA_TCINDEX_FALL_THROUGH-1]) {
 		if (RTA_PAYLOAD(tb[TCA_TCINDEX_FALL_THROUGH-1]) < sizeof(int))
 			return -EINVAL;
@@ -258,9 +269,9 @@
 	    tb[TCA_TCINDEX_POLICE-1]);
 	if (!tb[TCA_TCINDEX_CLASSID-1] && !tb[TCA_TCINDEX_POLICE-1])
 		return 0;
-	if (!p->hash) {
-		if (p->mask < PERFECT_HASH_THRESHOLD) {
-			p->hash = p->mask+1;
+	if (!hash) {
+		if ((mask >> shift) < PERFECT_HASH_THRESHOLD) {
+			p->hash = (mask >> shift)+1;
 		} else {
 			p->hash = DEFAULT_HASH_SIZE;
 		}
@@ -268,7 +279,7 @@
 	if (!p->perfect && !p->h) {
 		p->alloc_hash = p->hash;
 		DPRINTK("hash %d mask %d\n",p->hash,p->mask);
-		if (p->hash > p->mask) {
+		if (p->hash > (mask >> shift)) {
 			p->perfect = kmalloc(p->hash*
 			    sizeof(struct tcindex_filter_result),GFP_KERNEL);
 			if (!p->perfect)
@@ -283,7 +294,15 @@
 			memset(p->h, 0, p->hash*sizeof(struct tcindex_filter *));
 		}
 	}
-	if (handle > p->mask)
+	/*
+	 * Note: this could be as restrictive as
+	 * if (handle & ~(mask >> shift))
+	 * but then, we'd fail handles that may become valid after some
+	 * future mask change. While this is extremely unlikely to ever
+	 * matter, the check below is safer (and also more
+	 * backwards-compatible).
+	 */
+	if (p->perfect && handle >= p->alloc_hash)
 		return -EINVAL;
 	if (p->perfect) {
 		r = p->perfect+handle;
@@ -308,17 +327,16 @@
 		}
         }
 #ifdef CONFIG_NET_CLS_POLICE
-	if (!tb[TCA_TCINDEX_POLICE-1]) {
-		r->police = NULL;
-	} else {
-		struct tcf_police *police =
-		    tcf_police_locate(tb[TCA_TCINDEX_POLICE-1],NULL);
+	{
+		struct tcf_police *police;
 
-                tcf_tree_lock(tp);
+		police = tb[TCA_TCINDEX_POLICE-1] ?
+		    tcf_police_locate(tb[TCA_TCINDEX_POLICE-1],NULL) : NULL;
+		tcf_tree_lock(tp);
 		police = xchg(&r->police,police);
-                tcf_tree_unlock(tp);
+		tcf_tree_unlock(tp);
 		tcf_police_release(police);
-        }
+	}
 #endif
 	if (r != &new_filter_result)
 		return 0;
@@ -345,7 +363,7 @@
 	DPRINTK("tcindex_walk(tp %p,walker %p),p %p\n",tp,walker,p);
 	if (p->perfect) {
 		for (i = 0; i < p->hash; i++) {
-			if (!p->perfect[i].res.classid)
+			if (!p->perfect[i].res.class)
 				continue;
 			if (walker->count >= walker->skip) {
 				if (walker->fn(tp,
--- linux.orig/net/sched/sch_cbq.c	Wed Mar  7 14:26:36 2001
+++ linux/net/sched/sch_cbq.c	Wed Mar  7 21:38:38 2001
@@ -1740,9 +1740,13 @@
 	}
 
 	for (h = 0; h < 16; h++) {
-		for (cl = q->classes[h]; cl; cl = cl->next)
+		struct cbq_class *next;
+
+		for (cl = q->classes[h]; cl; cl = next) {
+			next = cl->next;
 			if (cl != &q->link)
 				cbq_destroy_class(cl);
+		}
 	}
 
 	qdisc_put_rtab(q->link.R_tab);
--- linux.orig/net/sched/sch_dsmark.c	Fri Feb  9 20:34:13 2001
+++ linux/net/sched/sch_dsmark.c	Wed Mar  7 21:49:33 2001
@@ -84,7 +84,9 @@
 
 static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
 {
-	return NULL;
+	struct dsmark_qdisc_data *p = PRIV(sch);
+
+	return p->q;
 }
 
 
@@ -187,7 +189,7 @@
 	struct dsmark_qdisc_data *p = PRIV(sch);
 	struct tcf_result res;
 	int result;
-	int ret;
+	int ret = NET_XMIT_POLICED;
 
 	D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
 	if (p->set_tc_index) {
@@ -237,7 +239,7 @@
 
 	    ((ret = p->q->enqueue(skb,p->q)) != 0)) {
 		sch->stats.drops++;
-		return 0;
+		return ret;
 	}
 	sch->stats.bytes += skb->len;
 	sch->stats.packets++;
--- linux.orig/net/sched/sch_tbf.c	Sat Apr 29 07:50:14 2000
+++ linux/net/sched/sch_tbf.c	Wed Mar  7 14:44:46 2001
@@ -276,7 +276,7 @@
 	struct tc_tbf_qopt *qopt;
 	struct qdisc_rate_table *rtab = NULL;
 	struct qdisc_rate_table *ptab = NULL;
-	int max_size;
+	int max_size,n;
 
 	if (rtattr_parse(tb, TCA_TBF_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
 	    tb[TCA_TBF_PARMS-1] == NULL ||
@@ -295,15 +295,18 @@
 			goto done;
 	}
 
-	max_size = psched_mtu(sch->dev);
+	for (n = 0; n < 256; n++)
+		if (rtab->data[n] > qopt->buffer) break;
+	max_size = (n << qopt->rate.cell_log)-1;
 	if (ptab) {
-		int n = max_size>>qopt->peakrate.cell_log;
-		while (n>0 && ptab->data[n-1] > qopt->mtu) {
-			max_size -= (1<<qopt->peakrate.cell_log);
-			n--;
-		}
+		int size;
+
+		for (n = 0; n < 256; n++)
+			if (ptab->data[n] > qopt->mtu) break;
+		size = (n << qopt->peakrate.cell_log)-1;
+		if (size < max_size) max_size = size;
 	}
-	if (rtab->data[max_size>>qopt->rate.cell_log] > qopt->buffer)
+	if (max_size < 0)
 		goto done;
 
 	sch_tree_lock(sch);

-- 
  _________________________________________________________________________
 / Werner Almesberger, ICA, EPFL, CH           Werner.Almesberger@epfl.ch /
/_IN_N_032__Tel_+41_21_693_6621__Fax_+41_21_693_6610_____________________/