kernel-ark/net/sched/act_gate.c
Davide Caratti a01c245438 net/sched: fix a couple of splats in the error path of tfc_gate_init()
trying to configure TC 'act_gate' rules with invalid control actions, the
following splat can be observed:

 general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] SMP KASAN NOPTI
 KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017]
 CPU: 1 PID: 2143 Comm: tc Not tainted 5.7.0-rc6+ #168
 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014
 RIP: 0010:hrtimer_active+0x56/0x290
 [...]
  Call Trace:
  hrtimer_try_to_cancel+0x6d/0x330
  hrtimer_cancel+0x11/0x20
  tcf_gate_cleanup+0x15/0x30 [act_gate]
  tcf_action_cleanup+0x58/0x170
  __tcf_action_put+0xb0/0xe0
  __tcf_idr_release+0x68/0x90
  tcf_gate_init+0x7c7/0x19a0 [act_gate]
  tcf_action_init_1+0x60f/0x960
  tcf_action_init+0x157/0x2a0
  tcf_action_add+0xd9/0x2f0
  tc_ctl_action+0x2a3/0x39d
  rtnetlink_rcv_msg+0x5f3/0x920
  netlink_rcv_skb+0x121/0x350
  netlink_unicast+0x439/0x630
  netlink_sendmsg+0x714/0xbf0
  sock_sendmsg+0xe2/0x110
  ____sys_sendmsg+0x5b4/0x890
  ___sys_sendmsg+0xe9/0x160
  __sys_sendmsg+0xd3/0x170
  do_syscall_64+0x9a/0x370
  entry_SYSCALL_64_after_hwframe+0x44/0xa9

this is caused by hrtimer_cancel(), running before hrtimer_init(). Fix it
ensuring to call hrtimer_cancel() only if clockid is valid, and the timer
has been initialized. After fixing this splat, the same error path causes
another problem:

 general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN NOPTI
 KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
 CPU: 1 PID: 980 Comm: tc Not tainted 5.7.0-rc6+ #168
 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014
 RIP: 0010:release_entry_list+0x4a/0x240 [act_gate]
 [...]
 Call Trace:
  tcf_action_cleanup+0x58/0x170
  __tcf_action_put+0xb0/0xe0
  __tcf_idr_release+0x68/0x90
  tcf_gate_init+0x7ab/0x19a0 [act_gate]
  tcf_action_init_1+0x60f/0x960
  tcf_action_init+0x157/0x2a0
  tcf_action_add+0xd9/0x2f0
  tc_ctl_action+0x2a3/0x39d
  rtnetlink_rcv_msg+0x5f3/0x920
  netlink_rcv_skb+0x121/0x350
  netlink_unicast+0x439/0x630
  netlink_sendmsg+0x714/0xbf0
  sock_sendmsg+0xe2/0x110
  ____sys_sendmsg+0x5b4/0x890
  ___sys_sendmsg+0xe9/0x160
  __sys_sendmsg+0xd3/0x170
  do_syscall_64+0x9a/0x370
  entry_SYSCALL_64_after_hwframe+0x44/0xa9

the problem is similar: tcf_action_cleanup() was trying to release a list
without initializing it first. Ensure that INIT_LIST_HEAD() is called for
every newly created 'act_gate' action, same as what was done to 'act_ife'
with commit 44c23d7159 ("net/sched: act_ife: initalize ife->metalist
earlier").

Fixes: a51c328df3 ("net: qos: introduce a gate control flow action")
CC: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-01 11:36:36 -07:00

640 lines
15 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/* Copyright 2020 NXP */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gate.h>
static unsigned int gate_net_id;
static struct tc_action_ops act_gate_ops;
static ktime_t gate_get_time(struct tcf_gate *gact)
{
ktime_t mono = ktime_get();
switch (gact->tk_offset) {
case TK_OFFS_MAX:
return mono;
default:
return ktime_mono_to_any(mono, gact->tk_offset);
}
return KTIME_MAX;
}
static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
{
struct tcf_gate_params *param = &gact->param;
ktime_t now, base, cycle;
u64 n;
base = ns_to_ktime(param->tcfg_basetime);
now = gate_get_time(gact);
if (ktime_after(base, now)) {
*start = base;
return 0;
}
cycle = param->tcfg_cycletime;
/* cycle time should not be zero */
if (!cycle)
return -EFAULT;
n = div64_u64(ktime_sub_ns(now, base), cycle);
*start = ktime_add_ns(base, (n + 1) * cycle);
return 0;
}
static void gate_start_timer(struct tcf_gate *gact, ktime_t start)
{
ktime_t expires;
expires = hrtimer_get_expires(&gact->hitimer);
if (expires == 0)
expires = KTIME_MAX;
start = min_t(ktime_t, start, expires);
hrtimer_start(&gact->hitimer, start, HRTIMER_MODE_ABS_SOFT);
}
static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
{
struct tcf_gate *gact = container_of(timer, struct tcf_gate,
hitimer);
struct tcf_gate_params *p = &gact->param;
struct tcfg_gate_entry *next;
ktime_t close_time, now;
spin_lock(&gact->tcf_lock);
next = gact->next_entry;
/* cycle start, clear pending bit, clear total octets */
gact->current_gate_status = next->gate_state ? GATE_ACT_GATE_OPEN : 0;
gact->current_entry_octets = 0;
gact->current_max_octets = next->maxoctets;
gact->current_close_time = ktime_add_ns(gact->current_close_time,
next->interval);
close_time = gact->current_close_time;
if (list_is_last(&next->list, &p->entries))
next = list_first_entry(&p->entries,
struct tcfg_gate_entry, list);
else
next = list_next_entry(next, list);
now = gate_get_time(gact);
if (ktime_after(now, close_time)) {
ktime_t cycle, base;
u64 n;
cycle = p->tcfg_cycletime;
base = ns_to_ktime(p->tcfg_basetime);
n = div64_u64(ktime_sub_ns(now, base), cycle);
close_time = ktime_add_ns(base, (n + 1) * cycle);
}
gact->next_entry = next;
hrtimer_set_expires(&gact->hitimer, close_time);
spin_unlock(&gact->tcf_lock);
return HRTIMER_RESTART;
}
static int tcf_gate_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_gate *gact = to_gate(a);
spin_lock(&gact->tcf_lock);
tcf_lastuse_update(&gact->tcf_tm);
bstats_update(&gact->tcf_bstats, skb);
if (unlikely(gact->current_gate_status & GATE_ACT_PENDING)) {
spin_unlock(&gact->tcf_lock);
return gact->tcf_action;
}
if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN))
goto drop;
if (gact->current_max_octets >= 0) {
gact->current_entry_octets += qdisc_pkt_len(skb);
if (gact->current_entry_octets > gact->current_max_octets) {
gact->tcf_qstats.overlimits++;
goto drop;
}
}
spin_unlock(&gact->tcf_lock);
return gact->tcf_action;
drop:
gact->tcf_qstats.drops++;
spin_unlock(&gact->tcf_lock);
return TC_ACT_SHOT;
}
static const struct nla_policy entry_policy[TCA_GATE_ENTRY_MAX + 1] = {
[TCA_GATE_ENTRY_INDEX] = { .type = NLA_U32 },
[TCA_GATE_ENTRY_GATE] = { .type = NLA_FLAG },
[TCA_GATE_ENTRY_INTERVAL] = { .type = NLA_U32 },
[TCA_GATE_ENTRY_IPV] = { .type = NLA_S32 },
[TCA_GATE_ENTRY_MAX_OCTETS] = { .type = NLA_S32 },
};
static const struct nla_policy gate_policy[TCA_GATE_MAX + 1] = {
[TCA_GATE_PARMS] = { .len = sizeof(struct tc_gate),
.type = NLA_EXACT_LEN },
[TCA_GATE_PRIORITY] = { .type = NLA_S32 },
[TCA_GATE_ENTRY_LIST] = { .type = NLA_NESTED },
[TCA_GATE_BASE_TIME] = { .type = NLA_U64 },
[TCA_GATE_CYCLE_TIME] = { .type = NLA_U64 },
[TCA_GATE_CYCLE_TIME_EXT] = { .type = NLA_U64 },
[TCA_GATE_FLAGS] = { .type = NLA_U32 },
[TCA_GATE_CLOCKID] = { .type = NLA_S32 },
};
static int fill_gate_entry(struct nlattr **tb, struct tcfg_gate_entry *entry,
struct netlink_ext_ack *extack)
{
u32 interval = 0;
entry->gate_state = nla_get_flag(tb[TCA_GATE_ENTRY_GATE]);
if (tb[TCA_GATE_ENTRY_INTERVAL])
interval = nla_get_u32(tb[TCA_GATE_ENTRY_INTERVAL]);
if (interval == 0) {
NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
return -EINVAL;
}
entry->interval = interval;
if (tb[TCA_GATE_ENTRY_IPV])
entry->ipv = nla_get_s32(tb[TCA_GATE_ENTRY_IPV]);
else
entry->ipv = -1;
if (tb[TCA_GATE_ENTRY_MAX_OCTETS])
entry->maxoctets = nla_get_s32(tb[TCA_GATE_ENTRY_MAX_OCTETS]);
else
entry->maxoctets = -1;
return 0;
}
static int parse_gate_entry(struct nlattr *n, struct tcfg_gate_entry *entry,
int index, struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_GATE_ENTRY_MAX + 1] = { };
int err;
err = nla_parse_nested(tb, TCA_GATE_ENTRY_MAX, n, entry_policy, extack);
if (err < 0) {
NL_SET_ERR_MSG(extack, "Could not parse nested entry");
return -EINVAL;
}
entry->index = index;
return fill_gate_entry(tb, entry, extack);
}
static void release_entry_list(struct list_head *entries)
{
struct tcfg_gate_entry *entry, *e;
list_for_each_entry_safe(entry, e, entries, list) {
list_del(&entry->list);
kfree(entry);
}
}
static int parse_gate_list(struct nlattr *list_attr,
struct tcf_gate_params *sched,
struct netlink_ext_ack *extack)
{
struct tcfg_gate_entry *entry;
struct nlattr *n;
int err, rem;
int i = 0;
if (!list_attr)
return -EINVAL;
nla_for_each_nested(n, list_attr, rem) {
if (nla_type(n) != TCA_GATE_ONE_ENTRY) {
NL_SET_ERR_MSG(extack, "Attribute isn't type 'entry'");
continue;
}
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
NL_SET_ERR_MSG(extack, "Not enough memory for entry");
err = -ENOMEM;
goto release_list;
}
err = parse_gate_entry(n, entry, i, extack);
if (err < 0) {
kfree(entry);
goto release_list;
}
list_add_tail(&entry->list, &sched->entries);
i++;
}
sched->num_entries = i;
return i;
release_list:
release_entry_list(&sched->entries);
return err;
}
static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gate_net_id);
enum tk_offsets tk_offset = TK_OFFS_TAI;
struct nlattr *tb[TCA_GATE_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tcf_gate_params *p;
s32 clockid = CLOCK_TAI;
struct tcf_gate *gact;
struct tc_gate *parm;
int ret = 0, err;
u64 basetime = 0;
u32 gflags = 0;
s32 prio = -1;
ktime_t start;
u32 index;
if (!nla)
return -EINVAL;
err = nla_parse_nested(tb, TCA_GATE_MAX, nla, gate_policy, extack);
if (err < 0)
return err;
if (!tb[TCA_GATE_PARMS])
return -EINVAL;
parm = nla_data(tb[TCA_GATE_PARMS]);
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
return err;
if (err && bind)
return 0;
if (!err) {
ret = tcf_idr_create(tn, index, est, a,
&act_gate_ops, bind, false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
}
ret = ACT_P_CREATED;
} else if (!ovr) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
if (ret == ACT_P_CREATED) {
to_gate(*a)->param.tcfg_clockid = -1;
INIT_LIST_HEAD(&(to_gate(*a)->param.entries));
}
if (tb[TCA_GATE_PRIORITY])
prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
if (tb[TCA_GATE_BASE_TIME])
basetime = nla_get_u64(tb[TCA_GATE_BASE_TIME]);
if (tb[TCA_GATE_FLAGS])
gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
if (tb[TCA_GATE_CLOCKID]) {
clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
switch (clockid) {
case CLOCK_REALTIME:
tk_offset = TK_OFFS_REAL;
break;
case CLOCK_MONOTONIC:
tk_offset = TK_OFFS_MAX;
break;
case CLOCK_BOOTTIME:
tk_offset = TK_OFFS_BOOT;
break;
case CLOCK_TAI:
tk_offset = TK_OFFS_TAI;
break;
default:
NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
goto release_idr;
}
}
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
gact = to_gate(*a);
spin_lock_bh(&gact->tcf_lock);
p = &gact->param;
if (tb[TCA_GATE_CYCLE_TIME]) {
p->tcfg_cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
if (!p->tcfg_cycletime_ext)
goto chain_put;
}
if (tb[TCA_GATE_ENTRY_LIST]) {
err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
if (err < 0)
goto chain_put;
}
if (!p->tcfg_cycletime) {
struct tcfg_gate_entry *entry;
ktime_t cycle = 0;
list_for_each_entry(entry, &p->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
p->tcfg_cycletime = cycle;
}
if (tb[TCA_GATE_CYCLE_TIME_EXT])
p->tcfg_cycletime_ext =
nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
p->tcfg_priority = prio;
p->tcfg_basetime = basetime;
p->tcfg_clockid = clockid;
p->tcfg_flags = gflags;
gact->tk_offset = tk_offset;
hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
gact->hitimer.function = gate_timer_func;
err = gate_get_start_time(gact, &start);
if (err < 0) {
NL_SET_ERR_MSG(extack,
"Internal error: failed get start time");
release_entry_list(&p->entries);
goto chain_put;
}
gact->current_close_time = start;
gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
gact->next_entry = list_first_entry(&p->entries,
struct tcfg_gate_entry, list);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
gate_start_timer(gact, start);
spin_unlock_bh(&gact->tcf_lock);
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
chain_put:
spin_unlock_bh(&gact->tcf_lock);
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
release_idr:
tcf_idr_release(*a, bind);
return err;
}
static void tcf_gate_cleanup(struct tc_action *a)
{
struct tcf_gate *gact = to_gate(a);
struct tcf_gate_params *p;
p = &gact->param;
if (p->tcfg_clockid != -1)
hrtimer_cancel(&gact->hitimer);
release_entry_list(&p->entries);
}
static int dumping_entry(struct sk_buff *skb,
struct tcfg_gate_entry *entry)
{
struct nlattr *item;
item = nla_nest_start_noflag(skb, TCA_GATE_ONE_ENTRY);
if (!item)
return -ENOSPC;
if (nla_put_u32(skb, TCA_GATE_ENTRY_INDEX, entry->index))
goto nla_put_failure;
if (entry->gate_state && nla_put_flag(skb, TCA_GATE_ENTRY_GATE))
goto nla_put_failure;
if (nla_put_u32(skb, TCA_GATE_ENTRY_INTERVAL, entry->interval))
goto nla_put_failure;
if (nla_put_s32(skb, TCA_GATE_ENTRY_MAX_OCTETS, entry->maxoctets))
goto nla_put_failure;
if (nla_put_s32(skb, TCA_GATE_ENTRY_IPV, entry->ipv))
goto nla_put_failure;
return nla_nest_end(skb, item);
nla_put_failure:
nla_nest_cancel(skb, item);
return -1;
}
static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_gate *gact = to_gate(a);
struct tc_gate opt = {
.index = gact->tcf_index,
.refcnt = refcount_read(&gact->tcf_refcnt) - ref,
.bindcnt = atomic_read(&gact->tcf_bindcnt) - bind,
};
struct tcfg_gate_entry *entry;
struct tcf_gate_params *p;
struct nlattr *entry_list;
struct tcf_t t;
spin_lock_bh(&gact->tcf_lock);
opt.action = gact->tcf_action;
p = &gact->param;
if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
if (nla_put_u64_64bit(skb, TCA_GATE_BASE_TIME,
p->tcfg_basetime, TCA_GATE_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME,
p->tcfg_cycletime, TCA_GATE_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME_EXT,
p->tcfg_cycletime_ext, TCA_GATE_PAD))
goto nla_put_failure;
if (nla_put_s32(skb, TCA_GATE_CLOCKID, p->tcfg_clockid))
goto nla_put_failure;
if (nla_put_u32(skb, TCA_GATE_FLAGS, p->tcfg_flags))
goto nla_put_failure;
if (nla_put_s32(skb, TCA_GATE_PRIORITY, p->tcfg_priority))
goto nla_put_failure;
entry_list = nla_nest_start_noflag(skb, TCA_GATE_ENTRY_LIST);
if (!entry_list)
goto nla_put_failure;
list_for_each_entry(entry, &p->entries, list) {
if (dumping_entry(skb, entry) < 0)
goto nla_put_failure;
}
nla_nest_end(skb, entry_list);
tcf_tm_dump(&t, &gact->tcf_tm);
if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD))
goto nla_put_failure;
spin_unlock_bh(&gact->tcf_lock);
return skb->len;
nla_put_failure:
spin_unlock_bh(&gact->tcf_lock);
nlmsg_trim(skb, b);
return -1;
}
static int tcf_gate_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gate_net_id);
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u32 packets,
u64 lastuse, bool hw)
{
struct tcf_gate *gact = to_gate(a);
struct tcf_t *tm = &gact->tcf_tm;
tcf_action_update_stats(a, bytes, packets, false, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_gate_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, gate_net_id);
return tcf_idr_search(tn, a, index);
}
static size_t tcf_gate_get_fill_size(const struct tc_action *act)
{
return nla_total_size(sizeof(struct tc_gate));
}
static struct tc_action_ops act_gate_ops = {
.kind = "gate",
.id = TCA_ID_GATE,
.owner = THIS_MODULE,
.act = tcf_gate_act,
.dump = tcf_gate_dump,
.init = tcf_gate_init,
.cleanup = tcf_gate_cleanup,
.walk = tcf_gate_walker,
.stats_update = tcf_gate_stats_update,
.get_fill_size = tcf_gate_get_fill_size,
.lookup = tcf_gate_search,
.size = sizeof(struct tcf_gate),
};
static __net_init int gate_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, gate_net_id);
return tc_action_net_init(net, tn, &act_gate_ops);
}
static void __net_exit gate_exit_net(struct list_head *net_list)
{
tc_action_net_exit(net_list, gate_net_id);
}
static struct pernet_operations gate_net_ops = {
.init = gate_init_net,
.exit_batch = gate_exit_net,
.id = &gate_net_id,
.size = sizeof(struct tc_action_net),
};
static int __init gate_init_module(void)
{
return tcf_register_action(&act_gate_ops, &gate_net_ops);
}
static void __exit gate_cleanup_module(void)
{
tcf_unregister_action(&act_gate_ops, &gate_net_ops);
}
module_init(gate_init_module);
module_exit(gate_cleanup_module);
MODULE_LICENSE("GPL v2");