Implement a workaround for the policy routing gateway bug
[project/relayd.git] / route.c
1 /*
2 * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License v2 as published by
6 * the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
16 */
17
18 #include <sys/socket.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <errno.h>
22 #include <unistd.h>
23 #include <fcntl.h>
24
25 #include <linux/fib_rules.h>
26
27 #include "relayd.h"
28
29 static struct uloop_fd rtnl_sock;
30 static unsigned int rtnl_seq, rtnl_dump_seq;
31 int route_table = 16800;
32
/*
 * Ask the kernel to flush its IPv4 routing cache by writing "-1" to
 * /proc/sys/net/ipv4/route/flush.
 *
 * This is best-effort: if the proc file cannot be opened nothing happens,
 * and a failed write is only reported on stderr.
 */
static void rtnl_flush(void)
{
	static const char flush_cmd[] = "-1";
	int fd;

	fd = open("/proc/sys/net/ipv4/route/flush", O_WRONLY);
	if (fd < 0)
		return;

	/* sizeof - 1: the terminating NUL is not part of the command */
	if (write(fd, flush_cmd, sizeof(flush_cmd) - 1) < 0)
		perror("write(route/flush)");

	close(fd);
}
44
45 static void
46 rtnl_rule_request(struct relayd_interface *rif, bool add, bool defgw_workaround)
47 {
48 static struct {
49 struct nlmsghdr nl;
50 struct rtmsg rt;
51 struct {
52 struct rtattr rta;
53 int table;
54 } __packed table;
55 struct {
56 struct rtattr rta;
57 char ifname[IFNAMSIZ + 1];
58 } __packed dev;
59 } __packed req = {
60 .rt = {
61 .rtm_family = AF_INET,
62 .rtm_table = RT_TABLE_UNSPEC,
63 .rtm_scope = RT_SCOPE_UNIVERSE,
64 .rtm_protocol = RTPROT_BOOT,
65 },
66 .table.rta = {
67 .rta_type = FRA_TABLE,
68 .rta_len = sizeof(req.table),
69 },
70 };
71
72 int padding = sizeof(req.dev.ifname);
73
74 if (!defgw_workaround) {
75 req.dev.rta.rta_type = FRA_IFNAME;
76 padding -= strlen(rif->ifname) + 1;
77 strcpy(req.dev.ifname, rif->ifname);
78 req.dev.rta.rta_len = sizeof(req.dev.rta) + strlen(rif->ifname) + 1;
79 } else {
80 req.dev.rta.rta_type = FRA_PRIORITY;
81 req.dev.rta.rta_len = sizeof(req.dev.rta) + sizeof(uint32_t);
82 padding -= sizeof(uint32_t);
83 *((uint32_t *) &req.dev.ifname) = 1;
84 }
85 req.table.table = rif->rt_table;
86 req.nl.nlmsg_len = sizeof(req) - padding;
87
88 req.nl.nlmsg_flags = NLM_F_REQUEST;
89 if (add) {
90 req.nl.nlmsg_type = RTM_NEWRULE;
91 req.nl.nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
92
93 req.rt.rtm_type = RTN_UNICAST;
94 } else {
95 req.nl.nlmsg_type = RTM_DELRULE;
96 req.rt.rtm_type = RTN_UNSPEC;
97 }
98
99 send(rtnl_sock.fd, &req, req.nl.nlmsg_len, 0);
100 rtnl_flush();
101 }
102
103
104 static void
105 rtnl_route_request(struct relayd_interface *rif, struct relayd_host *host,
106 struct relayd_route *route, bool add)
107 {
108 static struct {
109 struct nlmsghdr nl;
110 struct rtmsg rt;
111 struct {
112 struct rtattr rta;
113 uint8_t ipaddr[4];
114 } __packed dst;
115 struct {
116 struct rtattr rta;
117 int table;
118 } __packed table;
119 struct {
120 struct rtattr rta;
121 int ifindex;
122 } __packed dev;
123 struct {
124 struct rtattr rta;
125 uint8_t ipaddr[4];
126 } __packed gw;
127 } __packed req = {
128 .rt = {
129 .rtm_family = AF_INET,
130 .rtm_dst_len = 32,
131 .rtm_table = RT_TABLE_MAIN,
132 },
133 .table.rta = {
134 .rta_type = RTA_TABLE,
135 .rta_len = sizeof(req.table),
136 },
137 .dst.rta = {
138 .rta_len = sizeof(req.dst),
139 },
140 .dev.rta = {
141 .rta_type = RTA_OIF,
142 .rta_len = sizeof(req.dev),
143 },
144 .gw.rta = {
145 .rta_type = RTA_GATEWAY,
146 .rta_len = sizeof(req.gw),
147 },
148 };
149 int pktlen = sizeof(req);
150
151 req.dev.ifindex = host->rif->sll.sll_ifindex;
152 req.table.table = rif->rt_table;
153
154 req.nl.nlmsg_flags = NLM_F_REQUEST;
155 if (add) {
156 req.nl.nlmsg_type = RTM_NEWROUTE;
157 req.nl.nlmsg_flags |= NLM_F_CREATE | NLM_F_REPLACE;
158
159 req.rt.rtm_protocol = RTPROT_BOOT;
160 if (route) {
161 req.rt.rtm_scope = RT_SCOPE_UNIVERSE;
162 } else {
163 req.rt.rtm_scope = RT_SCOPE_LINK;
164 }
165 req.rt.rtm_type = RTN_UNICAST;
166 } else {
167 req.nl.nlmsg_type = RTM_DELROUTE;
168 req.rt.rtm_scope = RT_SCOPE_NOWHERE;
169 }
170
171 req.dst.rta.rta_type = RTA_DST;
172 if (route) {
173 DPRINTF(2, "%s: add route to "IP_FMT"/%d via "IP_FMT"\n",
174 host->rif->ifname, IP_BUF(route->dest), route->mask, IP_BUF(host->ipaddr));
175 if (!route->mask) {
176 /* No DST -> reuse the DST attribute as the gateway attribute */
177 memcpy(req.dst.ipaddr, host->ipaddr, sizeof(req.dst.ipaddr));
178 req.dst.rta.rta_type = RTA_GATEWAY;
179 pktlen -= sizeof(req.gw);
180 req.rt.rtm_dst_len = 0;
181 } else {
182 memcpy(req.gw.ipaddr, host->ipaddr, sizeof(req.gw.ipaddr));
183 memcpy(req.dst.ipaddr, route->dest, sizeof(req.dst.ipaddr));
184 req.rt.rtm_dst_len = route->mask;
185 }
186 } else {
187 pktlen -= sizeof(req.gw);
188 req.rt.rtm_dst_len = 32;
189 memcpy(req.dst.ipaddr, host->ipaddr, sizeof(req.dst.ipaddr));
190 }
191
192 req.nl.nlmsg_len = pktlen;
193 if (route)
194 rtnl_rule_request(rif, true, true);
195 send(rtnl_sock.fd, &req, pktlen, 0);
196 if (route)
197 rtnl_rule_request(rif, false, true);
198 rtnl_flush();
199 }
200
201 void
202 rtnl_route_set(struct relayd_host *host, struct relayd_route *route, bool add)
203 {
204 struct relayd_interface *rif;
205
206 list_for_each_entry(rif, &interfaces, list) {
207 if (rif == host->rif)
208 continue;
209
210 rtnl_route_request(rif, host, route, add);
211 }
212 }
213
214 void relayd_add_interface_routes(struct relayd_interface *rif)
215 {
216 rif->rt_table = route_table++;
217 rtnl_rule_request(rif, true, false);
218 }
219
/*
 * Remove the interface-name rule that points at rif's routing table.
 */
void relayd_del_interface_routes(struct relayd_interface *rif)
{
	const bool add = false;
	const bool defgw_workaround = false;

	rtnl_rule_request(rif, add, defgw_workaround);
}
224
/*
 * Fallback for headers that do not provide NDA_RTA: address of the first
 * attribute, located after the aligned struct ndmsg that r points to.
 */
#ifndef NDA_RTA
#define NDA_RTA(r) \
	((struct rtattr *) ((char *) (r) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
229
230 static void rtnl_parse_newneigh(struct nlmsghdr *h)
231 {
232 struct relayd_interface *rif = NULL;
233 struct ndmsg *r = NLMSG_DATA(h);
234 const uint8_t *lladdr = NULL;
235 const uint8_t *ipaddr = NULL;
236 struct rtattr *rta;
237 int len;
238
239 if (r->ndm_family != AF_INET)
240 return;
241
242 list_for_each_entry(rif, &interfaces, list) {
243 if (rif->sll.sll_ifindex == r->ndm_ifindex)
244 goto found_interface;
245 }
246 return;
247
248 found_interface:
249 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(*r));
250 for (rta = NDA_RTA(r); RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
251 switch(rta->rta_type) {
252 case NDA_LLADDR:
253 lladdr = RTA_DATA(rta);
254 break;
255 case NDA_DST:
256 ipaddr = RTA_DATA(rta);
257 break;
258 default:
259 break;
260 }
261 }
262
263 if (!lladdr || !ipaddr || (r->ndm_state & (NUD_INCOMPLETE|NUD_FAILED)))
264 return;
265
266 if (!memcmp(lladdr, "\x00\x00\x00\x00\x00\x00", ETH_ALEN))
267 return;
268
269 DPRINTF(1, "%s: Found ARP cache entry for host "IP_FMT" ("MAC_FMT")\n",
270 rif->ifname, IP_BUF(ipaddr), MAC_BUF(lladdr));
271 relayd_refresh_host(rif, lladdr, ipaddr);
272 }
273
274 static void rtnl_parse_packet(void *data, int len)
275 {
276 struct nlmsghdr *h;
277
278 for (h = data; NLMSG_OK(h, len); h = NLMSG_NEXT(h, len)) {
279 if (h->nlmsg_type == NLMSG_DONE ||
280 h->nlmsg_type == NLMSG_ERROR)
281 return;
282
283 if (h->nlmsg_seq != rtnl_dump_seq)
284 continue;
285
286 if (h->nlmsg_type == RTM_NEWNEIGH)
287 rtnl_parse_newneigh(h);
288 }
289 }
290
291 static void rtnl_cb(struct uloop_fd *fd, unsigned int events)
292 {
293 struct sockaddr_nl nladdr;
294 static uint8_t buf[16384];
295 struct iovec iov = {
296 .iov_base = buf,
297 .iov_len = sizeof(buf),
298 };
299 struct msghdr msg = {
300 .msg_name = &nladdr,
301 .msg_namelen = sizeof(nladdr),
302 .msg_iov = &iov,
303 .msg_iovlen = 1,
304 };
305
306 do {
307 int len;
308
309 len = recvmsg(rtnl_sock.fd, &msg, 0);
310 if (len < 0) {
311 if (errno == EINTR)
312 continue;
313
314 return;
315 }
316
317 if (!len)
318 break;
319
320 if (nladdr.nl_pid != 0)
321 continue;
322
323 rtnl_parse_packet(buf, len);
324 } while (1);
325 }
326
327 int relayd_rtnl_init(void)
328 {
329 struct sockaddr_nl snl_local;
330 static struct {
331 struct nlmsghdr nlh;
332 struct rtgenmsg g;
333 } req = {
334 .nlh = {
335 .nlmsg_len = sizeof(req),
336 .nlmsg_type = RTM_GETNEIGH,
337 .nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST,
338 .nlmsg_pid = 0,
339 },
340 .g.rtgen_family = AF_INET,
341 };
342
343 rtnl_sock.fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
344 if (rtnl_sock.fd < 0) {
345 perror("socket(AF_NETLINK)");
346 return -1;
347 }
348
349 snl_local.nl_family = AF_NETLINK;
350
351 if (bind(rtnl_sock.fd, (struct sockaddr *) &snl_local, sizeof(struct sockaddr_nl)) < 0) {
352 perror("bind");
353 close(rtnl_sock.fd);
354 return -1;
355 }
356
357 rtnl_sock.cb = rtnl_cb;
358 uloop_fd_add(&rtnl_sock, ULOOP_READ | ULOOP_EDGE_TRIGGER);
359
360 rtnl_seq = time(NULL);
361 rtnl_dump_seq = rtnl_seq;
362 req.nlh.nlmsg_seq = rtnl_seq;
363 send(rtnl_sock.fd, &req, sizeof(req), 0);
364
365 return 0;
366 }
367
368 void relayd_rtnl_done(void)
369 {
370 uloop_fd_delete(&rtnl_sock);
371 close(rtnl_sock.fd);
372 }