Main Page | Class List | File List | Class Members | File Members

fib_semantics.c

Go to the documentation of this file.
00001 /* 00002 * INET An implementation of the TCP/IP protocol suite for the LINUX 00003 * operating system. INET is implemented using the BSD Socket 00004 * interface as the means of communication with the user level. 00005 * 00006 * IPv4 Forwarding Information Base: semantics. 00007 * 00008 * Version: $Id: fib_semantics.c,v 1.18.2.2 2002/01/12 07:54:15 davem Exp $ 00009 * 00010 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 00011 * 00012 * This program is free software; you can redistribute it and/or 00013 * modify it under the terms of the GNU General Public License 00014 * as published by the Free Software Foundation; either version 00015 * 2 of the License, or (at your option) any later version. 00016 */ 00017 00018 #include <linux/config.h> 00019 #include <asm/uaccess.h> 00020 #include <asm/system.h> 00021 #include <asm/bitops.h> 00022 #include <linux/types.h> 00023 #include <linux/kernel.h> 00024 #include <linux/sched.h> 00025 #include <linux/mm.h> 00026 #include <linux/string.h> 00027 #include <linux/socket.h> 00028 #include <linux/sockios.h> 00029 #include <linux/errno.h> 00030 #include <linux/in.h> 00031 #include <linux/inet.h> 00032 #include <linux/netdevice.h> 00033 #include <linux/if_arp.h> 00034 #include <linux/proc_fs.h> 00035 #include <linux/skbuff.h> 00036 #include <linux/netlink.h> 00037 #include <linux/init.h> 00038 00039 #include <net/ip.h> 00040 #include <net/protocol.h> 00041 #include <net/route.h> 00042 #include <net/tcp.h> 00043 #include <net/sock.h> 00044 #include <net/ip_fib.h> 00045 00046 #define FSprintk(a...) 
00047 00048 static struct fib_info *fib_info_list; 00049 static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED; 00050 int fib_info_cnt; 00051 00052 #define for_fib_info() { struct fib_info *fi; \ 00053 for (fi = fib_info_list; fi; fi = fi->fib_next) 00054 00055 #define endfor_fib_info() } 00056 00057 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00058 00059 static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED; 00060 00061 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 00062 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 00063 00064 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ 00065 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) 00066 00067 #else /* CONFIG_IP_ROUTE_MULTIPATH */ 00068 00069 /* Hope, that gcc will optimize it to get rid of dummy loop */ 00070 00071 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ 00072 for (nhsel=0; nhsel < 1; nhsel++) 00073 00074 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ 00075 for (nhsel=0; nhsel < 1; nhsel++) 00076 00077 #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 00078 00079 #define endfor_nexthops(fi) } 00080 00081 00082 static struct 00083 { 00084 int error; 00085 u8 scope; 00086 } fib_props[RTA_MAX+1] = { 00087 { 0, RT_SCOPE_NOWHERE}, /* RTN_UNSPEC */ 00088 { 0, RT_SCOPE_UNIVERSE}, /* RTN_UNICAST */ 00089 { 0, RT_SCOPE_HOST}, /* RTN_LOCAL */ 00090 { 0, RT_SCOPE_LINK}, /* RTN_BROADCAST */ 00091 { 0, RT_SCOPE_LINK}, /* RTN_ANYCAST */ 00092 { 0, RT_SCOPE_UNIVERSE}, /* RTN_MULTICAST */ 00093 { -EINVAL, RT_SCOPE_UNIVERSE}, /* RTN_BLACKHOLE */ 00094 { -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */ 00095 { -EACCES, RT_SCOPE_UNIVERSE}, /* RTN_PROHIBIT */ 00096 { -EAGAIN, RT_SCOPE_UNIVERSE}, /* RTN_THROW */ 00097 #ifdef CONFIG_IP_ROUTE_NAT 00098 { 0, RT_SCOPE_HOST}, /* RTN_NAT */ 00099 #else 00100 { -EINVAL, RT_SCOPE_NOWHERE}, /* RTN_NAT */ 00101 #endif 
00102 { -EINVAL, RT_SCOPE_NOWHERE} /* RTN_XRESOLVE */ 00103 }; 00104 00105 00106 /* Release a nexthop info record */ 00107 00108 void free_fib_info(struct fib_info *fi) 00109 { 00110 if (fi->fib_dead == 0) { 00111 printk("Freeing alive fib_info %p\n", fi); 00112 return; 00113 } 00114 change_nexthops(fi) { 00115 if (nh->nh_dev) 00116 dev_put(nh->nh_dev); 00117 nh->nh_dev = NULL; 00118 } endfor_nexthops(fi); 00119 fib_info_cnt--; 00120 kfree(fi); 00121 } 00122 00123 void fib_release_info(struct fib_info *fi) 00124 { 00125 write_lock(&fib_info_lock); 00126 if (fi && --fi->fib_treeref == 0) { 00127 if (fi->fib_next) 00128 fi->fib_next->fib_prev = fi->fib_prev; 00129 if (fi->fib_prev) 00130 fi->fib_prev->fib_next = fi->fib_next; 00131 if (fi == fib_info_list) 00132 fib_info_list = fi->fib_next; 00133 fi->fib_dead = 1; 00134 fib_info_put(fi); 00135 } 00136 write_unlock(&fib_info_lock); 00137 } 00138 00139 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 00140 { 00141 const struct fib_nh *onh = ofi->fib_nh; 00142 00143 for_nexthops(fi) { 00144 if (nh->nh_oif != onh->nh_oif || 00145 nh->nh_gw != onh->nh_gw || 00146 nh->nh_scope != onh->nh_scope || 00147 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00148 nh->nh_weight != onh->nh_weight || 00149 #endif 00150 #ifdef CONFIG_NET_CLS_ROUTE 00151 nh->nh_tclassid != onh->nh_tclassid || 00152 #endif 00153 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) 00154 return -1; 00155 onh++; 00156 } endfor_nexthops(fi); 00157 return 0; 00158 } 00159 00160 static __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi) 00161 { 00162 for_fib_info() { 00163 if (fi->fib_nhs != nfi->fib_nhs) 00164 continue; 00165 if (nfi->fib_protocol == fi->fib_protocol && 00166 nfi->fib_prefsrc == fi->fib_prefsrc && 00167 nfi->fib_priority == fi->fib_priority && 00168 memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 && 00169 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && 00170 (nfi->fib_nhs == 0 
|| nh_comp(fi, nfi) == 0)) 00171 return fi; 00172 } endfor_fib_info(); 00173 return NULL; 00174 } 00175 00176 /* Check, that the gateway is already configured. 00177 Used only by redirect accept routine. 00178 */ 00179 00180 int ip_fib_check_default(u32 gw, struct net_device *dev) 00181 { 00182 read_lock(&fib_info_lock); 00183 for_fib_info() { 00184 if (fi->fib_flags & RTNH_F_DEAD) 00185 continue; 00186 for_nexthops(fi) { 00187 if (nh->nh_dev == dev && nh->nh_gw == gw && 00188 nh->nh_scope == RT_SCOPE_LINK && 00189 !(nh->nh_flags&RTNH_F_DEAD)) { 00190 read_unlock(&fib_info_lock); 00191 return 0; 00192 } 00193 } endfor_nexthops(fi); 00194 } endfor_fib_info(); 00195 read_unlock(&fib_info_lock); 00196 return -1; 00197 } 00198 00199 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00200 00201 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) 00202 { 00203 while (RTA_OK(attr,attrlen)) { 00204 if (attr->rta_type == type) 00205 return *(u32*)RTA_DATA(attr); 00206 attr = RTA_NEXT(attr, attrlen); 00207 } 00208 return 0; 00209 } 00210 00211 static int 00212 fib_count_nexthops(struct rtattr *rta) 00213 { 00214 int nhs = 0; 00215 struct rtnexthop *nhp = RTA_DATA(rta); 00216 int nhlen = RTA_PAYLOAD(rta); 00217 00218 while (nhlen >= (int)sizeof(struct rtnexthop)) { 00219 if ((nhlen -= nhp->rtnh_len) < 0) 00220 return 0; 00221 nhs++; 00222 nhp = RTNH_NEXT(nhp); 00223 }; 00224 return nhs; 00225 } 00226 00227 static int 00228 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) 00229 { 00230 struct rtnexthop *nhp = RTA_DATA(rta); 00231 int nhlen = RTA_PAYLOAD(rta); 00232 00233 change_nexthops(fi) { 00234 int attrlen = nhlen - sizeof(struct rtnexthop); 00235 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) 00236 return -EINVAL; 00237 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; 00238 nh->nh_oif = nhp->rtnh_ifindex; 00239 nh->nh_weight = nhp->rtnh_hops + 1; 00240 if (attrlen) { 00241 nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, 
RTA_GATEWAY); 00242 #ifdef CONFIG_NET_CLS_ROUTE 00243 nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); 00244 #endif 00245 } 00246 nhp = RTNH_NEXT(nhp); 00247 } endfor_nexthops(fi); 00248 return 0; 00249 } 00250 00251 #endif 00252 00253 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, 00254 struct fib_info *fi) 00255 { 00256 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00257 struct rtnexthop *nhp; 00258 int nhlen; 00259 #endif 00260 00261 if (rta->rta_priority && 00262 *rta->rta_priority != fi->fib_priority) 00263 return 1; 00264 00265 if (rta->rta_oif || rta->rta_gw) { 00266 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && 00267 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) 00268 return 0; 00269 return 1; 00270 } 00271 00272 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00273 if (rta->rta_mp == NULL) 00274 return 0; 00275 nhp = RTA_DATA(rta->rta_mp); 00276 nhlen = RTA_PAYLOAD(rta->rta_mp); 00277 00278 for_nexthops(fi) { 00279 int attrlen = nhlen - sizeof(struct rtnexthop); 00280 u32 gw; 00281 00282 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) 00283 return -EINVAL; 00284 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) 00285 return 1; 00286 if (attrlen) { 00287 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); 00288 if (gw && gw != nh->nh_gw) 00289 return 1; 00290 #ifdef CONFIG_NET_CLS_ROUTE 00291 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); 00292 if (gw && gw != nh->nh_tclassid) 00293 return 1; 00294 #endif 00295 } 00296 nhp = RTNH_NEXT(nhp); 00297 } endfor_nexthops(fi); 00298 #endif 00299 return 0; 00300 } 00301 00302 00303 /* 00304 Picture 00305 ------- 00306 00307 Semantics of nexthop is very messy by historical reasons. 00308 We have to take into account, that: 00309 a) gateway can be actually local interface address, 00310 so that gatewayed route is direct. 00311 b) gateway must be on-link address, possibly 00312 described not by an ifaddr, but also by a direct route. 
00313 c) If both gateway and interface are specified, they should not 00314 contradict. 00315 d) If we use tunnel routes, gateway might not be on-link. 00316 00317 Attempt to reconcile all of these (alas, self-contradictory) conditions 00318 results in pretty ugly and hairy code with obscure logic. 00319 00320 I chose to generalize it instead, so that the size 00321 of code does not increase practically, but it becomes 00322 much more general. 00323 Every prefix is assigned a "scope" value: "host" is local address, 00324 "link" is direct route, 00325 [ ... "site" ... "interior" ... ] 00326 and "universe" is true gateway route with global meaning. 00327 00328 Every prefix refers to a set of "nexthop"s (gw, oif), 00329 where gw must have narrower scope. This recursion stops 00330 when gw has LOCAL scope or if "nexthop" is declared ONLINK, 00331 which means that gw is forced to be on link. 00332 00333 Code is still hairy, but now it is apparently logically 00334 consistent and very flexible. E.g. as a by-product it allows 00335 independent exterior and interior routing processes 00336 to co-exist in peace. 00337 00338 Normally it looks as follows. 
00339 00340 {universe prefix} -> (gw, oif) [scope link] 00341 | 00342 |-> {link prefix} -> (gw, oif) [scope local] 00343 | 00344 |-> {local prefix} (terminal node) 00345 */ 00346 00347 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) 00348 { 00349 int err; 00350 00351 if (nh->nh_gw) { 00352 struct rt_key key; 00353 struct fib_result res; 00354 00355 #ifdef CONFIG_IP_ROUTE_PERVASIVE 00356 if (nh->nh_flags&RTNH_F_PERVASIVE) 00357 return 0; 00358 #endif 00359 if (nh->nh_flags&RTNH_F_ONLINK) { 00360 struct net_device *dev; 00361 00362 if (r->rtm_scope >= RT_SCOPE_LINK) 00363 return -EINVAL; 00364 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) 00365 return -EINVAL; 00366 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) 00367 return -ENODEV; 00368 if (!(dev->flags&IFF_UP)) 00369 return -ENETDOWN; 00370 nh->nh_dev = dev; 00371 dev_hold(dev); 00372 nh->nh_scope = RT_SCOPE_LINK; 00373 return 0; 00374 } 00375 memset(&key, 0, sizeof(key)); 00376 key.dst = nh->nh_gw; 00377 key.oif = nh->nh_oif; 00378 key.scope = r->rtm_scope + 1; 00379 00380 /* It is not necessary, but requires a bit of thinking */ 00381 if (key.scope < RT_SCOPE_LINK) 00382 key.scope = RT_SCOPE_LINK; 00383 if ((err = fib_lookup(&key, &res)) != 0) 00384 return err; 00385 err = -EINVAL; 00386 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 00387 goto out; 00388 nh->nh_scope = res.scope; 00389 nh->nh_oif = FIB_RES_OIF(res); 00390 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) 00391 goto out; 00392 dev_hold(nh->nh_dev); 00393 err = -ENETDOWN; 00394 if (!(nh->nh_dev->flags & IFF_UP)) 00395 goto out; 00396 err = 0; 00397 out: 00398 fib_res_put(&res); 00399 return err; 00400 } else { 00401 struct in_device *in_dev; 00402 00403 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 00404 return -EINVAL; 00405 00406 in_dev = inetdev_by_index(nh->nh_oif); 00407 if (in_dev == NULL) 00408 return -ENODEV; 00409 if (!(in_dev->dev->flags&IFF_UP)) { 00410 in_dev_put(in_dev); 00411 
return -ENETDOWN; 00412 } 00413 nh->nh_dev = in_dev->dev; 00414 dev_hold(nh->nh_dev); 00415 nh->nh_scope = RT_SCOPE_HOST; 00416 in_dev_put(in_dev); 00417 } 00418 return 0; 00419 } 00420 00421 struct fib_info * 00422 fib_create_info(const struct rtmsg *r, struct kern_rta *rta, 00423 const struct nlmsghdr *nlh, int *errp) 00424 { 00425 int err; 00426 struct fib_info *fi = NULL; 00427 struct fib_info *ofi; 00428 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00429 int nhs = 1; 00430 #else 00431 const int nhs = 1; 00432 #endif 00433 00434 /* Fast check to catch the most weird cases */ 00435 if (fib_props[r->rtm_type].scope > r->rtm_scope) 00436 goto err_inval; 00437 00438 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00439 if (rta->rta_mp) { 00440 nhs = fib_count_nexthops(rta->rta_mp); 00441 if (nhs == 0) 00442 goto err_inval; 00443 } 00444 #endif 00445 00446 fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 00447 err = -ENOBUFS; 00448 if (fi == NULL) 00449 goto failure; 00450 fib_info_cnt++; 00451 memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh)); 00452 00453 fi->fib_protocol = r->rtm_protocol; 00454 fi->fib_nhs = nhs; 00455 fi->fib_flags = r->rtm_flags; 00456 if (rta->rta_priority) 00457 fi->fib_priority = *rta->rta_priority; 00458 if (rta->rta_mx) { 00459 int attrlen = RTA_PAYLOAD(rta->rta_mx); 00460 struct rtattr *attr = RTA_DATA(rta->rta_mx); 00461 00462 while (RTA_OK(attr, attrlen)) { 00463 unsigned flavor = attr->rta_type; 00464 if (flavor) { 00465 if (flavor > RTAX_MAX) 00466 goto err_inval; 00467 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); 00468 } 00469 attr = RTA_NEXT(attr, attrlen); 00470 } 00471 } 00472 if (rta->rta_prefsrc) 00473 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); 00474 00475 if (rta->rta_mp) { 00476 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00477 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) 00478 goto failure; 00479 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) 00480 goto err_inval; 00481 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, 
rta->rta_gw, 4)) 00482 goto err_inval; 00483 #ifdef CONFIG_NET_CLS_ROUTE 00484 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) 00485 goto err_inval; 00486 #endif 00487 #else 00488 goto err_inval; 00489 #endif 00490 } else { 00491 struct fib_nh *nh = fi->fib_nh; 00492 if (rta->rta_oif) 00493 nh->nh_oif = *rta->rta_oif; 00494 if (rta->rta_gw) 00495 memcpy(&nh->nh_gw, rta->rta_gw, 4); 00496 #ifdef CONFIG_NET_CLS_ROUTE 00497 if (rta->rta_flow) 00498 memcpy(&nh->nh_tclassid, rta->rta_flow, 4); 00499 #endif 00500 nh->nh_flags = r->rtm_flags; 00501 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00502 nh->nh_weight = 1; 00503 #endif 00504 } 00505 00506 #ifdef CONFIG_IP_ROUTE_NAT 00507 if (r->rtm_type == RTN_NAT) { 00508 if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) 00509 goto err_inval; 00510 memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4); 00511 goto link_it; 00512 } 00513 #endif 00514 00515 if (fib_props[r->rtm_type].error) { 00516 if (rta->rta_gw || rta->rta_oif || rta->rta_mp) 00517 goto err_inval; 00518 goto link_it; 00519 } 00520 00521 if (r->rtm_scope > RT_SCOPE_HOST) 00522 goto err_inval; 00523 00524 if (r->rtm_scope == RT_SCOPE_HOST) { 00525 struct fib_nh *nh = fi->fib_nh; 00526 00527 /* Local address is added. 
*/ 00528 if (nhs != 1 || nh->nh_gw) 00529 goto err_inval; 00530 nh->nh_scope = RT_SCOPE_NOWHERE; 00531 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); 00532 err = -ENODEV; 00533 if (nh->nh_dev == NULL) 00534 goto failure; 00535 } else { 00536 change_nexthops(fi) { 00537 if ((err = fib_check_nh(r, fi, nh)) != 0) 00538 goto failure; 00539 } endfor_nexthops(fi) 00540 } 00541 00542 if (fi->fib_prefsrc) { 00543 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || 00544 memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) 00545 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) 00546 goto err_inval; 00547 } 00548 00549 link_it: 00550 if ((ofi = fib_find_info(fi)) != NULL) { 00551 fi->fib_dead = 1; 00552 free_fib_info(fi); 00553 ofi->fib_treeref++; 00554 return ofi; 00555 } 00556 00557 fi->fib_treeref++; 00558 atomic_inc(&fi->fib_clntref); 00559 write_lock(&fib_info_lock); 00560 fi->fib_next = fib_info_list; 00561 fi->fib_prev = NULL; 00562 if (fib_info_list) 00563 fib_info_list->fib_prev = fi; 00564 fib_info_list = fi; 00565 write_unlock(&fib_info_lock); 00566 return fi; 00567 00568 err_inval: 00569 err = -EINVAL; 00570 00571 failure: 00572 *errp = err; 00573 if (fi) { 00574 fi->fib_dead = 1; 00575 free_fib_info(fi); 00576 } 00577 return NULL; 00578 } 00579 00580 int 00581 fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res) 00582 { 00583 int err = fib_props[type].error; 00584 00585 if (err == 0) { 00586 if (fi->fib_flags&RTNH_F_DEAD) 00587 return 1; 00588 00589 res->fi = fi; 00590 00591 switch (type) { 00592 #ifdef CONFIG_IP_ROUTE_NAT 00593 case RTN_NAT: 00594 FIB_RES_RESET(*res); 00595 atomic_inc(&fi->fib_clntref); 00596 return 0; 00597 #endif 00598 case RTN_UNICAST: 00599 case RTN_LOCAL: 00600 case RTN_BROADCAST: 00601 case RTN_ANYCAST: 00602 case RTN_MULTICAST: 00603 for_nexthops(fi) { 00604 if (nh->nh_flags&RTNH_F_DEAD) 00605 continue; 00606 if (!key->oif || key->oif == nh->nh_oif) 00607 break; 00608 } 00609 #ifdef 
CONFIG_IP_ROUTE_MULTIPATH 00610 if (nhsel < fi->fib_nhs) { 00611 res->nh_sel = nhsel; 00612 atomic_inc(&fi->fib_clntref); 00613 return 0; 00614 } 00615 #else 00616 if (nhsel < 1) { 00617 atomic_inc(&fi->fib_clntref); 00618 return 0; 00619 } 00620 #endif 00621 endfor_nexthops(fi); 00622 res->fi = NULL; 00623 return 1; 00624 default: 00625 res->fi = NULL; 00626 printk(KERN_DEBUG "impossible 102\n"); 00627 return -EINVAL; 00628 } 00629 } 00630 return err; 00631 } 00632 00633 /* Find appropriate source address to this destination */ 00634 00635 u32 __fib_res_prefsrc(struct fib_result *res) 00636 { 00637 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); 00638 } 00639 00640 int 00641 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 00642 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, 00643 struct fib_info *fi) 00644 { 00645 struct rtmsg *rtm; 00646 struct nlmsghdr *nlh; 00647 unsigned char *b = skb->tail; 00648 00649 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm)); 00650 rtm = NLMSG_DATA(nlh); 00651 rtm->rtm_family = AF_INET; 00652 rtm->rtm_dst_len = dst_len; 00653 rtm->rtm_src_len = 0; 00654 rtm->rtm_tos = tos; 00655 rtm->rtm_table = tb_id; 00656 rtm->rtm_type = type; 00657 rtm->rtm_flags = fi->fib_flags; 00658 rtm->rtm_scope = scope; 00659 if (rtm->rtm_dst_len) 00660 RTA_PUT(skb, RTA_DST, 4, dst); 00661 rtm->rtm_protocol = fi->fib_protocol; 00662 if (fi->fib_priority) 00663 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); 00664 #ifdef CONFIG_NET_CLS_ROUTE 00665 if (fi->fib_nh[0].nh_tclassid) 00666 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); 00667 #endif 00668 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 00669 goto rtattr_failure; 00670 if (fi->fib_prefsrc) 00671 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); 00672 if (fi->fib_nhs == 1) { 00673 if (fi->fib_nh->nh_gw) 00674 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); 00675 if (fi->fib_nh->nh_oif) 00676 RTA_PUT(skb, RTA_OIF, sizeof(int), 
&fi->fib_nh->nh_oif); 00677 } 00678 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00679 if (fi->fib_nhs > 1) { 00680 struct rtnexthop *nhp; 00681 struct rtattr *mp_head; 00682 if (skb_tailroom(skb) <= RTA_SPACE(0)) 00683 goto rtattr_failure; 00684 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0)); 00685 00686 for_nexthops(fi) { 00687 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 00688 goto rtattr_failure; 00689 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 00690 nhp->rtnh_flags = nh->nh_flags & 0xFF; 00691 nhp->rtnh_hops = nh->nh_weight-1; 00692 nhp->rtnh_ifindex = nh->nh_oif; 00693 if (nh->nh_gw) 00694 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); 00695 nhp->rtnh_len = skb->tail - (unsigned char*)nhp; 00696 } endfor_nexthops(fi); 00697 mp_head->rta_type = RTA_MULTIPATH; 00698 mp_head->rta_len = skb->tail - (u8*)mp_head; 00699 } 00700 #endif 00701 nlh->nlmsg_len = skb->tail - b; 00702 return skb->len; 00703 00704 nlmsg_failure: 00705 rtattr_failure: 00706 skb_trim(skb, b - skb->data); 00707 return -1; 00708 } 00709 00710 #ifndef CONFIG_IP_NOSIOCRT 00711 00712 int 00713 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, 00714 struct kern_rta *rta, struct rtentry *r) 00715 { 00716 int plen; 00717 u32 *ptr; 00718 00719 memset(rtm, 0, sizeof(*rtm)); 00720 memset(rta, 0, sizeof(*rta)); 00721 00722 if (r->rt_dst.sa_family != AF_INET) 00723 return -EAFNOSUPPORT; 00724 00725 /* Check mask for validity: 00726 a) it must be contiguous. 00727 b) destination must have all host bits clear. 00728 c) if application forgot to set correct family (AF_INET), 00729 reject request unless it is absolutely clear i.e. 00730 both family and mask are zero. 
00731 */ 00732 plen = 32; 00733 ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; 00734 if (!(r->rt_flags&RTF_HOST)) { 00735 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; 00736 if (r->rt_genmask.sa_family != AF_INET) { 00737 if (mask || r->rt_genmask.sa_family) 00738 return -EAFNOSUPPORT; 00739 } 00740 if (bad_mask(mask, *ptr)) 00741 return -EINVAL; 00742 plen = inet_mask_len(mask); 00743 } 00744 00745 nl->nlmsg_flags = NLM_F_REQUEST; 00746 nl->nlmsg_pid = 0; 00747 nl->nlmsg_seq = 0; 00748 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); 00749 if (cmd == SIOCDELRT) { 00750 nl->nlmsg_type = RTM_DELROUTE; 00751 nl->nlmsg_flags = 0; 00752 } else { 00753 nl->nlmsg_type = RTM_NEWROUTE; 00754 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; 00755 rtm->rtm_protocol = RTPROT_BOOT; 00756 } 00757 00758 rtm->rtm_dst_len = plen; 00759 rta->rta_dst = ptr; 00760 00761 if (r->rt_metric) { 00762 *(u32*)&r->rt_pad3 = r->rt_metric - 1; 00763 rta->rta_priority = (u32*)&r->rt_pad3; 00764 } 00765 if (r->rt_flags&RTF_REJECT) { 00766 rtm->rtm_scope = RT_SCOPE_HOST; 00767 rtm->rtm_type = RTN_UNREACHABLE; 00768 return 0; 00769 } 00770 rtm->rtm_scope = RT_SCOPE_NOWHERE; 00771 rtm->rtm_type = RTN_UNICAST; 00772 00773 if (r->rt_dev) { 00774 char *colon; 00775 struct net_device *dev; 00776 char devname[IFNAMSIZ]; 00777 00778 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1)) 00779 return -EFAULT; 00780 devname[IFNAMSIZ-1] = 0; 00781 colon = strchr(devname, ':'); 00782 if (colon) 00783 *colon = 0; 00784 dev = __dev_get_by_name(devname); 00785 if (!dev) 00786 return -ENODEV; 00787 rta->rta_oif = &dev->ifindex; 00788 if (colon) { 00789 struct in_ifaddr *ifa; 00790 struct in_device *in_dev = __in_dev_get(dev); 00791 if (!in_dev) 00792 return -ENODEV; 00793 *colon = ':'; 00794 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) 00795 if (strcmp(ifa->ifa_label, devname) == 0) 00796 break; 00797 if (ifa == NULL) 00798 return -ENODEV; 00799 rta->rta_prefsrc = &ifa->ifa_local; 
00800 } 00801 } 00802 00803 ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; 00804 if (r->rt_gateway.sa_family == AF_INET && *ptr) { 00805 rta->rta_gw = ptr; 00806 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST) 00807 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 00808 } 00809 00810 if (cmd == SIOCDELRT) 00811 return 0; 00812 00813 if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) 00814 return -EINVAL; 00815 00816 if (rtm->rtm_scope == RT_SCOPE_NOWHERE) 00817 rtm->rtm_scope = RT_SCOPE_LINK; 00818 00819 if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { 00820 struct rtattr *rec; 00821 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); 00822 if (mx == NULL) 00823 return -ENOMEM; 00824 rta->rta_mx = mx; 00825 mx->rta_type = RTA_METRICS; 00826 mx->rta_len = RTA_LENGTH(0); 00827 if (r->rt_flags&RTF_MTU) { 00828 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); 00829 rec->rta_type = RTAX_ADVMSS; 00830 rec->rta_len = RTA_LENGTH(4); 00831 mx->rta_len += RTA_LENGTH(4); 00832 *(u32*)RTA_DATA(rec) = r->rt_mtu - 40; 00833 } 00834 if (r->rt_flags&RTF_WINDOW) { 00835 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); 00836 rec->rta_type = RTAX_WINDOW; 00837 rec->rta_len = RTA_LENGTH(4); 00838 mx->rta_len += RTA_LENGTH(4); 00839 *(u32*)RTA_DATA(rec) = r->rt_window; 00840 } 00841 if (r->rt_flags&RTF_IRTT) { 00842 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); 00843 rec->rta_type = RTAX_RTT; 00844 rec->rta_len = RTA_LENGTH(4); 00845 mx->rta_len += RTA_LENGTH(4); 00846 *(u32*)RTA_DATA(rec) = r->rt_irtt<<3; 00847 } 00848 } 00849 return 0; 00850 } 00851 00852 #endif 00853 00854 /* 00855 Update FIB if: 00856 - local address disappeared -> we must delete all the entries 00857 referring to it. 00858 - device went down -> we must shutdown all nexthops going via it. 
00859 */ 00860 00861 int fib_sync_down(u32 local, struct net_device *dev, int force) 00862 { 00863 int ret = 0; 00864 int scope = RT_SCOPE_NOWHERE; 00865 00866 if (force) 00867 scope = -1; 00868 00869 for_fib_info() { 00870 if (local && fi->fib_prefsrc == local) { 00871 fi->fib_flags |= RTNH_F_DEAD; 00872 ret++; 00873 } else if (dev && fi->fib_nhs) { 00874 int dead = 0; 00875 00876 change_nexthops(fi) { 00877 if (nh->nh_flags&RTNH_F_DEAD) 00878 dead++; 00879 else if (nh->nh_dev == dev && 00880 nh->nh_scope != scope) { 00881 nh->nh_flags |= RTNH_F_DEAD; 00882 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00883 spin_lock_bh(&fib_multipath_lock); 00884 fi->fib_power -= nh->nh_power; 00885 nh->nh_power = 0; 00886 spin_unlock_bh(&fib_multipath_lock); 00887 #endif 00888 dead++; 00889 } 00890 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00891 if (force > 1 && nh->nh_dev == dev) { 00892 dead = fi->fib_nhs; 00893 break; 00894 } 00895 #endif 00896 } endfor_nexthops(fi) 00897 if (dead == fi->fib_nhs) { 00898 fi->fib_flags |= RTNH_F_DEAD; 00899 ret++; 00900 } 00901 } 00902 } endfor_fib_info(); 00903 return ret; 00904 } 00905 00906 #ifdef CONFIG_IP_ROUTE_MULTIPATH 00907 00908 /* 00909 Dead device goes up. We wake up dead nexthops. 00910 It takes sense only on multipath routes. 
00911 */ 00912 00913 int fib_sync_up(struct net_device *dev) 00914 { 00915 int ret = 0; 00916 00917 if (!(dev->flags&IFF_UP)) 00918 return 0; 00919 00920 for_fib_info() { 00921 int alive = 0; 00922 00923 change_nexthops(fi) { 00924 if (!(nh->nh_flags&RTNH_F_DEAD)) { 00925 alive++; 00926 continue; 00927 } 00928 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) 00929 continue; 00930 if (nh->nh_dev != dev || __in_dev_get(dev) == NULL) 00931 continue; 00932 alive++; 00933 spin_lock_bh(&fib_multipath_lock); 00934 nh->nh_power = 0; 00935 nh->nh_flags &= ~RTNH_F_DEAD; 00936 spin_unlock_bh(&fib_multipath_lock); 00937 } endfor_nexthops(fi) 00938 00939 if (alive > 0) { 00940 fi->fib_flags &= ~RTNH_F_DEAD; 00941 ret++; 00942 } 00943 } endfor_fib_info(); 00944 return ret; 00945 } 00946 00947 /* 00948 The algorithm is suboptimal, but it provides really 00949 fair weighted route distribution. 00950 */ 00951 00952 void fib_select_multipath(const struct rt_key *key, struct fib_result *res) 00953 { 00954 struct fib_info *fi = res->fi; 00955 int w; 00956 00957 spin_lock_bh(&fib_multipath_lock); 00958 if (fi->fib_power <= 0) { 00959 int power = 0; 00960 change_nexthops(fi) { 00961 if (!(nh->nh_flags&RTNH_F_DEAD)) { 00962 power += nh->nh_weight; 00963 nh->nh_power = nh->nh_weight; 00964 } 00965 } endfor_nexthops(fi); 00966 fi->fib_power = power; 00967 if (power <= 0) { 00968 spin_unlock_bh(&fib_multipath_lock); 00969 /* Race condition: route has just become dead. */ 00970 res->nh_sel = 0; 00971 return; 00972 } 00973 } 00974 00975 00976 /* w should be random number [0..fi->fib_power-1], 00977 it is pretty bad approximation. 
00978 */ 00979 00980 w = jiffies % fi->fib_power; 00981 00982 change_nexthops(fi) { 00983 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { 00984 if ((w -= nh->nh_power) <= 0) { 00985 nh->nh_power--; 00986 fi->fib_power--; 00987 res->nh_sel = nhsel; 00988 spin_unlock_bh(&fib_multipath_lock); 00989 return; 00990 } 00991 } 00992 } endfor_nexthops(fi); 00993 00994 /* Race condition: route has just become dead. */ 00995 res->nh_sel = 0; 00996 spin_unlock_bh(&fib_multipath_lock); 00997 } 00998 #endif 00999 01000 01001 #ifdef CONFIG_PROC_FS 01002 01003 static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi) 01004 { 01005 static unsigned type2flags[RTN_MAX+1] = { 01006 0, 0, 0, 0, 0, 0, 0, RTF_REJECT, RTF_REJECT, 0, 0, 0 01007 }; 01008 unsigned flags = type2flags[type]; 01009 01010 if (fi && fi->fib_nh->nh_gw) 01011 flags |= RTF_GATEWAY; 01012 if (mask == 0xFFFFFFFF) 01013 flags |= RTF_HOST; 01014 if (!dead) 01015 flags |= RTF_UP; 01016 return flags; 01017 } 01018 01019 void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer) 01020 { 01021 int len; 01022 unsigned flags = fib_flag_trans(type, dead, mask, fi); 01023 01024 if (fi) { 01025 len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", 01026 fi->fib_dev ? fi->fib_dev->name : "*", prefix, 01027 fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority, 01028 mask, (fi->fib_advmss ? fi->fib_advmss+40 : 0), 01029 fi->fib_window, fi->fib_rtt>>3); 01030 } else { 01031 len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", 01032 prefix, 0, 01033 flags, 0, 0, 0, 01034 mask, 0, 0, 0); 01035 } 01036 memset(buffer+len, ' ', 127-len); 01037 buffer[127] = '\n'; 01038 } 01039 01040 #endif

Generated on Wed Dec 1 21:25:30 2004 for Linux 2.4.23 Networking by doxygen 1.3.8