Main Page | Class List | File List | Class Members | File Members

ipmr.c

Go to the documentation of this file.
00001 /* 00002 * IP multicast routing support for mrouted 3.6/3.8 00003 * 00004 * (c) 1995 Alan Cox, <alan@redhat.com> 00005 * Linux Consultancy and Custom Driver Development 00006 * 00007 * This program is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU General Public License 00009 * as published by the Free Software Foundation; either version 00010 * 2 of the License, or (at your option) any later version. 00011 * 00012 * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $ 00013 * 00014 * Fixes: 00015 * Michael Chastain : Incorrect size of copying. 00016 * Alan Cox : Added the cache manager code 00017 * Alan Cox : Fixed the clone/copy bug and device race. 00018 * Mike McLagan : Routing by source 00019 * Malcolm Beattie : Buffer handling fixes. 00020 * Alexey Kuznetsov : Double buffer free and other fixes. 00021 * SVR Anand : Fixed several multicast bugs and problems. 00022 * Alexey Kuznetsov : Status, optimisations and more. 00023 * Brad Parker : Better behaviour on mrouted upcall 00024 * overflow. 00025 * Carlos Picoto : PIMv1 Support 00026 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header 00027 * Relax this requirement to work with older peers. 
00028 * 00029 */ 00030 00031 #include <linux/config.h> 00032 #include <asm/system.h> 00033 #include <asm/uaccess.h> 00034 #include <linux/types.h> 00035 #include <linux/sched.h> 00036 #include <linux/errno.h> 00037 #include <linux/timer.h> 00038 #include <linux/mm.h> 00039 #include <linux/kernel.h> 00040 #include <linux/fcntl.h> 00041 #include <linux/stat.h> 00042 #include <linux/socket.h> 00043 #include <linux/in.h> 00044 #include <linux/inet.h> 00045 #include <linux/netdevice.h> 00046 #include <linux/inetdevice.h> 00047 #include <linux/igmp.h> 00048 #include <linux/proc_fs.h> 00049 #include <linux/mroute.h> 00050 #include <linux/init.h> 00051 #include <net/ip.h> 00052 #include <net/protocol.h> 00053 #include <linux/skbuff.h> 00054 #include <net/sock.h> 00055 #include <net/icmp.h> 00056 #include <net/udp.h> 00057 #include <net/raw.h> 00058 #include <linux/notifier.h> 00059 #include <linux/if_arp.h> 00060 #include <linux/netfilter_ipv4.h> 00061 #include <net/ipip.h> 00062 #include <net/checksum.h> 00063 00064 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 00065 #define CONFIG_IP_PIMSM 1 00066 #endif 00067 00068 static struct sock *mroute_socket; 00069 00070 00071 /* Big lock, protecting vif table, mrt cache and mroute socket state. 00072 Note that the changes are semaphored via rtnl_lock. 
00073 */ 00074 00075 static rwlock_t mrt_lock = RW_LOCK_UNLOCKED; 00076 00077 /* 00078 * Multicast router control variables 00079 */ 00080 00081 static struct vif_device vif_table[MAXVIFS]; /* Devices */ 00082 static int maxvif; 00083 00084 #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL) 00085 00086 int mroute_do_assert; /* Set in PIM assert */ 00087 int mroute_do_pim; 00088 00089 static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */ 00090 00091 static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ 00092 atomic_t cache_resolve_queue_len; /* Size of unresolved */ 00093 00094 /* Special spinlock for queue of unresolved entries */ 00095 static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED; 00096 00097 /* We return to original Alan's scheme. Hash table of resolved 00098 entries is changed only in process context and protected 00099 with weak lock mrt_lock. Queue of unresolved entries is protected 00100 with strong spinlock mfc_unres_lock. 00101 00102 In this case data path is free of exclusive locks at all. 
00103 */ 00104 00105 kmem_cache_t *mrt_cachep; 00106 00107 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); 00108 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); 00109 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); 00110 00111 extern struct inet_protocol pim_protocol; 00112 00113 static struct timer_list ipmr_expire_timer; 00114 00115 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 00116 00117 static 00118 struct net_device *ipmr_new_tunnel(struct vifctl *v) 00119 { 00120 struct net_device *dev; 00121 00122 dev = __dev_get_by_name("tunl0"); 00123 00124 if (dev) { 00125 int err; 00126 struct ifreq ifr; 00127 mm_segment_t oldfs; 00128 struct ip_tunnel_parm p; 00129 struct in_device *in_dev; 00130 00131 memset(&p, 0, sizeof(p)); 00132 p.iph.daddr = v->vifc_rmt_addr.s_addr; 00133 p.iph.saddr = v->vifc_lcl_addr.s_addr; 00134 p.iph.version = 4; 00135 p.iph.ihl = 5; 00136 p.iph.protocol = IPPROTO_IPIP; 00137 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 00138 ifr.ifr_ifru.ifru_data = (void*)&p; 00139 00140 oldfs = get_fs(); set_fs(KERNEL_DS); 00141 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); 00142 set_fs(oldfs); 00143 00144 dev = NULL; 00145 00146 if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { 00147 dev->flags |= IFF_MULTICAST; 00148 00149 in_dev = __in_dev_get(dev); 00150 if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) 00151 goto failure; 00152 in_dev->cnf.rp_filter = 0; 00153 00154 if (dev_open(dev)) 00155 goto failure; 00156 } 00157 } 00158 return dev; 00159 00160 failure: 00161 unregister_netdevice(dev); 00162 return NULL; 00163 } 00164 00165 #ifdef CONFIG_IP_PIMSM 00166 00167 static int reg_vif_num = -1; 00168 00169 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 00170 { 00171 read_lock(&mrt_lock); 00172 ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len; 00173 ((struct 
net_device_stats*)dev->priv)->tx_packets++; 00174 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); 00175 read_unlock(&mrt_lock); 00176 kfree_skb(skb); 00177 return 0; 00178 } 00179 00180 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) 00181 { 00182 return (struct net_device_stats*)dev->priv; 00183 } 00184 00185 static 00186 struct net_device *ipmr_reg_vif(struct vifctl *v) 00187 { 00188 struct net_device *dev; 00189 struct in_device *in_dev; 00190 int size; 00191 00192 size = sizeof(*dev) + sizeof(struct net_device_stats); 00193 dev = kmalloc(size, GFP_KERNEL); 00194 if (!dev) 00195 return NULL; 00196 00197 memset(dev, 0, size); 00198 00199 dev->priv = dev + 1; 00200 00201 strcpy(dev->name, "pimreg"); 00202 00203 dev->type = ARPHRD_PIMREG; 00204 dev->mtu = 1500 - sizeof(struct iphdr) - 8; 00205 dev->flags = IFF_NOARP; 00206 dev->hard_start_xmit = reg_vif_xmit; 00207 dev->get_stats = reg_vif_get_stats; 00208 dev->features |= NETIF_F_DYNALLOC; 00209 00210 if (register_netdevice(dev)) { 00211 kfree(dev); 00212 return NULL; 00213 } 00214 dev->iflink = 0; 00215 00216 if ((in_dev = inetdev_init(dev)) == NULL) 00217 goto failure; 00218 00219 in_dev->cnf.rp_filter = 0; 00220 00221 if (dev_open(dev)) 00222 goto failure; 00223 00224 return dev; 00225 00226 failure: 00227 unregister_netdevice(dev); 00228 return NULL; 00229 } 00230 #endif 00231 00232 /* 00233 * Delete a VIF entry 00234 */ 00235 00236 static int vif_delete(int vifi) 00237 { 00238 struct vif_device *v; 00239 struct net_device *dev; 00240 struct in_device *in_dev; 00241 00242 if (vifi < 0 || vifi >= maxvif) 00243 return -EADDRNOTAVAIL; 00244 00245 v = &vif_table[vifi]; 00246 00247 write_lock_bh(&mrt_lock); 00248 dev = v->dev; 00249 v->dev = NULL; 00250 00251 if (!dev) { 00252 write_unlock_bh(&mrt_lock); 00253 return -EADDRNOTAVAIL; 00254 } 00255 00256 #ifdef CONFIG_IP_PIMSM 00257 if (vifi == reg_vif_num) 00258 reg_vif_num = -1; 00259 #endif 00260 00261 if (vifi+1 == maxvif) { 00262 
int tmp; 00263 for (tmp=vifi-1; tmp>=0; tmp--) { 00264 if (VIF_EXISTS(tmp)) 00265 break; 00266 } 00267 maxvif = tmp+1; 00268 } 00269 00270 write_unlock_bh(&mrt_lock); 00271 00272 dev_set_allmulti(dev, -1); 00273 00274 if ((in_dev = __in_dev_get(dev)) != NULL) { 00275 in_dev->cnf.mc_forwarding--; 00276 ip_rt_multicast_event(in_dev); 00277 } 00278 00279 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) 00280 unregister_netdevice(dev); 00281 00282 dev_put(dev); 00283 return 0; 00284 } 00285 00286 /* Destroy an unresolved cache entry, killing queued skbs 00287 and reporting error to netlink readers. 00288 */ 00289 00290 static void ipmr_destroy_unres(struct mfc_cache *c) 00291 { 00292 struct sk_buff *skb; 00293 00294 atomic_dec(&cache_resolve_queue_len); 00295 00296 while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) { 00297 if (skb->nh.iph->version == 0) { 00298 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 00299 nlh->nlmsg_type = NLMSG_ERROR; 00300 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 00301 skb_trim(skb, nlh->nlmsg_len); 00302 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT; 00303 netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); 00304 } else 00305 kfree_skb(skb); 00306 } 00307 00308 kmem_cache_free(mrt_cachep, c); 00309 } 00310 00311 00312 /* Single timer process for all the unresolved queue. 
*/ 00313 00314 void ipmr_expire_process(unsigned long dummy) 00315 { 00316 unsigned long now; 00317 unsigned long expires; 00318 struct mfc_cache *c, **cp; 00319 00320 if (!spin_trylock(&mfc_unres_lock)) { 00321 mod_timer(&ipmr_expire_timer, jiffies+HZ/10); 00322 return; 00323 } 00324 00325 if (atomic_read(&cache_resolve_queue_len) == 0) 00326 goto out; 00327 00328 now = jiffies; 00329 expires = 10*HZ; 00330 cp = &mfc_unres_queue; 00331 00332 while ((c=*cp) != NULL) { 00333 long interval = c->mfc_un.unres.expires - now; 00334 00335 if (interval > 0) { 00336 if (interval < expires) 00337 expires = interval; 00338 cp = &c->next; 00339 continue; 00340 } 00341 00342 *cp = c->next; 00343 00344 ipmr_destroy_unres(c); 00345 } 00346 00347 if (atomic_read(&cache_resolve_queue_len)) 00348 mod_timer(&ipmr_expire_timer, jiffies + expires); 00349 00350 out: 00351 spin_unlock(&mfc_unres_lock); 00352 } 00353 00354 /* Fill oifs list. It is called under write locked mrt_lock. */ 00355 00356 static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls) 00357 { 00358 int vifi; 00359 00360 cache->mfc_un.res.minvif = MAXVIFS; 00361 cache->mfc_un.res.maxvif = 0; 00362 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 00363 00364 for (vifi=0; vifi<maxvif; vifi++) { 00365 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { 00366 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 00367 if (cache->mfc_un.res.minvif > vifi) 00368 cache->mfc_un.res.minvif = vifi; 00369 if (cache->mfc_un.res.maxvif <= vifi) 00370 cache->mfc_un.res.maxvif = vifi + 1; 00371 } 00372 } 00373 } 00374 00375 static int vif_add(struct vifctl *vifc, int mrtsock) 00376 { 00377 int vifi = vifc->vifc_vifi; 00378 struct vif_device *v = &vif_table[vifi]; 00379 struct net_device *dev; 00380 struct in_device *in_dev; 00381 00382 /* Is vif busy ? 
*/ 00383 if (VIF_EXISTS(vifi)) 00384 return -EADDRINUSE; 00385 00386 switch (vifc->vifc_flags) { 00387 #ifdef CONFIG_IP_PIMSM 00388 case VIFF_REGISTER: 00389 /* 00390 * Special Purpose VIF in PIM 00391 * All the packets will be sent to the daemon 00392 */ 00393 if (reg_vif_num >= 0) 00394 return -EADDRINUSE; 00395 dev = ipmr_reg_vif(vifc); 00396 if (!dev) 00397 return -ENOBUFS; 00398 break; 00399 #endif 00400 case VIFF_TUNNEL: 00401 dev = ipmr_new_tunnel(vifc); 00402 if (!dev) 00403 return -ENOBUFS; 00404 break; 00405 case 0: 00406 dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr); 00407 if (!dev) 00408 return -EADDRNOTAVAIL; 00409 __dev_put(dev); 00410 break; 00411 default: 00412 return -EINVAL; 00413 } 00414 00415 if ((in_dev = __in_dev_get(dev)) == NULL) 00416 return -EADDRNOTAVAIL; 00417 in_dev->cnf.mc_forwarding++; 00418 dev_set_allmulti(dev, +1); 00419 ip_rt_multicast_event(in_dev); 00420 00421 /* 00422 * Fill in the VIF structures 00423 */ 00424 v->rate_limit=vifc->vifc_rate_limit; 00425 v->local=vifc->vifc_lcl_addr.s_addr; 00426 v->remote=vifc->vifc_rmt_addr.s_addr; 00427 v->flags=vifc->vifc_flags; 00428 if (!mrtsock) 00429 v->flags |= VIFF_STATIC; 00430 v->threshold=vifc->vifc_threshold; 00431 v->bytes_in = 0; 00432 v->bytes_out = 0; 00433 v->pkt_in = 0; 00434 v->pkt_out = 0; 00435 v->link = dev->ifindex; 00436 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) 00437 v->link = dev->iflink; 00438 00439 /* And finish update writing critical data */ 00440 write_lock_bh(&mrt_lock); 00441 dev_hold(dev); 00442 v->dev=dev; 00443 #ifdef CONFIG_IP_PIMSM 00444 if (v->flags&VIFF_REGISTER) 00445 reg_vif_num = vifi; 00446 #endif 00447 if (vifi+1 > maxvif) 00448 maxvif = vifi+1; 00449 write_unlock_bh(&mrt_lock); 00450 return 0; 00451 } 00452 00453 static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp) 00454 { 00455 int line=MFC_HASH(mcastgrp,origin); 00456 struct mfc_cache *c; 00457 00458 for (c=mfc_cache_array[line]; c; c = c->next) { 00459 if (c->mfc_origin==origin 
&& c->mfc_mcastgrp==mcastgrp) 00460 break; 00461 } 00462 return c; 00463 } 00464 00465 /* 00466 * Allocate a multicast cache entry 00467 */ 00468 static struct mfc_cache *ipmr_cache_alloc(void) 00469 { 00470 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL); 00471 if(c==NULL) 00472 return NULL; 00473 memset(c, 0, sizeof(*c)); 00474 c->mfc_un.res.minvif = MAXVIFS; 00475 return c; 00476 } 00477 00478 static struct mfc_cache *ipmr_cache_alloc_unres(void) 00479 { 00480 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC); 00481 if(c==NULL) 00482 return NULL; 00483 memset(c, 0, sizeof(*c)); 00484 skb_queue_head_init(&c->mfc_un.unres.unresolved); 00485 c->mfc_un.unres.expires = jiffies + 10*HZ; 00486 return c; 00487 } 00488 00489 /* 00490 * A cache entry has gone into a resolved state from queued 00491 */ 00492 00493 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) 00494 { 00495 struct sk_buff *skb; 00496 00497 /* 00498 * Play the pending entries through our router 00499 */ 00500 00501 while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { 00502 if (skb->nh.iph->version == 0) { 00503 int err; 00504 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 00505 00506 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { 00507 nlh->nlmsg_len = skb->tail - (u8*)nlh; 00508 } else { 00509 nlh->nlmsg_type = NLMSG_ERROR; 00510 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 00511 skb_trim(skb, nlh->nlmsg_len); 00512 ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE; 00513 } 00514 err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); 00515 } else 00516 ip_mr_forward(skb, c, 0); 00517 } 00518 } 00519 00520 /* 00521 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted 00522 * expects the following bizarre scheme. 00523 * 00524 * Called under mrt_lock. 
00525 */ 00526 00527 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) 00528 { 00529 struct sk_buff *skb; 00530 int ihl = pkt->nh.iph->ihl<<2; 00531 struct igmphdr *igmp; 00532 struct igmpmsg *msg; 00533 int ret; 00534 00535 #ifdef CONFIG_IP_PIMSM 00536 if (assert == IGMPMSG_WHOLEPKT) 00537 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); 00538 else 00539 #endif 00540 skb = alloc_skb(128, GFP_ATOMIC); 00541 00542 if(!skb) 00543 return -ENOBUFS; 00544 00545 #ifdef CONFIG_IP_PIMSM 00546 if (assert == IGMPMSG_WHOLEPKT) { 00547 /* Ugly, but we have no choice with this interface. 00548 Duplicate old header, fix ihl, length etc. 00549 And all this only to mangle msg->im_msgtype and 00550 to set msg->im_mbz to "mbz" :-) 00551 */ 00552 msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr)); 00553 skb->nh.raw = skb->h.raw = (u8*)msg; 00554 memcpy(msg, pkt->nh.raw, sizeof(struct iphdr)); 00555 msg->im_msgtype = IGMPMSG_WHOLEPKT; 00556 msg->im_mbz = 0; 00557 msg->im_vif = reg_vif_num; 00558 skb->nh.iph->ihl = sizeof(struct iphdr) >> 2; 00559 skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr)); 00560 } else 00561 #endif 00562 { 00563 00564 /* 00565 * Copy the IP header 00566 */ 00567 00568 skb->nh.iph = (struct iphdr *)skb_put(skb, ihl); 00569 memcpy(skb->data,pkt->data,ihl); 00570 skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */ 00571 msg = (struct igmpmsg*)skb->nh.iph; 00572 msg->im_vif = vifi; 00573 skb->dst = dst_clone(pkt->dst); 00574 00575 /* 00576 * Add our header 00577 */ 00578 00579 igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr)); 00580 igmp->type = 00581 msg->im_msgtype = assert; 00582 igmp->code = 0; 00583 skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */ 00584 skb->h.raw = skb->nh.raw; 00585 } 00586 00587 if (mroute_socket == NULL) { 00588 kfree_skb(skb); 00589 return -EINVAL; 00590 } 00591 00592 /* 00593 * Deliver to mrouted 00594 */ 00595 if 
((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) { 00596 if (net_ratelimit()) 00597 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 00598 kfree_skb(skb); 00599 } 00600 00601 return ret; 00602 } 00603 00604 /* 00605 * Queue a packet for resolution. It gets locked cache entry! 00606 */ 00607 00608 static int 00609 ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) 00610 { 00611 int err; 00612 struct mfc_cache *c; 00613 00614 spin_lock_bh(&mfc_unres_lock); 00615 for (c=mfc_unres_queue; c; c=c->next) { 00616 if (c->mfc_mcastgrp == skb->nh.iph->daddr && 00617 c->mfc_origin == skb->nh.iph->saddr) 00618 break; 00619 } 00620 00621 if (c == NULL) { 00622 /* 00623 * Create a new entry if allowable 00624 */ 00625 00626 if (atomic_read(&cache_resolve_queue_len)>=10 || 00627 (c=ipmr_cache_alloc_unres())==NULL) { 00628 spin_unlock_bh(&mfc_unres_lock); 00629 00630 kfree_skb(skb); 00631 return -ENOBUFS; 00632 } 00633 00634 /* 00635 * Fill in the new cache entry 00636 */ 00637 c->mfc_parent=-1; 00638 c->mfc_origin=skb->nh.iph->saddr; 00639 c->mfc_mcastgrp=skb->nh.iph->daddr; 00640 00641 /* 00642 * Reflect first query at mrouted. 
00643 */ 00644 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) { 00645 /* If the report failed throw the cache entry 00646 out - Brad Parker 00647 */ 00648 spin_unlock_bh(&mfc_unres_lock); 00649 00650 kmem_cache_free(mrt_cachep, c); 00651 kfree_skb(skb); 00652 return err; 00653 } 00654 00655 atomic_inc(&cache_resolve_queue_len); 00656 c->next = mfc_unres_queue; 00657 mfc_unres_queue = c; 00658 00659 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); 00660 } 00661 00662 /* 00663 * See if we can append the packet 00664 */ 00665 if (c->mfc_un.unres.unresolved.qlen>3) { 00666 kfree_skb(skb); 00667 err = -ENOBUFS; 00668 } else { 00669 skb_queue_tail(&c->mfc_un.unres.unresolved,skb); 00670 err = 0; 00671 } 00672 00673 spin_unlock_bh(&mfc_unres_lock); 00674 return err; 00675 } 00676 00677 /* 00678 * MFC cache manipulation by user space mroute daemon 00679 */ 00680 00681 int ipmr_mfc_delete(struct mfcctl *mfc) 00682 { 00683 int line; 00684 struct mfc_cache *c, **cp; 00685 00686 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 00687 00688 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { 00689 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 00690 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 00691 write_lock_bh(&mrt_lock); 00692 *cp = c->next; 00693 write_unlock_bh(&mrt_lock); 00694 00695 kmem_cache_free(mrt_cachep, c); 00696 return 0; 00697 } 00698 } 00699 return -ENOENT; 00700 } 00701 00702 int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) 00703 { 00704 int line; 00705 struct mfc_cache *uc, *c, **cp; 00706 00707 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 00708 00709 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { 00710 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 00711 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) 00712 break; 00713 } 00714 00715 if (c != NULL) { 00716 write_lock_bh(&mrt_lock); 00717 c->mfc_parent = mfc->mfcc_parent; 00718 ipmr_update_threshoulds(c, 
mfc->mfcc_ttls); 00719 if (!mrtsock) 00720 c->mfc_flags |= MFC_STATIC; 00721 write_unlock_bh(&mrt_lock); 00722 return 0; 00723 } 00724 00725 if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr)) 00726 return -EINVAL; 00727 00728 c=ipmr_cache_alloc(); 00729 if (c==NULL) 00730 return -ENOMEM; 00731 00732 c->mfc_origin=mfc->mfcc_origin.s_addr; 00733 c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; 00734 c->mfc_parent=mfc->mfcc_parent; 00735 ipmr_update_threshoulds(c, mfc->mfcc_ttls); 00736 if (!mrtsock) 00737 c->mfc_flags |= MFC_STATIC; 00738 00739 write_lock_bh(&mrt_lock); 00740 c->next = mfc_cache_array[line]; 00741 mfc_cache_array[line] = c; 00742 write_unlock_bh(&mrt_lock); 00743 00744 /* 00745 * Check to see if we resolved a queued list. If so we 00746 * need to send on the frames and tidy up. 00747 */ 00748 spin_lock_bh(&mfc_unres_lock); 00749 for (cp = &mfc_unres_queue; (uc=*cp) != NULL; 00750 cp = &uc->next) { 00751 if (uc->mfc_origin == c->mfc_origin && 00752 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 00753 *cp = uc->next; 00754 if (atomic_dec_and_test(&cache_resolve_queue_len)) 00755 del_timer(&ipmr_expire_timer); 00756 break; 00757 } 00758 } 00759 spin_unlock_bh(&mfc_unres_lock); 00760 00761 if (uc) { 00762 ipmr_cache_resolve(uc, c); 00763 kmem_cache_free(mrt_cachep, uc); 00764 } 00765 return 0; 00766 } 00767 00768 /* 00769 * Close the multicast socket, and clear the vif tables etc 00770 */ 00771 00772 static void mroute_clean_tables(struct sock *sk) 00773 { 00774 int i; 00775 00776 /* 00777 * Shut down all active vif entries 00778 */ 00779 for(i=0; i<maxvif; i++) { 00780 if (!(vif_table[i].flags&VIFF_STATIC)) 00781 vif_delete(i); 00782 } 00783 00784 /* 00785 * Wipe the cache 00786 */ 00787 for (i=0;i<MFC_LINES;i++) { 00788 struct mfc_cache *c, **cp; 00789 00790 cp = &mfc_cache_array[i]; 00791 while ((c = *cp) != NULL) { 00792 if (c->mfc_flags&MFC_STATIC) { 00793 cp = &c->next; 00794 continue; 00795 } 00796 write_lock_bh(&mrt_lock); 00797 *cp = c->next; 00798 
write_unlock_bh(&mrt_lock); 00799 00800 kmem_cache_free(mrt_cachep, c); 00801 } 00802 } 00803 00804 if (atomic_read(&cache_resolve_queue_len) != 0) { 00805 struct mfc_cache *c; 00806 00807 spin_lock_bh(&mfc_unres_lock); 00808 while (mfc_unres_queue != NULL) { 00809 c = mfc_unres_queue; 00810 mfc_unres_queue = c->next; 00811 spin_unlock_bh(&mfc_unres_lock); 00812 00813 ipmr_destroy_unres(c); 00814 00815 spin_lock_bh(&mfc_unres_lock); 00816 } 00817 spin_unlock_bh(&mfc_unres_lock); 00818 } 00819 } 00820 00821 static void mrtsock_destruct(struct sock *sk) 00822 { 00823 rtnl_lock(); 00824 if (sk == mroute_socket) { 00825 ipv4_devconf.mc_forwarding--; 00826 00827 write_lock_bh(&mrt_lock); 00828 mroute_socket=NULL; 00829 write_unlock_bh(&mrt_lock); 00830 00831 mroute_clean_tables(sk); 00832 } 00833 rtnl_unlock(); 00834 } 00835 00836 /* 00837 * Socket options and virtual interface manipulation. The whole 00838 * virtual interface system is a complete heap, but unfortunately 00839 * that's how BSD mrouted happens to think. Maybe one day with a proper 00840 * MOSPF/PIM router set up we can clean this up. 
00841 */ 00842 00843 int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen) 00844 { 00845 int ret; 00846 struct vifctl vif; 00847 struct mfcctl mfc; 00848 00849 if(optname!=MRT_INIT) 00850 { 00851 if(sk!=mroute_socket && !capable(CAP_NET_ADMIN)) 00852 return -EACCES; 00853 } 00854 00855 switch(optname) 00856 { 00857 case MRT_INIT: 00858 if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP) 00859 return -EOPNOTSUPP; 00860 if(optlen!=sizeof(int)) 00861 return -ENOPROTOOPT; 00862 00863 rtnl_lock(); 00864 if (mroute_socket) { 00865 rtnl_unlock(); 00866 return -EADDRINUSE; 00867 } 00868 00869 ret = ip_ra_control(sk, 1, mrtsock_destruct); 00870 if (ret == 0) { 00871 write_lock_bh(&mrt_lock); 00872 mroute_socket=sk; 00873 write_unlock_bh(&mrt_lock); 00874 00875 ipv4_devconf.mc_forwarding++; 00876 } 00877 rtnl_unlock(); 00878 return ret; 00879 case MRT_DONE: 00880 if (sk!=mroute_socket) 00881 return -EACCES; 00882 return ip_ra_control(sk, 0, NULL); 00883 case MRT_ADD_VIF: 00884 case MRT_DEL_VIF: 00885 if(optlen!=sizeof(vif)) 00886 return -EINVAL; 00887 if (copy_from_user(&vif,optval,sizeof(vif))) 00888 return -EFAULT; 00889 if(vif.vifc_vifi >= MAXVIFS) 00890 return -ENFILE; 00891 rtnl_lock(); 00892 if (optname==MRT_ADD_VIF) { 00893 ret = vif_add(&vif, sk==mroute_socket); 00894 } else { 00895 ret = vif_delete(vif.vifc_vifi); 00896 } 00897 rtnl_unlock(); 00898 return ret; 00899 00900 /* 00901 * Manipulate the forwarding caches. These live 00902 * in a sort of kernel/user symbiosis. 00903 */ 00904 case MRT_ADD_MFC: 00905 case MRT_DEL_MFC: 00906 if(optlen!=sizeof(mfc)) 00907 return -EINVAL; 00908 if (copy_from_user(&mfc,optval, sizeof(mfc))) 00909 return -EFAULT; 00910 rtnl_lock(); 00911 if (optname==MRT_DEL_MFC) 00912 ret = ipmr_mfc_delete(&mfc); 00913 else 00914 ret = ipmr_mfc_add(&mfc, sk==mroute_socket); 00915 rtnl_unlock(); 00916 return ret; 00917 /* 00918 * Control PIM assert. 
00919 */ 00920 case MRT_ASSERT: 00921 { 00922 int v; 00923 if(get_user(v,(int *)optval)) 00924 return -EFAULT; 00925 mroute_do_assert=(v)?1:0; 00926 return 0; 00927 } 00928 #ifdef CONFIG_IP_PIMSM 00929 case MRT_PIM: 00930 { 00931 int v; 00932 if(get_user(v,(int *)optval)) 00933 return -EFAULT; 00934 v = (v)?1:0; 00935 rtnl_lock(); 00936 if (v != mroute_do_pim) { 00937 mroute_do_pim = v; 00938 mroute_do_assert = v; 00939 #ifdef CONFIG_IP_PIMSM_V2 00940 if (mroute_do_pim) 00941 inet_add_protocol(&pim_protocol); 00942 else 00943 inet_del_protocol(&pim_protocol); 00944 #endif 00945 } 00946 rtnl_unlock(); 00947 return 0; 00948 } 00949 #endif 00950 /* 00951 * Spurious command, or MRT_VERSION which you cannot 00952 * set. 00953 */ 00954 default: 00955 return -ENOPROTOOPT; 00956 } 00957 } 00958 00959 /* 00960 * Getsock opt support for the multicast routing system. 00961 */ 00962 00963 int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen) 00964 { 00965 int olr; 00966 int val; 00967 00968 if(optname!=MRT_VERSION && 00969 #ifdef CONFIG_IP_PIMSM 00970 optname!=MRT_PIM && 00971 #endif 00972 optname!=MRT_ASSERT) 00973 return -ENOPROTOOPT; 00974 00975 if (get_user(olr, optlen)) 00976 return -EFAULT; 00977 00978 olr = min_t(unsigned int, olr, sizeof(int)); 00979 if (olr < 0) 00980 return -EINVAL; 00981 00982 if(put_user(olr,optlen)) 00983 return -EFAULT; 00984 if(optname==MRT_VERSION) 00985 val=0x0305; 00986 #ifdef CONFIG_IP_PIMSM 00987 else if(optname==MRT_PIM) 00988 val=mroute_do_pim; 00989 #endif 00990 else 00991 val=mroute_do_assert; 00992 if(copy_to_user(optval,&val,olr)) 00993 return -EFAULT; 00994 return 0; 00995 } 00996 00997 /* 00998 * The IP multicast ioctl support routines. 
00999 */ 01000 01001 int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg) 01002 { 01003 struct sioc_sg_req sr; 01004 struct sioc_vif_req vr; 01005 struct vif_device *vif; 01006 struct mfc_cache *c; 01007 01008 switch(cmd) 01009 { 01010 case SIOCGETVIFCNT: 01011 if (copy_from_user(&vr,(void *)arg,sizeof(vr))) 01012 return -EFAULT; 01013 if(vr.vifi>=maxvif) 01014 return -EINVAL; 01015 read_lock(&mrt_lock); 01016 vif=&vif_table[vr.vifi]; 01017 if(VIF_EXISTS(vr.vifi)) { 01018 vr.icount=vif->pkt_in; 01019 vr.ocount=vif->pkt_out; 01020 vr.ibytes=vif->bytes_in; 01021 vr.obytes=vif->bytes_out; 01022 read_unlock(&mrt_lock); 01023 01024 if (copy_to_user((void *)arg,&vr,sizeof(vr))) 01025 return -EFAULT; 01026 return 0; 01027 } 01028 read_unlock(&mrt_lock); 01029 return -EADDRNOTAVAIL; 01030 case SIOCGETSGCNT: 01031 if (copy_from_user(&sr,(void *)arg,sizeof(sr))) 01032 return -EFAULT; 01033 01034 read_lock(&mrt_lock); 01035 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr); 01036 if (c) { 01037 sr.pktcnt = c->mfc_un.res.pkt; 01038 sr.bytecnt = c->mfc_un.res.bytes; 01039 sr.wrong_if = c->mfc_un.res.wrong_if; 01040 read_unlock(&mrt_lock); 01041 01042 if (copy_to_user((void *)arg,&sr,sizeof(sr))) 01043 return -EFAULT; 01044 return 0; 01045 } 01046 read_unlock(&mrt_lock); 01047 return -EADDRNOTAVAIL; 01048 default: 01049 return -ENOIOCTLCMD; 01050 } 01051 } 01052 01053 01054 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 01055 { 01056 struct vif_device *v; 01057 int ct; 01058 if (event != NETDEV_UNREGISTER) 01059 return NOTIFY_DONE; 01060 v=&vif_table[0]; 01061 for(ct=0;ct<maxvif;ct++,v++) { 01062 if (v->dev==ptr) 01063 vif_delete(ct); 01064 } 01065 return NOTIFY_DONE; 01066 } 01067 01068 01069 static struct notifier_block ip_mr_notifier={ 01070 ipmr_device_event, 01071 NULL, 01072 0 01073 }; 01074 01075 /* 01076 * Encapsulate a packet by attaching a valid IPIP header to it. 
01077 * This avoids tunnel drivers and other mess and gives us the speed so 01078 * important for multicast video. 01079 */ 01080 01081 static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr) 01082 { 01083 struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr)); 01084 01085 iph->version = 4; 01086 iph->tos = skb->nh.iph->tos; 01087 iph->ttl = skb->nh.iph->ttl; 01088 iph->frag_off = 0; 01089 iph->daddr = daddr; 01090 iph->saddr = saddr; 01091 iph->protocol = IPPROTO_IPIP; 01092 iph->ihl = 5; 01093 iph->tot_len = htons(skb->len); 01094 ip_select_ident(iph, skb->dst, NULL); 01095 ip_send_check(iph); 01096 01097 skb->h.ipiph = skb->nh.iph; 01098 skb->nh.iph = iph; 01099 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 01100 #ifdef CONFIG_NETFILTER 01101 nf_conntrack_put(skb->nfct); 01102 skb->nfct = NULL; 01103 #endif 01104 } 01105 01106 static inline int ipmr_forward_finish(struct sk_buff *skb) 01107 { 01108 struct ip_options *opt = &(IPCB(skb)->opt); 01109 struct dst_entry *dst = skb->dst; 01110 01111 if (unlikely(opt->optlen)) 01112 ip_forward_options(skb); 01113 01114 if (skb->len <= dst->pmtu) 01115 return dst->output(skb); 01116 else 01117 return ip_fragment(skb, dst->output); 01118 } 01119 01120 /* 01121 * Processing handlers for ipmr_forward 01122 */ 01123 01124 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, 01125 int vifi, int last) 01126 { 01127 struct iphdr *iph = skb->nh.iph; 01128 struct vif_device *vif = &vif_table[vifi]; 01129 struct net_device *dev; 01130 struct rtable *rt; 01131 int encap = 0; 01132 struct sk_buff *skb2; 01133 01134 if (vif->dev == NULL) 01135 return; 01136 01137 #ifdef CONFIG_IP_PIMSM 01138 if (vif->flags & VIFF_REGISTER) { 01139 vif->pkt_out++; 01140 vif->bytes_out+=skb->len; 01141 ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len; 01142 ((struct net_device_stats*)vif->dev->priv)->tx_packets++; 01143 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); 01144 return; 01145 } 
01146 #endif 01147 01148 if (vif->flags&VIFF_TUNNEL) { 01149 if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link)) 01150 return; 01151 encap = sizeof(struct iphdr); 01152 } else { 01153 if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link)) 01154 return; 01155 } 01156 01157 dev = rt->u.dst.dev; 01158 01159 if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) { 01160 /* Do not fragment multicasts. Alas, IPv4 does not 01161 allow to send ICMP, so that packets will disappear 01162 to blackhole. 01163 */ 01164 01165 IP_INC_STATS_BH(IpFragFails); 01166 ip_rt_put(rt); 01167 return; 01168 } 01169 01170 encap += dev->hard_header_len; 01171 01172 if (skb_headroom(skb) < encap || skb_cloned(skb) || !last) 01173 skb2 = skb_realloc_headroom(skb, (encap + 15)&~15); 01174 else if (atomic_read(&skb->users) != 1) 01175 skb2 = skb_clone(skb, GFP_ATOMIC); 01176 else { 01177 atomic_inc(&skb->users); 01178 skb2 = skb; 01179 } 01180 01181 if (skb2 == NULL) { 01182 ip_rt_put(rt); 01183 return; 01184 } 01185 01186 vif->pkt_out++; 01187 vif->bytes_out+=skb->len; 01188 01189 dst_release(skb2->dst); 01190 skb2->dst = &rt->u.dst; 01191 iph = skb2->nh.iph; 01192 ip_decrease_ttl(iph); 01193 01194 /* FIXME: forward and output firewalls used to be called here. 01195 * What do we do with netfilter? -- RR */ 01196 if (vif->flags & VIFF_TUNNEL) { 01197 ip_encap(skb2, vif->local, vif->remote); 01198 /* FIXME: extra output firewall step used to be here. --RR */ 01199 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++; 01200 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len; 01201 } 01202 01203 IPCB(skb2)->flags |= IPSKB_FORWARDED; 01204 01205 /* 01206 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally 01207 * not only before forwarding, but after forwarding on all output 01208 * interfaces. 
It is clear, if mrouter runs a multicasting 01209 * program, it should receive packets not depending to what interface 01210 * program is joined. 01211 * If we will not make it, the program will have to join on all 01212 * interfaces. On the other hand, multihoming host (or router, but 01213 * not mrouter) cannot join to more than one interface - it will 01214 * result in receiving multiple packets. 01215 */ 01216 NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev, 01217 ipmr_forward_finish); 01218 } 01219 01220 int ipmr_find_vif(struct net_device *dev) 01221 { 01222 int ct; 01223 for (ct=maxvif-1; ct>=0; ct--) { 01224 if (vif_table[ct].dev == dev) 01225 break; 01226 } 01227 return ct; 01228 } 01229 01230 /* "local" means that we should preserve one skb (for local delivery) */ 01231 01232 int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local) 01233 { 01234 int psend = -1; 01235 int vif, ct; 01236 01237 vif = cache->mfc_parent; 01238 cache->mfc_un.res.pkt++; 01239 cache->mfc_un.res.bytes += skb->len; 01240 01241 /* 01242 * Wrong interface: drop packet and (maybe) send PIM assert. 01243 */ 01244 if (vif_table[vif].dev != skb->dev) { 01245 int true_vifi; 01246 01247 if (((struct rtable*)skb->dst)->key.iif == 0) { 01248 /* It is our own packet, looped back. 01249 Very complicated situation... 01250 01251 The best workaround until routing daemons will be 01252 fixed is not to redistribute packet, if it was 01253 send through wrong interface. It means, that 01254 multicast applications WILL NOT work for 01255 (S,G), which have default multicast route pointing 01256 to wrong oif. In any case, it is not a good 01257 idea to use multicasting applications on router. 
01258 */ 01259 goto dont_forward; 01260 } 01261 01262 cache->mfc_un.res.wrong_if++; 01263 true_vifi = ipmr_find_vif(skb->dev); 01264 01265 if (true_vifi >= 0 && mroute_do_assert && 01266 /* pimsm uses asserts, when switching from RPT to SPT, 01267 so that we cannot check that packet arrived on an oif. 01268 It is bad, but otherwise we would need to move pretty 01269 large chunk of pimd to kernel. Ough... --ANK 01270 */ 01271 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && 01272 jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) { 01273 cache->mfc_un.res.last_assert = jiffies; 01274 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF); 01275 } 01276 goto dont_forward; 01277 } 01278 01279 vif_table[vif].pkt_in++; 01280 vif_table[vif].bytes_in+=skb->len; 01281 01282 /* 01283 * Forward the frame 01284 */ 01285 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { 01286 if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) { 01287 if (psend != -1) 01288 ipmr_queue_xmit(skb, cache, psend, 0); 01289 psend=ct; 01290 } 01291 } 01292 if (psend != -1) 01293 ipmr_queue_xmit(skb, cache, psend, !local); 01294 01295 dont_forward: 01296 if (!local) 01297 kfree_skb(skb); 01298 return 0; 01299 } 01300 01301 01302 /* 01303 * Multicast packets for forwarding arrive here 01304 */ 01305 01306 int ip_mr_input(struct sk_buff *skb) 01307 { 01308 struct mfc_cache *cache; 01309 int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL; 01310 01311 /* Packet is looped back after forward, it should not be 01312 forwarded second time, but still can be delivered locally. 
01313 */ 01314 if (IPCB(skb)->flags&IPSKB_FORWARDED) 01315 goto dont_forward; 01316 01317 if (!local) { 01318 if (IPCB(skb)->opt.router_alert) { 01319 if (ip_call_ra_chain(skb)) 01320 return 0; 01321 } else if (skb->nh.iph->protocol == IPPROTO_IGMP){ 01322 /* IGMPv1 (and broken IGMPv2 implementations sort of 01323 Cisco IOS <= 11.2(8)) do not put router alert 01324 option to IGMP packets destined to routable 01325 groups. It is very bad, because it means 01326 that we can forward NO IGMP messages. 01327 */ 01328 read_lock(&mrt_lock); 01329 if (mroute_socket) { 01330 raw_rcv(mroute_socket, skb); 01331 read_unlock(&mrt_lock); 01332 return 0; 01333 } 01334 read_unlock(&mrt_lock); 01335 } 01336 } 01337 01338 read_lock(&mrt_lock); 01339 cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr); 01340 01341 /* 01342 * No usable cache entry 01343 */ 01344 if (cache==NULL) { 01345 int vif; 01346 01347 if (local) { 01348 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 01349 ip_local_deliver(skb); 01350 if (skb2 == NULL) { 01351 read_unlock(&mrt_lock); 01352 return -ENOBUFS; 01353 } 01354 skb = skb2; 01355 } 01356 01357 vif = ipmr_find_vif(skb->dev); 01358 if (vif >= 0) { 01359 int err = ipmr_cache_unresolved(vif, skb); 01360 read_unlock(&mrt_lock); 01361 01362 return err; 01363 } 01364 read_unlock(&mrt_lock); 01365 kfree_skb(skb); 01366 return -ENODEV; 01367 } 01368 01369 ip_mr_forward(skb, cache, local); 01370 01371 read_unlock(&mrt_lock); 01372 01373 if (local) 01374 return ip_local_deliver(skb); 01375 01376 return 0; 01377 01378 dont_forward: 01379 if (local) 01380 return ip_local_deliver(skb); 01381 kfree_skb(skb); 01382 return 0; 01383 } 01384 01385 #ifdef CONFIG_IP_PIMSM_V1 01386 /* 01387 * Handle IGMP messages of PIMv1 01388 */ 01389 01390 int pim_rcv_v1(struct sk_buff * skb) 01391 { 01392 struct igmphdr *pim = (struct igmphdr*)skb->h.raw; 01393 struct iphdr *encap; 01394 struct net_device *reg_dev = NULL; 01395 01396 if (skb_is_nonlinear(skb)) { 01397 if 
(skb_linearize(skb, GFP_ATOMIC) != 0) { 01398 kfree_skb(skb); 01399 return -ENOMEM; 01400 } 01401 pim = (struct igmphdr*)skb->h.raw; 01402 } 01403 01404 if (!mroute_do_pim || 01405 skb->len < sizeof(*pim) + sizeof(*encap) || 01406 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) { 01407 kfree_skb(skb); 01408 return -EINVAL; 01409 } 01410 01411 encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr)); 01412 /* 01413 Check that: 01414 a. packet is really destinted to a multicast group 01415 b. packet is not a NULL-REGISTER 01416 c. packet is not truncated 01417 */ 01418 if (!MULTICAST(encap->daddr) || 01419 ntohs(encap->tot_len) == 0 || 01420 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) { 01421 kfree_skb(skb); 01422 return -EINVAL; 01423 } 01424 01425 read_lock(&mrt_lock); 01426 if (reg_vif_num >= 0) 01427 reg_dev = vif_table[reg_vif_num].dev; 01428 if (reg_dev) 01429 dev_hold(reg_dev); 01430 read_unlock(&mrt_lock); 01431 01432 if (reg_dev == NULL) { 01433 kfree_skb(skb); 01434 return -EINVAL; 01435 } 01436 01437 skb->mac.raw = skb->nh.raw; 01438 skb_pull(skb, (u8*)encap - skb->data); 01439 skb->nh.iph = (struct iphdr *)skb->data; 01440 skb->dev = reg_dev; 01441 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); 01442 skb->protocol = htons(ETH_P_IP); 01443 skb->ip_summed = 0; 01444 skb->pkt_type = PACKET_HOST; 01445 dst_release(skb->dst); 01446 skb->dst = NULL; 01447 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; 01448 ((struct net_device_stats*)reg_dev->priv)->rx_packets++; 01449 #ifdef CONFIG_NETFILTER 01450 nf_conntrack_put(skb->nfct); 01451 skb->nfct = NULL; 01452 #endif 01453 netif_rx(skb); 01454 dev_put(reg_dev); 01455 return 0; 01456 } 01457 #endif 01458 01459 #ifdef CONFIG_IP_PIMSM_V2 01460 int pim_rcv(struct sk_buff * skb) 01461 { 01462 struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw; 01463 struct iphdr *encap; 01464 struct net_device *reg_dev = NULL; 01465 01466 if (skb_is_nonlinear(skb)) { 01467 if 
(skb_linearize(skb, GFP_ATOMIC) != 0) { 01468 kfree_skb(skb); 01469 return -ENOMEM; 01470 } 01471 pim = (struct pimreghdr*)skb->h.raw; 01472 } 01473 01474 if (skb->len < sizeof(*pim) + sizeof(*encap) || 01475 pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || 01476 (pim->flags&PIM_NULL_REGISTER) || 01477 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 01478 ip_compute_csum((void *)pim, skb->len))) { 01479 kfree_skb(skb); 01480 return -EINVAL; 01481 } 01482 01483 /* check if the inner packet is destined to mcast group */ 01484 encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr)); 01485 if (!MULTICAST(encap->daddr) || 01486 ntohs(encap->tot_len) == 0 || 01487 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) { 01488 kfree_skb(skb); 01489 return -EINVAL; 01490 } 01491 01492 read_lock(&mrt_lock); 01493 if (reg_vif_num >= 0) 01494 reg_dev = vif_table[reg_vif_num].dev; 01495 if (reg_dev) 01496 dev_hold(reg_dev); 01497 read_unlock(&mrt_lock); 01498 01499 if (reg_dev == NULL) { 01500 kfree_skb(skb); 01501 return -EINVAL; 01502 } 01503 01504 skb->mac.raw = skb->nh.raw; 01505 skb_pull(skb, (u8*)encap - skb->data); 01506 skb->nh.iph = (struct iphdr *)skb->data; 01507 skb->dev = reg_dev; 01508 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); 01509 skb->protocol = htons(ETH_P_IP); 01510 skb->ip_summed = 0; 01511 skb->pkt_type = PACKET_HOST; 01512 dst_release(skb->dst); 01513 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; 01514 ((struct net_device_stats*)reg_dev->priv)->rx_packets++; 01515 skb->dst = NULL; 01516 #ifdef CONFIG_NETFILTER 01517 nf_conntrack_put(skb->nfct); 01518 skb->nfct = NULL; 01519 #endif 01520 netif_rx(skb); 01521 dev_put(reg_dev); 01522 return 0; 01523 } 01524 #endif 01525 01526 static int 01527 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) 01528 { 01529 int ct; 01530 struct rtnexthop *nhp; 01531 struct net_device *dev = vif_table[c->mfc_parent].dev; 01532 u8 *b = skb->tail; 01533 struct 
rtattr *mp_head; 01534 01535 if (dev) 01536 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); 01537 01538 mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0)); 01539 01540 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 01541 if (c->mfc_un.res.ttls[ct] < 255) { 01542 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 01543 goto rtattr_failure; 01544 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 01545 nhp->rtnh_flags = 0; 01546 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 01547 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; 01548 nhp->rtnh_len = sizeof(*nhp); 01549 } 01550 } 01551 mp_head->rta_type = RTA_MULTIPATH; 01552 mp_head->rta_len = skb->tail - (u8*)mp_head; 01553 rtm->rtm_type = RTN_MULTICAST; 01554 return 1; 01555 01556 rtattr_failure: 01557 skb_trim(skb, b - skb->data); 01558 return -EMSGSIZE; 01559 } 01560 01561 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) 01562 { 01563 int err; 01564 struct mfc_cache *cache; 01565 struct rtable *rt = (struct rtable*)skb->dst; 01566 01567 read_lock(&mrt_lock); 01568 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); 01569 01570 if (cache==NULL) { 01571 struct net_device *dev; 01572 int vif; 01573 01574 if (nowait) { 01575 read_unlock(&mrt_lock); 01576 return -EAGAIN; 01577 } 01578 01579 dev = skb->dev; 01580 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { 01581 read_unlock(&mrt_lock); 01582 return -ENODEV; 01583 } 01584 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 01585 skb->nh.iph->ihl = sizeof(struct iphdr)>>2; 01586 skb->nh.iph->saddr = rt->rt_src; 01587 skb->nh.iph->daddr = rt->rt_dst; 01588 skb->nh.iph->version = 0; 01589 err = ipmr_cache_unresolved(vif, skb); 01590 read_unlock(&mrt_lock); 01591 return err; 01592 } 01593 01594 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 01595 cache->mfc_flags |= MFC_NOTIFY; 01596 err = ipmr_fill_mroute(skb, cache, rtm); 01597 read_unlock(&mrt_lock); 01598 return err; 01599 } 01600 01601 #ifdef CONFIG_PROC_FS 
01602 /* 01603 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 01604 */ 01605 01606 static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length) 01607 { 01608 struct vif_device *vif; 01609 int len=0; 01610 off_t pos=0; 01611 off_t begin=0; 01612 int size; 01613 int ct; 01614 01615 len += sprintf(buffer, 01616 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 01617 pos=len; 01618 01619 read_lock(&mrt_lock); 01620 for (ct=0;ct<maxvif;ct++) 01621 { 01622 char *name = "none"; 01623 vif=&vif_table[ct]; 01624 if(!VIF_EXISTS(ct)) 01625 continue; 01626 if (vif->dev) 01627 name = vif->dev->name; 01628 size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 01629 ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, 01630 vif->flags, vif->local, vif->remote); 01631 len+=size; 01632 pos+=size; 01633 if(pos<offset) 01634 { 01635 len=0; 01636 begin=pos; 01637 } 01638 if(pos>offset+length) 01639 break; 01640 } 01641 read_unlock(&mrt_lock); 01642 01643 *start=buffer+(offset-begin); 01644 len-=(offset-begin); 01645 if(len>length) 01646 len=length; 01647 if (len<0) 01648 len = 0; 01649 return len; 01650 } 01651 01652 static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length) 01653 { 01654 struct mfc_cache *mfc; 01655 int len=0; 01656 off_t pos=0; 01657 off_t begin=0; 01658 int size; 01659 int ct; 01660 01661 len += sprintf(buffer, 01662 "Group Origin Iif Pkts Bytes Wrong Oifs\n"); 01663 pos=len; 01664 01665 read_lock(&mrt_lock); 01666 for (ct=0;ct<MFC_LINES;ct++) 01667 { 01668 for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next) 01669 { 01670 int n; 01671 01672 /* 01673 * Interface forwarding map 01674 */ 01675 size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld", 01676 (unsigned long)mfc->mfc_mcastgrp, 01677 (unsigned long)mfc->mfc_origin, 01678 mfc->mfc_parent, 01679 mfc->mfc_un.res.pkt, 01680 mfc->mfc_un.res.bytes, 01681 mfc->mfc_un.res.wrong_if); 01682 
for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++) 01683 { 01684 if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255) 01685 size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]); 01686 } 01687 size += sprintf(buffer+len+size, "\n"); 01688 len+=size; 01689 pos+=size; 01690 if(pos<offset) 01691 { 01692 len=0; 01693 begin=pos; 01694 } 01695 if(pos>offset+length) 01696 goto done; 01697 } 01698 } 01699 01700 spin_lock_bh(&mfc_unres_lock); 01701 for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) { 01702 size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n", 01703 (unsigned long)mfc->mfc_mcastgrp, 01704 (unsigned long)mfc->mfc_origin, 01705 -1, 01706 (long)mfc->mfc_un.unres.unresolved.qlen, 01707 0L, 0L); 01708 len+=size; 01709 pos+=size; 01710 if(pos<offset) 01711 { 01712 len=0; 01713 begin=pos; 01714 } 01715 if(pos>offset+length) 01716 break; 01717 } 01718 spin_unlock_bh(&mfc_unres_lock); 01719 01720 done: 01721 read_unlock(&mrt_lock); 01722 *start=buffer+(offset-begin); 01723 len-=(offset-begin); 01724 if(len>length) 01725 len=length; 01726 if (len < 0) { 01727 len = 0; 01728 } 01729 return len; 01730 } 01731 01732 #endif 01733 01734 #ifdef CONFIG_IP_PIMSM_V2 01735 struct inet_protocol pim_protocol = 01736 { 01737 pim_rcv, /* PIM handler */ 01738 NULL, /* PIM error control */ 01739 NULL, /* next */ 01740 IPPROTO_PIM, /* protocol ID */ 01741 0, /* copy */ 01742 NULL, /* data */ 01743 "PIM" /* name */ 01744 }; 01745 #endif 01746 01747 01748 /* 01749 * Setup for IP multicast routing 01750 */ 01751 01752 void __init ip_mr_init(void) 01753 { 01754 printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n"); 01755 mrt_cachep = kmem_cache_create("ip_mrt_cache", 01756 sizeof(struct mfc_cache), 01757 0, SLAB_HWCACHE_ALIGN, 01758 NULL, NULL); 01759 init_timer(&ipmr_expire_timer); 01760 ipmr_expire_timer.function=ipmr_expire_process; 01761 register_netdevice_notifier(&ip_mr_notifier); 01762 #ifdef CONFIG_PROC_FS 01763 
proc_net_create("ip_mr_vif",0,ipmr_vif_info); 01764 proc_net_create("ip_mr_cache",0,ipmr_mfc_info); 01765 #endif 01766 }

Generated on Wed Dec 1 21:25:31 2004 for Linux 2.4.23 Networking by doxygen 1.3.8