Main Page | Class List | File List | Class Members | File Members

icmp.c

Go to the documentation of this file.
00001 /* 00002 * NET3: Implementation of the ICMP protocol layer. 00003 * 00004 * Alan Cox, <alan@redhat.com> 00005 * 00006 * Version: $Id: icmp.c,v 1.82.2.1 2001/12/13 08:59:27 davem Exp $ 00007 * 00008 * This program is free software; you can redistribute it and/or 00009 * modify it under the terms of the GNU General Public License 00010 * as published by the Free Software Foundation; either version 00011 * 2 of the License, or (at your option) any later version. 00012 * 00013 * Some of the function names and the icmp unreach table for this 00014 * module were derived from [icmp.c 1.0.11 06/02/93] by 00015 * Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting. 00016 * Other than that this module is a complete rewrite. 00017 * 00018 * Fixes: 00019 * Clemens Fruhwirth : introduce global icmp rate limiting 00020 * with icmp type masking ability instead 00021 * of broken per type icmp timeouts. 00022 * Mike Shaver : RFC1122 checks. 00023 * Alan Cox : Multicast ping reply as self. 00024 * Alan Cox : Fix atomicity lockup in ip_build_xmit 00025 * call. 00026 * Alan Cox : Added 216,128 byte paths to the MTU 00027 * code. 00028 * Martin Mares : RFC1812 checks. 00029 * Martin Mares : Can be configured to follow redirects 00030 * if acting as a router _without_ a 00031 * routing protocol (RFC 1812). 00032 * Martin Mares : Echo requests may be configured to 00033 * be ignored (RFC 1812). 00034 * Martin Mares : Limitation of ICMP error message 00035 * transmit rate (RFC 1812). 00036 * Martin Mares : TOS and Precedence set correctly 00037 * (RFC 1812). 00038 * Martin Mares : Now copying as much data from the 00039 * original packet as we can without 00040 * exceeding 576 bytes (RFC 1812). 00041 * Willy Konynenberg : Transparent proxying support. 00042 * Keith Owens : RFC1191 correction for 4.2BSD based 00043 * path MTU bug. 00044 * Thomas Quinot : ICMP Dest Unreach codes up to 15 are 00045 * valid (RFC 1812). 
00046 * Andi Kleen : Check all packet lengths properly 00047 * and moved all kfree_skb() up to 00048 * icmp_rcv. 00049 * Andi Kleen : Move the rate limit bookkeeping 00050 * into the dest entry and use a token 00051 * bucket filter (thanks to ANK). Make 00052 * the rates sysctl configurable. 00053 * Yu Tianli : Fixed two ugly bugs in icmp_send 00054 * - IP option length was accounted wrongly 00055 * - ICMP header length was not accounted at all. 00056 * Tristan Greaves : Added sysctl option to ignore bogus broadcast 00057 * responses from broken routers. 00058 * 00059 * To Fix: 00060 * 00061 * - Should use skb_pull() instead of all the manual checking. 00062 * This would also greatly simply some upper layer error handlers. --AK 00063 * 00064 */ 00065 00066 #include <linux/config.h> 00067 #include <linux/types.h> 00068 #include <linux/sched.h> 00069 #include <linux/kernel.h> 00070 #include <linux/fcntl.h> 00071 #include <linux/socket.h> 00072 #include <linux/in.h> 00073 #include <linux/inet.h> 00074 #include <linux/netdevice.h> 00075 #include <linux/string.h> 00076 #include <linux/netfilter_ipv4.h> 00077 #include <net/snmp.h> 00078 #include <net/ip.h> 00079 #include <net/route.h> 00080 #include <net/protocol.h> 00081 #include <net/icmp.h> 00082 #include <net/tcp.h> 00083 #include <net/udp.h> 00084 #include <net/raw.h> 00085 #include <linux/skbuff.h> 00086 #include <net/sock.h> 00087 #include <linux/errno.h> 00088 #include <linux/timer.h> 00089 #include <linux/init.h> 00090 #include <asm/system.h> 00091 #include <asm/uaccess.h> 00092 #include <net/checksum.h> 00093 00094 /* 00095 * Build xmit assembly blocks 00096 */ 00097 00098 struct icmp_bxm 00099 { 00100 struct sk_buff *skb; 00101 int offset; 00102 int data_len; 00103 00104 unsigned int csum; 00105 struct { 00106 struct icmphdr icmph; 00107 __u32 times[3]; 00108 } data; 00109 int head_len; 00110 struct ip_options replyopts; 00111 unsigned char optbuf[40]; 00112 }; 00113 00114 /* 00115 * Statistics 00116 */ 00117 
struct icmp_mib icmp_statistics[NR_CPUS*2];

/*
 * errno values reported for each ICMP_DEST_UNREACH code, indexed by code.
 *
 * RFC 1122: 3.2.2.1 states that NET_UNREACH, HOST_UNREACH and SR_FAILED
 * MUST be considered 'transient errs'; those entries carry 0 in the
 * second member.
 */
struct icmp_err icmp_err_convert[] = {
	{ ENETUNREACH,	0 },	/* ICMP_NET_UNREACH	*/
	{ EHOSTUNREACH,	0 },	/* ICMP_HOST_UNREACH	*/
	{ ENOPROTOOPT,	1 },	/* ICMP_PROT_UNREACH	*/
	{ ECONNREFUSED,	1 },	/* ICMP_PORT_UNREACH	*/
	{ EMSGSIZE,	0 },	/* ICMP_FRAG_NEEDED	*/
	{ EOPNOTSUPP,	0 },	/* ICMP_SR_FAILED	*/
	{ ENETUNREACH,	1 },	/* ICMP_NET_UNKNOWN	*/
	{ EHOSTDOWN,	1 },	/* ICMP_HOST_UNKNOWN	*/
	{ ENONET,	1 },	/* ICMP_HOST_ISOLATED	*/
	{ ENETUNREACH,	1 },	/* ICMP_NET_ANO		*/
	{ EHOSTUNREACH,	1 },	/* ICMP_HOST_ANO	*/
	{ ENETUNREACH,	0 },	/* ICMP_NET_UNR_TOS	*/
	{ EHOSTUNREACH,	0 },	/* ICMP_HOST_UNR_TOS	*/
	{ EHOSTUNREACH,	1 },	/* ICMP_PKT_FILTERED	*/
	{ EHOSTUNREACH,	1 },	/* ICMP_PREC_VIOLATION	*/
	{ EHOSTUNREACH,	1 }	/* ICMP_PREC_CUTOFF	*/
};

extern int sysctl_ip_default_ttl;

/* Control parameters for ECHO replies. */
int sysctl_icmp_echo_ignore_all;
int sysctl_icmp_echo_ignore_broadcasts;

/* Control parameter - ignore bogus broadcast responses? */
int sysctl_icmp_ignore_bogus_error_responses;

/*
 *	Configurable global rate limit.
 *
 *	ratelimit defines tokens/packet consumed for dst->rate_token bucket
 *	ratemask defines which icmp types are ratelimited by setting
 *	its bit position.
 *
 *	default:
 *	dest unreachable (3), source quench (4),
 *	time exceeded (11), parameter problem (12)
 */

int sysctl_icmp_ratelimit = HZ;		/* one second's worth of jiffies */
int sysctl_icmp_ratemask = 0x1818;	/* bits 3, 4, 11 and 12 */

/*
 *	ICMP control array. This specifies what to do with each ICMP.
00168 */ 00169 00170 struct icmp_control 00171 { 00172 unsigned long *output; /* Address to increment on output */ 00173 unsigned long *input; /* Address to increment on input */ 00174 void (*handler)(struct sk_buff *skb); 00175 short error; /* This ICMP is classed as an error message */ 00176 }; 00177 00178 static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; 00179 00180 /* 00181 * The ICMP socket(s). This is the most convenient way to flow control 00182 * our ICMP output as well as maintain a clean interface throughout 00183 * all layers. All Socketless IP sends will soon be gone. 00184 */ 00185 00186 static struct inode __icmp_inode[NR_CPUS]; 00187 #define icmp_socket (&__icmp_inode[smp_processor_id()].u.socket_i) 00188 #define icmp_socket_cpu(X) (&__icmp_inode[(X)].u.socket_i) 00189 00190 static int icmp_xmit_lock(void) 00191 { 00192 local_bh_disable(); 00193 if (unlikely(!spin_trylock(&icmp_socket->sk->lock.slock))) { 00194 /* This can happen if the output path signals a 00195 * dst_link_failure() for an outgoing ICMP packet. 00196 */ 00197 local_bh_enable(); 00198 return 1; 00199 } 00200 return 0; 00201 } 00202 00203 static void icmp_xmit_unlock(void) 00204 { 00205 spin_unlock_bh(&icmp_socket->sk->lock.slock); 00206 } 00207 00208 /* 00209 * Send an ICMP frame. 00210 */ 00211 00212 /* 00213 * Check transmit rate limitation for given message. 00214 * The rate information is held in the destination cache now. 00215 * This function is generic and could be used for other purposes 00216 * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. 00217 * 00218 * Note that the same dst_entry fields are modified by functions in 00219 * route.c too, but these work for packet destinations while xrlim_allow 00220 * works for icmp destinations. This means the rate limiting information 00221 * for one "ip object" is shared - and these ICMPs are twice limited: 00222 * by source and by destination. 
00223 * 00224 * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate 00225 * SHOULD allow setting of rate limits 00226 * 00227 * Shared between ICMPv4 and ICMPv6. 00228 */ 00229 #define XRLIM_BURST_FACTOR 6 00230 int xrlim_allow(struct dst_entry *dst, int timeout) 00231 { 00232 unsigned long now; 00233 00234 now = jiffies; 00235 dst->rate_tokens += now - dst->rate_last; 00236 dst->rate_last = now; 00237 if (dst->rate_tokens > XRLIM_BURST_FACTOR*timeout) 00238 dst->rate_tokens = XRLIM_BURST_FACTOR*timeout; 00239 if (dst->rate_tokens >= timeout) { 00240 dst->rate_tokens -= timeout; 00241 return 1; 00242 } 00243 return 0; 00244 } 00245 00246 static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code) 00247 { 00248 struct dst_entry *dst = &rt->u.dst; 00249 00250 if (type > NR_ICMP_TYPES) 00251 return 1; 00252 00253 /* Don't limit PMTU discovery. */ 00254 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) 00255 return 1; 00256 00257 /* No rate limit on loopback */ 00258 if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) 00259 return 1; 00260 00261 /* Limit if icmp type is enabled in ratemask. */ 00262 if((1 << type) & sysctl_icmp_ratemask) 00263 return xrlim_allow(dst, sysctl_icmp_ratelimit); 00264 else 00265 return 1; 00266 } 00267 00268 /* 00269 * Maintain the counters used in the SNMP statistics for outgoing ICMP 00270 */ 00271 00272 static void icmp_out_count(int type) 00273 { 00274 if (type>NR_ICMP_TYPES) 00275 return; 00276 (icmp_pointers[type].output)[(smp_processor_id()*2+!in_softirq())*sizeof(struct icmp_mib)/sizeof(unsigned long)]++; 00277 ICMP_INC_STATS(IcmpOutMsgs); 00278 } 00279 00280 /* 00281 * Checksum each fragment, and on the first include the headers and final checksum. 
00282 */ 00283 00284 static int icmp_glue_bits(const void *p, char *to, unsigned int offset, unsigned int fraglen) 00285 { 00286 struct icmp_bxm *icmp_param = (struct icmp_bxm *)p; 00287 struct icmphdr *icmph; 00288 unsigned int csum; 00289 00290 if (offset) { 00291 icmp_param->csum=skb_copy_and_csum_bits(icmp_param->skb, 00292 icmp_param->offset+(offset-icmp_param->head_len), 00293 to, fraglen,icmp_param->csum); 00294 return 0; 00295 } 00296 00297 /* 00298 * First fragment includes header. Note that we've done 00299 * the other fragments first, so that we get the checksum 00300 * for the whole packet here. 00301 */ 00302 csum = csum_partial_copy_nocheck((void *)&icmp_param->data, 00303 to, icmp_param->head_len, 00304 icmp_param->csum); 00305 csum=skb_copy_and_csum_bits(icmp_param->skb, 00306 icmp_param->offset, 00307 to+icmp_param->head_len, 00308 fraglen-icmp_param->head_len, 00309 csum); 00310 icmph=(struct icmphdr *)to; 00311 icmph->checksum = csum_fold(csum); 00312 return 0; 00313 } 00314 00318 static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) 00319 { 00320 struct sock *sk=icmp_socket->sk; 00321 struct ipcm_cookie ipc; 00322 struct rtable *rt = (struct rtable*)skb->dst; 00323 u32 daddr; 00324 00326 if (ip_options_echo(&icmp_param->replyopts, skb)) 00327 return; 00328 00329 if (icmp_xmit_lock()) 00330 return; 00331 00332 icmp_param->data.icmph.checksum=0; 00333 icmp_param->csum=0; 00334 icmp_out_count(icmp_param->data.icmph.type); 00335 00336 sk->protinfo.af_inet.tos = skb->nh.iph->tos; 00337 sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl; 00338 daddr = ipc.addr = rt->rt_src; 00339 ipc.opt = NULL; 00340 if (icmp_param->replyopts.optlen) { 00341 ipc.opt = &icmp_param->replyopts; 00342 if (ipc.opt->srr) 00343 daddr = icmp_param->replyopts.faddr; 00344 } 00345 if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) 00346 goto out; 00347 if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, 00348 
icmp_param->data.icmph.code)) { 00349 ip_build_xmit(sk, icmp_glue_bits, icmp_param, 00350 icmp_param->data_len+icmp_param->head_len, 00351 &ipc, rt, MSG_DONTWAIT); 00352 } 00353 ip_rt_put(rt); 00354 out: 00355 icmp_xmit_unlock(); 00356 } 00357 00358 00359 /* 00360 * Send an ICMP message in response to a situation 00361 * 00362 * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. MAY send more (we do). 00363 * MUST NOT change this header information. 00364 * MUST NOT reply to a multicast/broadcast IP address. 00365 * MUST NOT reply to a multicast/broadcast MAC address. 00366 * MUST reply to only the first fragment. 00367 */ 00368 00369 void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) 00370 { 00371 struct iphdr *iph; 00372 int room; 00373 struct icmp_bxm icmp_param; 00374 struct rtable *rt = (struct rtable*)skb_in->dst; 00375 struct ipcm_cookie ipc; 00376 u32 saddr; 00377 u8 tos; 00378 00379 if (!rt) 00380 return; 00381 00382 /* 00383 * Find the original header. It is expected to be valid, of course. 00384 * Check this, icmp_send is called from the most obscure devices 00385 * sometimes. 00386 */ 00387 iph = skb_in->nh.iph; 00388 00389 if ((u8*)iph < skb_in->head || (u8*)(iph+1) > skb_in->tail) 00390 return; 00391 00392 /* 00393 * No replies to physical multicast/broadcast 00394 */ 00395 if (skb_in->pkt_type!=PACKET_HOST) 00396 return; 00397 00398 /* 00399 * Now check at the protocol level 00400 */ 00401 if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) 00402 return; 00403 00404 /* 00405 * Only reply to fragment 0. We byte re-order the constant 00406 * mask for efficiency. 00407 */ 00408 if (iph->frag_off&htons(IP_OFFSET)) 00409 return; 00410 00411 /* 00412 * If we send an ICMP error to an ICMP error a mess would result.. 
00413 */ 00414 if (icmp_pointers[type].error) { 00415 /* 00416 * We are an error, check if we are replying to an ICMP error 00417 */ 00418 if (iph->protocol==IPPROTO_ICMP) { 00419 u8 inner_type; 00420 00421 if (skb_copy_bits(skb_in, 00422 skb_in->nh.raw + (iph->ihl<<2) 00423 + offsetof(struct icmphdr, type) 00424 - skb_in->data, 00425 &inner_type, 1)) 00426 return; 00427 00428 /* 00429 * Assume any unknown ICMP type is an error. This isn't 00430 * specified by the RFC, but think about it.. 00431 */ 00432 if (inner_type>NR_ICMP_TYPES || icmp_pointers[inner_type].error) 00433 return; 00434 } 00435 } 00436 00437 if (icmp_xmit_lock()) 00438 return; 00439 00440 /* 00441 * Construct source address and options. 00442 */ 00443 00444 #ifdef CONFIG_IP_ROUTE_NAT 00445 /* 00446 * Restore original addresses if packet has been translated. 00447 */ 00448 if (rt->rt_flags&RTCF_NAT && IPCB(skb_in)->flags&IPSKB_TRANSLATED) { 00449 iph->daddr = rt->key.dst; 00450 iph->saddr = rt->key.src; 00451 } 00452 #endif 00453 00454 saddr = iph->daddr; 00455 if (!(rt->rt_flags & RTCF_LOCAL)) 00456 saddr = 0; 00457 00458 tos = icmp_pointers[type].error ? 00459 ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : 00460 iph->tos; 00461 00462 if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0)) 00463 goto out; 00464 00465 if (ip_options_echo(&icmp_param.replyopts, skb_in)) 00466 goto ende; 00467 00468 00469 /* 00470 * Prepare data for ICMP header. 
00471 */ 00472 00473 icmp_param.data.icmph.type=type; 00474 icmp_param.data.icmph.code=code; 00475 icmp_param.data.icmph.un.gateway = info; 00476 icmp_param.data.icmph.checksum=0; 00477 icmp_param.csum=0; 00478 icmp_param.skb=skb_in; 00479 icmp_param.offset=skb_in->nh.raw - skb_in->data; 00480 icmp_out_count(icmp_param.data.icmph.type); 00481 icmp_socket->sk->protinfo.af_inet.tos = tos; 00482 icmp_socket->sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl; 00483 ipc.addr = iph->saddr; 00484 ipc.opt = &icmp_param.replyopts; 00485 if (icmp_param.replyopts.srr) { 00486 ip_rt_put(rt); 00487 if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0)) 00488 goto out; 00489 } 00490 00491 if (!icmpv4_xrlim_allow(rt, type, code)) 00492 goto ende; 00493 00494 /* RFC says return as much as we can without exceeding 576 bytes. */ 00495 00496 room = rt->u.dst.pmtu; 00497 if (room > 576) 00498 room = 576; 00499 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; 00500 room -= sizeof(struct icmphdr); 00501 00502 icmp_param.data_len=skb_in->len-icmp_param.offset; 00503 if (icmp_param.data_len > room) 00504 icmp_param.data_len = room; 00505 icmp_param.head_len = sizeof(struct icmphdr); 00506 00507 ip_build_xmit(icmp_socket->sk, icmp_glue_bits, &icmp_param, 00508 icmp_param.data_len+sizeof(struct icmphdr), 00509 &ipc, rt, MSG_DONTWAIT); 00510 00511 ende: 00512 ip_rt_put(rt); 00513 out: 00514 icmp_xmit_unlock(); 00515 } 00516 00517 00518 /* 00519 * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. 00520 */ 00521 00522 static void icmp_unreach(struct sk_buff *skb) 00523 { 00524 struct iphdr *iph; 00525 struct icmphdr *icmph; 00526 int hash, protocol; 00527 struct inet_protocol *ipprot; 00528 struct sock *raw_sk; 00529 u32 info = 0; 00530 00531 /* 00532 * Incomplete header ? 00533 * Only checks for the IP header, there should be an 00534 * additional check for longer headers in upper levels. 
00535 */ 00536 00537 if (!pskb_may_pull(skb, sizeof(struct iphdr))) { 00538 ICMP_INC_STATS_BH(IcmpInErrors); 00539 return; 00540 } 00541 00542 icmph = skb->h.icmph; 00543 iph = (struct iphdr *) skb->data; 00544 00545 if (iph->ihl<5) { 00546 /* Mangled header, drop. */ 00547 ICMP_INC_STATS_BH(IcmpInErrors); 00548 return; 00549 } 00550 00551 if(icmph->type==ICMP_DEST_UNREACH) { 00552 switch(icmph->code & 15) { 00553 case ICMP_NET_UNREACH: 00554 break; 00555 case ICMP_HOST_UNREACH: 00556 break; 00557 case ICMP_PROT_UNREACH: 00558 break; 00559 case ICMP_PORT_UNREACH: 00560 break; 00561 case ICMP_FRAG_NEEDED: 00562 if (ipv4_config.no_pmtu_disc) { 00563 if (net_ratelimit()) 00564 printk(KERN_INFO "ICMP: %u.%u.%u.%u: fragmentation needed and DF set.\n", 00565 NIPQUAD(iph->daddr)); 00566 } else { 00567 info = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu)); 00568 if (!info) 00569 goto out; 00570 } 00571 break; 00572 case ICMP_SR_FAILED: 00573 if (net_ratelimit()) 00574 printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source Route Failed.\n", NIPQUAD(iph->daddr)); 00575 break; 00576 default: 00577 break; 00578 } 00579 if (icmph->code>NR_ICMP_UNREACH) 00580 goto out; 00581 } else if (icmph->type == ICMP_PARAMETERPROB) { 00582 info = ntohl(icmph->un.gateway)>>24; 00583 } 00584 00585 /* 00586 * Throw it at our lower layers 00587 * 00588 * RFC 1122: 3.2.2 MUST extract the protocol ID from the passed header. 00589 * RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the transport layer. 00590 * RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to transport layer. 00591 */ 00592 00593 /* 00594 * Check the other end isnt violating RFC 1122. Some routers send 00595 * bogus responses to broadcast frames. If you see this message 00596 * first check your netmask matches at both ends, if it does then 00597 * get the other vendor to fix their kit. 
00598 */ 00599 00600 if (!sysctl_icmp_ignore_bogus_error_responses) 00601 { 00602 00603 if (inet_addr_type(iph->daddr) == RTN_BROADCAST) 00604 { 00605 if (net_ratelimit()) 00606 printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP type %u, code %u error to a broadcast: %u.%u.%u.%u on %s\n", 00607 NIPQUAD(iph->saddr), 00608 icmph->type, icmph->code, 00609 NIPQUAD(iph->daddr), 00610 skb->dev->name); 00611 goto out; 00612 } 00613 } 00614 00615 /* Checkin full IP header plus 8 bytes of protocol to 00616 * avoid additional coding at protocol handlers. 00617 */ 00618 if (!pskb_may_pull(skb, iph->ihl*4+8)) 00619 goto out; 00620 00621 iph = (struct iphdr *) skb->data; 00622 protocol = iph->protocol; 00623 00624 /* 00625 * Deliver ICMP message to raw sockets. Pretty useless feature? 00626 */ 00627 00628 /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ 00629 hash = protocol & (MAX_INET_PROTOS - 1); 00630 read_lock(&raw_v4_lock); 00631 if ((raw_sk = raw_v4_htable[hash]) != NULL) 00632 { 00633 while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, 00634 iph->saddr, skb->dev->ifindex)) != NULL) { 00635 raw_err(raw_sk, skb, info); 00636 raw_sk = raw_sk->next; 00637 iph = (struct iphdr *)skb->data; 00638 } 00639 } 00640 read_unlock(&raw_v4_lock); 00641 00642 /* 00643 * This can't change while we are doing it. 00644 * Callers have obtained BR_NETPROTO_LOCK so 00645 * we are OK. 00646 */ 00647 00648 ipprot = (struct inet_protocol *) inet_protos[hash]; 00649 while (ipprot) { 00650 struct inet_protocol *nextip; 00651 00652 nextip = (struct inet_protocol *) ipprot->next; 00653 00654 /* 00655 * Pass it off to everyone who wants it. 00656 */ 00657 00658 /* RFC1122: OK. Passes appropriate ICMP errors to the */ 00659 /* appropriate protocol layer (MUST), as per 3.2.2. 
*/ 00660 00661 if (protocol == ipprot->protocol && ipprot->err_handler) 00662 ipprot->err_handler(skb, info); 00663 00664 ipprot = nextip; 00665 } 00666 out:; 00667 } 00668 00669 00670 /* 00671 * Handle ICMP_REDIRECT. 00672 */ 00673 00674 static void icmp_redirect(struct sk_buff *skb) 00675 { 00676 struct iphdr *iph; 00677 unsigned long ip; 00678 00679 if (skb->len < sizeof(struct iphdr)) { 00680 ICMP_INC_STATS_BH(IcmpInErrors); 00681 return; 00682 } 00683 00684 /* 00685 * Get the copied header of the packet that caused the redirect 00686 */ 00687 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 00688 return; 00689 00690 iph = (struct iphdr *) skb->data; 00691 ip = iph->daddr; 00692 00693 switch (skb->h.icmph->code & 7) { 00694 case ICMP_REDIR_NET: 00695 case ICMP_REDIR_NETTOS: 00696 /* 00697 * As per RFC recommendations now handle it as 00698 * a host redirect. 00699 */ 00700 00701 case ICMP_REDIR_HOST: 00702 case ICMP_REDIR_HOSTTOS: 00703 ip_rt_redirect(skb->nh.iph->saddr, ip, skb->h.icmph->un.gateway, iph->saddr, iph->tos, skb->dev); 00704 break; 00705 default: 00706 break; 00707 } 00708 } 00709 00710 /* 00711 * Handle ICMP_ECHO ("ping") requests. 00712 * 00713 * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo requests. 00714 * RFC 1122: 3.2.2.6 Data received in the ICMP_ECHO request MUST be included in the reply. 00715 * RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring echo requests, MUST have default=NOT. 00716 * See also WRT handling of options once they are done and working. 
00717 */ 00718 00719 static void icmp_echo(struct sk_buff *skb) 00720 { 00721 if (!sysctl_icmp_echo_ignore_all) { 00722 struct icmp_bxm icmp_param; 00723 00724 icmp_param.data.icmph=*skb->h.icmph; 00725 icmp_param.data.icmph.type=ICMP_ECHOREPLY; 00726 icmp_param.skb=skb; 00727 icmp_param.offset=0; 00728 icmp_param.data_len=skb->len; 00729 icmp_param.head_len=sizeof(struct icmphdr); 00730 icmp_reply(&icmp_param, skb); 00731 } 00732 } 00733 00734 /* 00735 * Handle ICMP Timestamp requests. 00736 * RFC 1122: 3.2.2.8 MAY implement ICMP timestamp requests. 00737 * SHOULD be in the kernel for minimum random latency. 00738 * MUST be accurate to a few minutes. 00739 * MUST be updated at least at 15Hz. 00740 */ 00741 00742 static void icmp_timestamp(struct sk_buff *skb) 00743 { 00744 struct timeval tv; 00745 struct icmp_bxm icmp_param; 00746 00747 /* 00748 * Too short. 00749 */ 00750 00751 if (skb->len < 4) { 00752 ICMP_INC_STATS_BH(IcmpInErrors); 00753 return; 00754 } 00755 00756 /* 00757 * Fill in the current time as ms since midnight UT: 00758 */ 00759 do_gettimeofday(&tv); 00760 icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); 00761 icmp_param.data.times[2] = icmp_param.data.times[1]; 00762 if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) 00763 BUG(); 00764 icmp_param.data.icmph=*skb->h.icmph; 00765 icmp_param.data.icmph.type=ICMP_TIMESTAMPREPLY; 00766 icmp_param.data.icmph.code=0; 00767 icmp_param.skb=skb; 00768 icmp_param.offset=0; 00769 icmp_param.data_len=0; 00770 icmp_param.head_len=sizeof(struct icmphdr)+12; 00771 icmp_reply(&icmp_param, skb); 00772 } 00773 00774 00775 /* 00776 * Handle ICMP_ADDRESS_MASK requests. (RFC950) 00777 * 00778 * RFC1122 (3.2.2.9). A host MUST only send replies to 00779 * ADDRESS_MASK requests if it's been configured as an address mask 00780 * agent. Receiving a request doesn't constitute implicit permission to 00781 * act as one. 
Of course, implementing this correctly requires (SHOULD) 00782 * a way to turn the functionality on and off. Another one for sysctl(), 00783 * I guess. -- MS 00784 * 00785 * RFC1812 (4.3.3.9). A router MUST implement it. 00786 * A router SHOULD have switch turning it on/off. 00787 * This switch MUST be ON by default. 00788 * 00789 * Gratuitous replies, zero-source replies are not implemented, 00790 * that complies with RFC. DO NOT implement them!!! All the idea 00791 * of broadcast addrmask replies as specified in RFC950 is broken. 00792 * The problem is that it is not uncommon to have several prefixes 00793 * on one physical interface. Moreover, addrmask agent can even be 00794 * not aware of existing another prefixes. 00795 * If source is zero, addrmask agent cannot choose correct prefix. 00796 * Gratuitous mask announcements suffer from the same problem. 00797 * RFC1812 explains it, but still allows to use ADDRMASK, 00798 * that is pretty silly. --ANK 00799 * 00800 * All these rules are so bizarre, that I removed kernel addrmask 00801 * support at all. It is wrong, it is obsolete, nobody uses it in 00802 * any case. --ANK 00803 * 00804 * Furthermore you can do it with a usermode address agent program 00805 * anyway... 00806 */ 00807 00808 static void icmp_address(struct sk_buff *skb) 00809 { 00810 #if 0 00811 if (net_ratelimit()) 00812 printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n"); 00813 #endif 00814 } 00815 00816 /* 00817 * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain 00818 * loudly if an inconsistency is found. 
00819 */ 00820 00821 static void icmp_address_reply(struct sk_buff *skb) 00822 { 00823 struct rtable *rt = (struct rtable*)skb->dst; 00824 struct net_device *dev = skb->dev; 00825 struct in_device *in_dev; 00826 struct in_ifaddr *ifa; 00827 u32 mask; 00828 00829 if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) 00830 return; 00831 00832 in_dev = in_dev_get(dev); 00833 if (!in_dev) 00834 return; 00835 read_lock(&in_dev->lock); 00836 if (in_dev->ifa_list && 00837 IN_DEV_LOG_MARTIANS(in_dev) && 00838 IN_DEV_FORWARD(in_dev)) { 00839 if (skb_copy_bits(skb, 0, &mask, 4)) 00840 BUG(); 00841 for (ifa=in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 00842 if (mask == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa)) 00843 break; 00844 } 00845 if (!ifa && net_ratelimit()) { 00846 printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from %s/%u.%u.%u.%u\n", 00847 NIPQUAD(mask), dev->name, NIPQUAD(rt->rt_src)); 00848 } 00849 } 00850 read_unlock(&in_dev->lock); 00851 in_dev_put(in_dev); 00852 } 00853 00854 static void icmp_discard(struct sk_buff *skb) 00855 { 00856 } 00857 00858 /* 00859 * Deal with incoming ICMP packets. 00860 */ 00861 00862 int icmp_rcv(struct sk_buff *skb) 00863 { 00864 struct icmphdr *icmph; 00865 struct rtable *rt = (struct rtable*)skb->dst; 00866 00867 ICMP_INC_STATS_BH(IcmpInMsgs); 00868 00869 switch (skb->ip_summed) { 00870 case CHECKSUM_HW: 00871 if ((u16)csum_fold(skb->csum) == 0) 00872 break; 00873 NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "icmp v4 hw csum failure\n")); 00874 case CHECKSUM_NONE: 00875 if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) 00876 goto error; 00877 default:; 00878 } 00879 00880 if (!pskb_pull(skb, sizeof(struct icmphdr))) 00881 goto error; 00882 00883 icmph = skb->h.icmph; 00884 00885 /* 00886 * 18 is the highest 'known' ICMP type. Anything else is a mystery 00887 * 00888 * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently discarded. 
00889 */ 00890 if (icmph->type > NR_ICMP_TYPES) 00891 goto error; 00892 00893 00894 /* 00895 * Parse the ICMP message 00896 */ 00897 00898 if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) { 00899 /* 00900 * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be 00901 * silently ignored (we let user decide with a sysctl). 00902 * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently 00903 * discarded if to broadcast/multicast. 00904 */ 00905 if (icmph->type == ICMP_ECHO && 00906 sysctl_icmp_echo_ignore_broadcasts) { 00907 goto error; 00908 } 00909 if (icmph->type != ICMP_ECHO && 00910 icmph->type != ICMP_TIMESTAMP && 00911 icmph->type != ICMP_ADDRESS && 00912 icmph->type != ICMP_ADDRESSREPLY) { 00913 goto error; 00914 } 00915 } 00916 00917 icmp_pointers[icmph->type].input[smp_processor_id()*2*sizeof(struct icmp_mib)/sizeof(unsigned long)]++; 00918 (icmp_pointers[icmph->type].handler)(skb); 00919 00920 drop: 00921 kfree_skb(skb); 00922 return 0; 00923 error: 00924 ICMP_INC_STATS_BH(IcmpInErrors); 00925 goto drop; 00926 } 00927 00928 /* 00929 * This table is the definition of how we handle ICMP. 
00930 */ 00931 00932 static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1] = { 00933 /* ECHO REPLY (0) */ 00934 { &icmp_statistics[0].IcmpOutEchoReps, &icmp_statistics[0].IcmpInEchoReps, icmp_discard, 0 }, 00935 { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, icmp_discard, 1 }, 00936 { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, icmp_discard, 1 }, 00937 /* DEST UNREACH (3) */ 00938 { &icmp_statistics[0].IcmpOutDestUnreachs, &icmp_statistics[0].IcmpInDestUnreachs, icmp_unreach, 1 }, 00939 /* SOURCE QUENCH (4) */ 00940 { &icmp_statistics[0].IcmpOutSrcQuenchs, &icmp_statistics[0].IcmpInSrcQuenchs, icmp_unreach, 1 }, 00941 /* REDIRECT (5) */ 00942 { &icmp_statistics[0].IcmpOutRedirects, &icmp_statistics[0].IcmpInRedirects, icmp_redirect, 1 }, 00943 { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, icmp_discard, 1 }, 00944 { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, icmp_discard, 1 }, 00945 /* ECHO (8) */ 00946 { &icmp_statistics[0].IcmpOutEchos, &icmp_statistics[0].IcmpInEchos, icmp_echo, 0 }, 00947 { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, icmp_discard, 1 }, 00948 { &icmp_statistics[0].dummy, &icmp_statistics[0].IcmpInErrors, icmp_discard, 1 }, 00949 /* TIME EXCEEDED (11) */ 00950 { &icmp_statistics[0].IcmpOutTimeExcds, &icmp_statistics[0].IcmpInTimeExcds, icmp_unreach, 1 }, 00951 /* PARAMETER PROBLEM (12) */ 00952 { &icmp_statistics[0].IcmpOutParmProbs, &icmp_statistics[0].IcmpInParmProbs, icmp_unreach, 1 }, 00953 /* TIMESTAMP (13) */ 00954 { &icmp_statistics[0].IcmpOutTimestamps, &icmp_statistics[0].IcmpInTimestamps, icmp_timestamp, 0 }, 00955 /* TIMESTAMP REPLY (14) */ 00956 { &icmp_statistics[0].IcmpOutTimestampReps, &icmp_statistics[0].IcmpInTimestampReps, icmp_discard, 0 }, 00957 /* INFO (15) */ 00958 { &icmp_statistics[0].dummy, &icmp_statistics[0].dummy, icmp_discard, 0 }, 00959 /* INFO REPLY (16) */ 00960 { &icmp_statistics[0].dummy, &icmp_statistics[0].dummy, 
icmp_discard, 0 }, 00961 /* ADDR MASK (17) */ 00962 { &icmp_statistics[0].IcmpOutAddrMasks, &icmp_statistics[0].IcmpInAddrMasks, icmp_address, 0 }, 00963 /* ADDR MASK REPLY (18) */ 00964 { &icmp_statistics[0].IcmpOutAddrMaskReps, &icmp_statistics[0].IcmpInAddrMaskReps, icmp_address_reply, 0 } 00965 }; 00966 00967 void __init icmp_init(struct net_proto_family *ops) 00968 { 00969 int err, i; 00970 00971 for (i = 0; i < NR_CPUS; i++) { 00972 __icmp_inode[i].i_mode = S_IFSOCK; 00973 __icmp_inode[i].i_sock = 1; 00974 __icmp_inode[i].i_uid = 0; 00975 __icmp_inode[i].i_gid = 0; 00976 init_waitqueue_head(&__icmp_inode[i].i_wait); 00977 init_waitqueue_head(&__icmp_inode[i].u.socket_i.wait); 00978 00979 icmp_socket_cpu(i)->inode = &__icmp_inode[i]; 00980 icmp_socket_cpu(i)->state = SS_UNCONNECTED; 00981 icmp_socket_cpu(i)->type = SOCK_RAW; 00982 00983 if ((err=ops->create(icmp_socket_cpu(i), IPPROTO_ICMP)) < 0) 00984 panic("Failed to create the ICMP control socket.\n"); 00985 00986 icmp_socket_cpu(i)->sk->allocation=GFP_ATOMIC; 00987 icmp_socket_cpu(i)->sk->sndbuf = SK_WMEM_MAX*2; 00988 icmp_socket_cpu(i)->sk->protinfo.af_inet.ttl = MAXTTL; 00989 icmp_socket_cpu(i)->sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT; 00990 00991 /* Unhash it so that IP input processing does not even 00992 * see it, we do not wish this socket to see incoming 00993 * packets. 00994 */ 00995 icmp_socket_cpu(i)->sk->prot->unhash(icmp_socket_cpu(i)->sk); 00996 } 00997 }

Generated on Wed Dec 1 21:25:30 2004 for Linux 2.4.23 Networking by doxygen 1.3.8