Main Page | Class List | File List | Class Members | File Members

af_inet.c

Go to the documentation of this file.
00001 /* 00002 * INET An implementation of the TCP/IP protocol suite for the LINUX 00003 * operating system. INET is implemented using the BSD Socket 00004 * interface as the means of communication with the user level. 00005 * 00006 * PF_INET protocol family socket handler. 00007 * 00008 * Version: $Id: af_inet.c,v 1.136 2001/11/06 22:21:08 davem Exp $ 00009 * 00010 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 00011 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 00012 * Florian La Roche, <flla@stud.uni-sb.de> 00013 * Alan Cox, <A.Cox@swansea.ac.uk> 00014 * 00015 * Changes (see also sock.c) 00016 * 00017 * piggy, 00018 * Karl Knutson : Socket protocol table 00019 * A.N.Kuznetsov : Socket death error in accept(). 00020 * John Richardson : Fix non blocking error in connect() 00021 * so sockets that fail to connect 00022 * don't return -EINPROGRESS. 00023 * Alan Cox : Asynchronous I/O support 00024 * Alan Cox : Keep correct socket pointer on sock structures 00025 * when accept() ed 00026 * Alan Cox : Semantics of SO_LINGER aren't state moved 00027 * to close when you look carefully. With 00028 * this fixed and the accept bug fixed 00029 * some RPC stuff seems happier. 00030 * Niibe Yutaka : 4.4BSD style write async I/O 00031 * Alan Cox, 00032 * Tony Gale : Fixed reuse semantics. 00033 * Alan Cox : bind() shouldn't abort existing but dead 00034 * sockets. Stops FTP netin:.. I hope. 00035 * Alan Cox : bind() works correctly for RAW sockets. Note 00036 * that FreeBSD at least was broken in this respect 00037 * so be careful with compatibility tests... 00038 * Alan Cox : routing cache support 00039 * Alan Cox : memzero the socket structure for compactness. 00040 * Matt Day : nonblock connect error handler 00041 * Alan Cox : Allow large numbers of pending sockets 00042 * (eg for big web sites), but only if 00043 * specifically application requested. 00044 * Alan Cox : New buffering throughout IP. Used dumbly. 00045 * Alan Cox : New buffering now used smartly. 00046 * Alan Cox : BSD rather than common sense interpretation of 00047 * listen. 00048 * Germano Caronni : Assorted small races. 00049 * Alan Cox : sendmsg/recvmsg basic support. 00050 * Alan Cox : Only sendmsg/recvmsg now supported. 00051 * Alan Cox : Locked down bind (see security list). 00052 * Alan Cox : Loosened bind a little. 00053 * Mike McLagan : ADD/DEL DLCI Ioctls 00054 * Willy Konynenberg : Transparent proxying support. 00055 * David S. Miller : New socket lookup architecture. 00056 * Some other random speedups. 00057 * Cyrus Durgin : Cleaned up file for kmod hacks. 00058 * Andi Kleen : Fix inet_stream_connect TCP race. 00059 * 00060 * This program is free software; you can redistribute it and/or 00061 * modify it under the terms of the GNU General Public License 00062 * as published by the Free Software Foundation; either version 00063 * 2 of the License, or (at your option) any later version. 00064 */ 00065 00066 #include <linux/config.h> 00067 #include <linux/errno.h> 00068 #include <linux/types.h> 00069 #include <linux/socket.h> 00070 #include <linux/in.h> 00071 #include <linux/kernel.h> 00072 #include <linux/major.h> 00073 #include <linux/sched.h> 00074 #include <linux/timer.h> 00075 #include <linux/string.h> 00076 #include <linux/sockios.h> 00077 #include <linux/net.h> 00078 #include <linux/fcntl.h> 00079 #include <linux/mm.h> 00080 #include <linux/interrupt.h> 00081 #include <linux/proc_fs.h> 00082 #include <linux/stat.h> 00083 #include <linux/init.h> 00084 #include <linux/poll.h> 00085 #include <linux/netfilter_ipv4.h> 00086 00087 #include <asm/uaccess.h> 00088 #include <asm/system.h> 00089 00090 #include <linux/smp_lock.h> 00091 #include <linux/inet.h> 00092 #include <linux/netdevice.h> 00093 #include <linux/brlock.h> 00094 #include <net/ip.h> 00095 #include <net/protocol.h> 00096 #include <net/arp.h> 00097 #include <net/route.h> 00098 #include <net/tcp.h> 00099 #include <net/udp.h> 00100 #include <linux/skbuff.h> 00101 #include <net/sock.h> 00102 #include <net/raw.h> 00103 #include <net/icmp.h> 00104 #include <net/ipip.h> 00105 #include <net/inet_common.h> 00106 #ifdef CONFIG_IP_MROUTE 00107 #include <linux/mroute.h> 00108 #endif 00109 #include <linux/if_bridge.h> 00110 #ifdef CONFIG_KMOD 00111 #include <linux/kmod.h> 00112 #endif 00113 #ifdef CONFIG_NET_DIVERT 00114 #include <linux/divert.h> 00115 #endif /* CONFIG_NET_DIVERT */ 00116 #if defined(CONFIG_NET_RADIO) || defined(CONFIG_NET_PCMCIA_RADIO) 00117 #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ 00118 #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */ 00119 00120 struct linux_mib net_statistics[NR_CPUS*2]; 00121 00122 #ifdef INET_REFCNT_DEBUG 00123 atomic_t inet_sock_nr; 00124 #endif 00125 00126 extern int raw_get_info(char *, char **, off_t, int); 00127 extern int snmp_get_info(char *, char **, off_t, int); 00128 extern int netstat_get_info(char *, char **, off_t, int); 00129 extern int afinet_get_info(char *, char **, off_t, int); 00130 extern int tcp_get_info(char *, char **, off_t, int); 00131 extern int udp_get_info(char *, char **, off_t, int); 00132 extern void ip_mc_drop_socket(struct sock *sk); 00133 00134 #ifdef CONFIG_DLCI 00135 extern int dlci_ioctl(unsigned int, void*); 00136 #endif 00137 00138 #ifdef CONFIG_DLCI_MODULE 00139 int (*dlci_ioctl_hook)(unsigned int, void *); 00140 #endif 00141 00142 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) 00143 int (*br_ioctl_hook)(unsigned long); 00144 #endif 00145 00146 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) 00147 int (*vlan_ioctl_hook)(unsigned long arg); 00148 #endif 00149 00150 /* The inetsw table contains everything that inet_create needs to 00151 * build a new socket. 00152 */ 00153 struct list_head inetsw[SOCK_MAX]; 00154 00155 /* New destruction routine */ 00156 00157 void inet_sock_destruct(struct sock *sk) 00158 { 00159 __skb_queue_purge(&sk->receive_queue); 00160 __skb_queue_purge(&sk->error_queue); 00161 00162 if (sk->type == SOCK_STREAM && sk->state != TCP_CLOSE) { 00163 printk("Attempt to release TCP socket in state %d %p\n", 00164 sk->state, 00165 sk); 00166 return; 00167 } 00168 if (!sk->dead) { 00169 printk("Attempt to release alive inet socket %p\n", sk); 00170 return; 00171 } 00172 00173 BUG_TRAP(atomic_read(&sk->rmem_alloc) == 0); 00174 BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0); 00175 BUG_TRAP(sk->wmem_queued == 0); 00176 BUG_TRAP(sk->forward_alloc == 0); 00177 00178 if (sk->protinfo.af_inet.opt) 00179 kfree(sk->protinfo.af_inet.opt); 00180 dst_release(sk->dst_cache); 00181 #ifdef INET_REFCNT_DEBUG 00182 atomic_dec(&inet_sock_nr); 00183 printk(KERN_DEBUG "INET socket %p released, %d are still alive\n", sk, atomic_read(&inet_sock_nr)); 00184 #endif 00185 } 00186 00187 void inet_sock_release(struct sock *sk) 00188 { 00189 if (sk->prot->destroy) 00190 sk->prot->destroy(sk); 00191 00192 /* Observation: when inet_sock_release is called, processes have 00193 * no access to socket. But net still has. 00194 * Step one, detach it from networking: 00195 * 00196 * A. Remove from hash tables. 00197 */ 00198 00199 sk->prot->unhash(sk); 00200 00201 /* In this point socket cannot receive new packets, 00202 * but it is possible that some packets are in flight 00203 * because some CPU runs receiver and did hash table lookup 00204 * before we unhashed socket. They will achieve receive queue 00205 * and will be purged by socket destructor. 00206 * 00207 * Also we still have packets pending on receive 00208 * queue and probably, our own packets waiting in device queues. 00209 * sock_destroy will drain receive queue, but transmitted 00210 * packets will delay socket destruction until the last reference 00211 * will be released. 00212 */ 00213 00214 sock_orphan(sk); 00215 00216 #ifdef INET_REFCNT_DEBUG 00217 if (atomic_read(&sk->refcnt) != 1) { 00218 printk(KERN_DEBUG "Destruction inet %p delayed, c=%d\n", sk, atomic_read(&sk->refcnt)); 00219 } 00220 #endif 00221 sock_put(sk); 00222 } 00223 00224 00225 /* 00226 * The routines beyond this point handle the behaviour of an AF_INET 00227 * socket object. Mostly it punts to the subprotocols of IP to do 00228 * the work. 00229 */ 00230 00231 00232 /* 00233 * Set socket options on an inet socket. 00234 */ 00235 00236 int inet_setsockopt(struct socket *sock, int level, int optname, 00237 char *optval, int optlen) 00238 { 00239 struct sock *sk=sock->sk; 00240 00241 return sk->prot->setsockopt(sk,level,optname,optval,optlen); 00242 } 00243 00244 /* 00245 * Get a socket option on an AF_INET socket. 00246 * 00247 * FIX: POSIX 1003.1g is very ambiguous here. It states that 00248 * asynchronous errors should be reported by getsockopt. We assume 00249 * this means if you specify SO_ERROR (otherwise whats the point of it). 00250 */ 00251 00252 int inet_getsockopt(struct socket *sock, int level, int optname, 00253 char *optval, int *optlen) 00254 { 00255 struct sock *sk=sock->sk; 00256 00257 return sk->prot->getsockopt(sk,level,optname,optval,optlen); 00258 } 00259 00260 /* 00261 * Automatically bind an unbound socket. 00262 */ 00263 00264 static int inet_autobind(struct sock *sk) 00265 { 00266 /* We may need to bind the socket. */ 00267 lock_sock(sk); 00268 if (sk->num == 0) { 00269 if (sk->prot->get_port(sk, 0) != 0) { 00270 release_sock(sk); 00271 return -EAGAIN; 00272 } 00273 sk->sport = htons(sk->num); 00274 } 00275 release_sock(sk); 00276 return 0; 00277 } 00278 00279 /* 00280 * Move a socket into listening state. 00281 */ 00282 00283 int inet_listen(struct socket *sock, int backlog) 00284 { 00285 struct sock *sk = sock->sk; 00286 unsigned char old_state; 00287 int err; 00288 00289 lock_sock(sk); 00290 00291 err = -EINVAL; 00292 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) 00293 goto out; 00294 00295 old_state = sk->state; 00296 if (!((1<<old_state)&(TCPF_CLOSE|TCPF_LISTEN))) 00297 goto out; 00298 00299 /* Really, if the socket is already in listen state 00300 * we can only allow the backlog to be adjusted. 00301 */ 00302 if (old_state != TCP_LISTEN) { 00303 err = tcp_listen_start(sk); 00304 if (err) 00305 goto out; 00306 } 00307 sk->max_ack_backlog = backlog; 00308 err = 0; 00309 00310 out: 00311 release_sock(sk); 00312 return err; 00313 } 00314 00315 /* 00316 * Create an inet socket. 00317 */ 00318 00319 static int inet_create(struct socket *sock, int protocol) 00320 { 00321 struct sock *sk; 00322 struct list_head *p; 00323 struct inet_protosw *answer; 00324 00325 sock->state = SS_UNCONNECTED; 00326 sk = sk_alloc(PF_INET, GFP_KERNEL, 1); 00327 if (sk == NULL) 00328 goto do_oom; 00329 00330 /* Look for the requested type/protocol pair. */ 00331 answer = NULL; 00332 br_read_lock_bh(BR_NETPROTO_LOCK); 00333 list_for_each(p, &inetsw[sock->type]) { 00334 answer = list_entry(p, struct inet_protosw, list); 00335 00336 /* Check the non-wild match. */ 00337 if (protocol == answer->protocol) { 00338 if (protocol != IPPROTO_IP) 00339 break; 00340 } else { 00341 /* Check for the two wild cases. */ 00342 if (IPPROTO_IP == protocol) { 00343 protocol = answer->protocol; 00344 break; 00345 } 00346 if (IPPROTO_IP == answer->protocol) 00347 break; 00348 } 00349 answer = NULL; 00350 } 00351 br_read_unlock_bh(BR_NETPROTO_LOCK); 00352 00353 if (!answer) 00354 goto free_and_badtype; 00355 if (answer->capability > 0 && !capable(answer->capability)) 00356 goto free_and_badperm; 00357 if (!protocol) 00358 goto free_and_noproto; 00359 00360 sock->ops = answer->ops; 00361 sk->prot = answer->prot; 00362 sk->no_check = answer->no_check; 00363 if (INET_PROTOSW_REUSE & answer->flags) 00364 sk->reuse = 1; 00365 00366 if (SOCK_RAW == sock->type) { 00367 sk->num = protocol; 00368 if (IPPROTO_RAW == protocol) 00369 sk->protinfo.af_inet.hdrincl = 1; 00370 } 00371 00372 if (ipv4_config.no_pmtu_disc) 00373 sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT; 00374 else 00375 sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_WANT; 00376 00377 sk->protinfo.af_inet.id = 0; 00378 00379 sock_init_data(sock,sk); 00380 00381 sk->destruct = inet_sock_destruct; 00382 00383 sk->zapped = 0; 00384 sk->family = PF_INET; 00385 sk->protocol = protocol; 00386 00387 sk->backlog_rcv = sk->prot->backlog_rcv; 00388 00389 sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl; 00390 00391 sk->protinfo.af_inet.mc_loop = 1; 00392 sk->protinfo.af_inet.mc_ttl = 1; 00393 sk->protinfo.af_inet.mc_index = 0; 00394 sk->protinfo.af_inet.mc_list = NULL; 00395 00396 #ifdef INET_REFCNT_DEBUG 00397 atomic_inc(&inet_sock_nr); 00398 #endif 00399 00400 if (sk->num) { 00401 /* It assumes that any protocol which allows 00402 * the user to assign a number at socket 00403 * creation time automatically 00404 * shares. 00405 */ 00406 sk->sport = htons(sk->num); 00407 00408 /* Add to protocol hash chains. */ 00409 sk->prot->hash(sk); 00410 } 00411 00412 if (sk->prot->init) { 00413 int err = sk->prot->init(sk); 00414 if (err != 0) { 00415 inet_sock_release(sk); 00416 return err; 00417 } 00418 } 00419 return 0; 00420 00421 free_and_badtype: 00422 sk_free(sk); 00423 return -ESOCKTNOSUPPORT; 00424 00425 free_and_badperm: 00426 sk_free(sk); 00427 return -EPERM; 00428 00429 free_and_noproto: 00430 sk_free(sk); 00431 return -EPROTONOSUPPORT; 00432 00433 do_oom: 00434 return -ENOBUFS; 00435 } 00436 00437 00438 /* 00439 * The peer socket should always be NULL (or else). When we call this 00440 * function we are destroying the object and from then on nobody 00441 * should refer to it. 00442 */ 00443 00444 int inet_release(struct socket *sock) 00445 { 00446 struct sock *sk = sock->sk; 00447 00448 if (sk) { 00449 long timeout; 00450 00451 /* Applications forget to leave groups before exiting */ 00452 ip_mc_drop_socket(sk); 00453 00454 /* If linger is set, we don't return until the close 00455 * is complete. Otherwise we return immediately. The 00456 * actually closing is done the same either way. 00457 * 00458 * If the close is due to the process exiting, we never 00459 * linger.. 00460 */ 00461 timeout = 0; 00462 if (sk->linger && !(current->flags & PF_EXITING)) 00463 timeout = sk->lingertime; 00464 sock->sk = NULL; 00465 sk->prot->close(sk, timeout); 00466 } 00467 return(0); 00468 } 00469 00470 /* It is off by default, see below. */ 00471 int sysctl_ip_nonlocal_bind; 00472 00473 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 00474 { 00475 struct sockaddr_in *addr=(struct sockaddr_in *)uaddr; 00476 struct sock *sk=sock->sk; 00477 unsigned short snum; 00478 int chk_addr_ret; 00479 int err; 00480 00481 /* If the socket has its own bind function then use it. (RAW) */ 00482 if(sk->prot->bind) 00483 return sk->prot->bind(sk, uaddr, addr_len); 00484 00485 if (addr_len < sizeof(struct sockaddr_in)) 00486 return -EINVAL; 00487 00488 chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); 00489 00490 /* Not specified by any standard per-se, however it breaks too 00491 * many applications when removed. It is unfortunate since 00492 * allowing applications to make a non-local bind solves 00493 * several problems with systems using dynamic addressing. 00494 * (ie. your servers still start up even if your ISDN link 00495 * is temporarily down) 00496 */ 00497 if (sysctl_ip_nonlocal_bind == 0 && 00498 sk->protinfo.af_inet.freebind == 0 && 00499 addr->sin_addr.s_addr != INADDR_ANY && 00500 chk_addr_ret != RTN_LOCAL && 00501 chk_addr_ret != RTN_MULTICAST && 00502 chk_addr_ret != RTN_BROADCAST) 00503 return -EADDRNOTAVAIL; 00504 00505 snum = ntohs(addr->sin_port); 00506 if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 00507 return -EACCES; 00508 00509 /* We keep a pair of addresses. rcv_saddr is the one 00510 * used by hash lookups, and saddr is used for transmit. 00511 * 00512 * In the BSD API these are the same except where it 00513 * would be illegal to use them (multicast/broadcast) in 00514 * which case the sending device address is used. 00515 */ 00516 lock_sock(sk); 00517 00518 /* Check these errors (active socket, double bind). */ 00519 err = -EINVAL; 00520 if ((sk->state != TCP_CLOSE) || 00521 (sk->num != 0)) 00522 goto out; 00523 00524 sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr; 00525 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) 00526 sk->saddr = 0; /* Use device */ 00527 00528 /* Make sure we are allowed to bind here. */ 00529 if (sk->prot->get_port(sk, snum) != 0) { 00530 sk->saddr = sk->rcv_saddr = 0; 00531 err = -EADDRINUSE; 00532 goto out; 00533 } 00534 00535 if (sk->rcv_saddr) 00536 sk->userlocks |= SOCK_BINDADDR_LOCK; 00537 if (snum) 00538 sk->userlocks |= SOCK_BINDPORT_LOCK; 00539 sk->sport = htons(sk->num); 00540 sk->daddr = 0; 00541 sk->dport = 0; 00542 sk_dst_reset(sk); 00543 err = 0; 00544 out: 00545 release_sock(sk); 00546 return err; 00547 } 00548 00549 int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, 00550 int addr_len, int flags) 00551 { 00552 struct sock *sk=sock->sk; 00553 00554 if (uaddr->sa_family == AF_UNSPEC) 00555 return sk->prot->disconnect(sk, flags); 00556 00557 if (sk->num==0 && inet_autobind(sk) != 0) 00558 return -EAGAIN; 00559 return sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len); 00560 } 00561 00562 static long inet_wait_for_connect(struct sock *sk, long timeo) 00563 { 00564 DECLARE_WAITQUEUE(wait, current); 00565 00566 __set_current_state(TASK_INTERRUPTIBLE); 00567 add_wait_queue(sk->sleep, &wait); 00568 00569 /* Basic assumption: if someone sets sk->err, he _must_ 00570 * change state of the socket from TCP_SYN_*. 00571 * Connect() does not allow to get error notifications 00572 * without closing the socket. 00573 */ 00574 while ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) { 00575 release_sock(sk); 00576 timeo = schedule_timeout(timeo); 00577 lock_sock(sk); 00578 if (signal_pending(current) || !timeo) 00579 break; 00580 set_current_state(TASK_INTERRUPTIBLE); 00581 } 00582 __set_current_state(TASK_RUNNING); 00583 remove_wait_queue(sk->sleep, &wait); 00584 return timeo; 00585 } 00586 00587 /* 00588 * Connect to a remote host. There is regrettably still a little 00589 * TCP 'magic' in here. 00590 */ 00591 00592 int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, 00593 int addr_len, int flags) 00594 { 00595 struct sock *sk=sock->sk; 00596 int err; 00597 long timeo; 00598 00599 lock_sock(sk); 00600 00601 if (uaddr->sa_family == AF_UNSPEC) { 00602 err = sk->prot->disconnect(sk, flags); 00603 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; 00604 goto out; 00605 } 00606 00607 switch (sock->state) { 00608 default: 00609 err = -EINVAL; 00610 goto out; 00611 case SS_CONNECTED: 00612 err = -EISCONN; 00613 goto out; 00614 case SS_CONNECTING: 00615 err = -EALREADY; 00616 /* Fall out of switch with err, set for this state */ 00617 break; 00618 case SS_UNCONNECTED: 00619 err = -EISCONN; 00620 if (sk->state != TCP_CLOSE) 00621 goto out; 00622 00623 err = sk->prot->connect(sk, uaddr, addr_len); 00624 if (err < 0) 00625 goto out; 00626 00627 sock->state = SS_CONNECTING; 00628 00629 /* Just entered SS_CONNECTING state; the only 00630 * difference is that return value in non-blocking 00631 * case is EINPROGRESS, rather than EALREADY. 00632 */ 00633 err = -EINPROGRESS; 00634 break; 00635 } 00636 00637 timeo = sock_sndtimeo(sk, flags&O_NONBLOCK); 00638 00639 if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) { 00640 /* Error code is set above */ 00641 if (!timeo || !inet_wait_for_connect(sk, timeo)) 00642 goto out; 00643 00644 err = sock_intr_errno(timeo); 00645 if (signal_pending(current)) 00646 goto out; 00647 } 00648 00649 /* Connection was closed by RST, timeout, ICMP error 00650 * or another process disconnected us. 00651 */ 00652 if (sk->state == TCP_CLOSE) 00653 goto sock_error; 00654 00655 /* sk->err may be not zero now, if RECVERR was ordered by user 00656 * and error was received after socket entered established state. 00657 * Hence, it is handled normally after connect() return successfully. 00658 */ 00659 00660 sock->state = SS_CONNECTED; 00661 err = 0; 00662 out: 00663 release_sock(sk); 00664 return err; 00665 00666 sock_error: 00667 err = sock_error(sk) ? : -ECONNABORTED; 00668 sock->state = SS_UNCONNECTED; 00669 if (sk->prot->disconnect(sk, flags)) 00670 sock->state = SS_DISCONNECTING; 00671 goto out; 00672 } 00673 00674 /* 00675 * Accept a pending connection. The TCP layer now gives BSD semantics. 00676 */ 00677 00678 int inet_accept(struct socket *sock, struct socket *newsock, int flags) 00679 { 00680 struct sock *sk1 = sock->sk; 00681 struct sock *sk2; 00682 int err = -EINVAL; 00683 00684 if((sk2 = sk1->prot->accept(sk1,flags,&err)) == NULL) 00685 goto do_err; 00686 00687 lock_sock(sk2); 00688 00689 BUG_TRAP((1<<sk2->state)&(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_CLOSE)); 00690 00691 sock_graft(sk2, newsock); 00692 00693 newsock->state = SS_CONNECTED; 00694 release_sock(sk2); 00695 return 0; 00696 00697 do_err: 00698 return err; 00699 } 00700 00701 00702 /* 00703 * This does both peername and sockname. 00704 */ 00705 00706 int inet_getname(struct socket *sock, struct sockaddr *uaddr, 00707 int *uaddr_len, int peer) 00708 { 00709 struct sock *sk = sock->sk; 00710 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; 00711 00712 sin->sin_family = AF_INET; 00713 if (peer) { 00714 if (!sk->dport) 00715 return -ENOTCONN; 00716 if (((1<<sk->state)&(TCPF_CLOSE|TCPF_SYN_SENT)) && peer == 1) 00717 return -ENOTCONN; 00718 sin->sin_port = sk->dport; 00719 sin->sin_addr.s_addr = sk->daddr; 00720 } else { 00721 __u32 addr = sk->rcv_saddr; 00722 if (!addr) 00723 addr = sk->saddr; 00724 sin->sin_port = sk->sport; 00725 sin->sin_addr.s_addr = addr; 00726 } 00727 *uaddr_len = sizeof(*sin); 00728 return(0); 00729 } 00730 00731 00732 00733 int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size, 00734 int flags, struct scm_cookie *scm) 00735 { 00736 struct sock *sk = sock->sk; 00737 int addr_len = 0; 00738 int err; 00739 00740 err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT, 00741 flags&~MSG_DONTWAIT, &addr_len); 00742 if (err >= 0) 00743 msg->msg_namelen = addr_len; 00744 return err; 00745 } 00746 00747 00748 int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size, 00749 struct scm_cookie *scm) 00750 { 00751 struct sock *sk = sock->sk; 00752 00753 /* We may need to bind the socket. */ 00754 if (sk->num==0 && inet_autobind(sk) != 0) 00755 return -EAGAIN; 00756 00757 return sk->prot->sendmsg(sk, msg, size); 00758 } 00759 00760 int inet_shutdown(struct socket *sock, int how) 00761 { 00762 struct sock *sk = sock->sk; 00763 int err = 0; 00764 00765 /* This should really check to make sure 00766 * the socket is a TCP socket. (WHY AC...) 00767 */ 00768 how++; /* maps 0->1 has the advantage of making bit 1 rcvs and 00769 1->2 bit 2 snds. 00770 2->3 */ 00771 if ((how & ~SHUTDOWN_MASK) || how==0) /* MAXINT->0 */ 00772 return -EINVAL; 00773 00774 lock_sock(sk); 00775 if (sock->state == SS_CONNECTING) { 00776 if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV|TCPF_CLOSE)) 00777 sock->state = SS_DISCONNECTING; 00778 else 00779 sock->state = SS_CONNECTED; 00780 } 00781 00782 switch (sk->state) { 00783 case TCP_CLOSE: 00784 err = -ENOTCONN; 00785 /* Hack to wake up other listeners, who can poll for 00786 POLLHUP, even on eg. unconnected UDP sockets -- RR */ 00787 default: 00788 sk->shutdown |= how; 00789 if (sk->prot->shutdown) 00790 sk->prot->shutdown(sk, how); 00791 break; 00792 00793 /* Remaining two branches are temporary solution for missing 00794 * close() in multithreaded environment. It is _not_ a good idea, 00795 * but we have no choice until close() is repaired at VFS level. 00796 */ 00797 case TCP_LISTEN: 00798 if (!(how & RCV_SHUTDOWN)) 00799 break; 00800 /* Fall through */ 00801 case TCP_SYN_SENT: 00802 err = sk->prot->disconnect(sk, O_NONBLOCK); 00803 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; 00804 break; 00805 } 00806 00807 /* Wake up anyone sleeping in poll. */ 00808 sk->state_change(sk); 00809 release_sock(sk); 00810 return err; 00811 } 00812 00813 /* 00814 * ioctl() calls you can issue on an INET socket. Most of these are 00815 * device configuration and stuff and very rarely used. Some ioctls 00816 * pass on to the socket itself. 00817 * 00818 * NOTE: I like the idea of a module for the config stuff. ie ifconfig 00819 * loads the devconfigure module does its configuring and unloads it. 00820 * There's a good 20K of config code hanging around the kernel. 00821 */ 00822 00823 int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 00824 { 00825 struct sock *sk = sock->sk; 00826 int err; 00827 int pid; 00828 00829 switch(cmd) { 00830 case FIOSETOWN: 00831 case SIOCSPGRP: 00832 err = get_user(pid, (int *) arg); 00833 if (err) 00834 return err; 00835 if (current->pid != pid && current->pgrp != -pid && 00836 !capable(CAP_NET_ADMIN)) 00837 return -EPERM; 00838 sk->proc = pid; 00839 return(0); 00840 case FIOGETOWN: 00841 case SIOCGPGRP: 00842 return put_user(sk->proc, (int *)arg); 00843 case SIOCGSTAMP: 00844 if(sk->stamp.tv_sec==0) 00845 return -ENOENT; 00846 err = copy_to_user((void *)arg,&sk->stamp,sizeof(struct timeval)); 00847 if (err) 00848 err = -EFAULT; 00849 return err; 00850 case SIOCADDRT: 00851 case SIOCDELRT: 00852 case SIOCRTMSG: 00853 return(ip_rt_ioctl(cmd,(void *) arg)); 00854 case SIOCDARP: 00855 case SIOCGARP: 00856 case SIOCSARP: 00857 return(arp_ioctl(cmd,(void *) arg)); 00858 case SIOCGIFADDR: 00859 case SIOCSIFADDR: 00860 case SIOCGIFBRDADDR: 00861 case SIOCSIFBRDADDR: 00862 case SIOCGIFNETMASK: 00863 case SIOCSIFNETMASK: 00864 case SIOCGIFDSTADDR: 00865 case SIOCSIFDSTADDR: 00866 case SIOCSIFPFLAGS: 00867 case SIOCGIFPFLAGS: 00868 case SIOCSIFFLAGS: 00869 return(devinet_ioctl(cmd,(void *) arg)); 00870 case SIOCGIFBR: 00871 case SIOCSIFBR: 00872 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) 00873 #ifdef CONFIG_KMOD 00874 if (br_ioctl_hook == NULL) 00875 request_module("bridge"); 00876 #endif 00877 if (br_ioctl_hook != NULL) 00878 return br_ioctl_hook(arg); 00879 #endif 00880 return -ENOPKG; 00881 00882 case SIOCGIFVLAN: 00883 case SIOCSIFVLAN: 00884 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) 00885 #ifdef CONFIG_KMOD 00886 if (vlan_ioctl_hook == NULL) 00887 request_module("8021q"); 00888 #endif 00889 if (vlan_ioctl_hook != NULL) 00890 return vlan_ioctl_hook(arg); 00891 #endif 00892 return -ENOPKG; 00893 00894 case SIOCGIFDIVERT: 00895 case SIOCSIFDIVERT: 00896 #ifdef CONFIG_NET_DIVERT 00897 return divert_ioctl(cmd, (struct divert_cf *) arg); 00898 #else 00899 return -ENOPKG; 00900 #endif /* CONFIG_NET_DIVERT */ 00901 00902 case SIOCADDDLCI: 00903 case SIOCDELDLCI: 00904 #ifdef CONFIG_DLCI 00905 lock_kernel(); 00906 err = dlci_ioctl(cmd, (void *) arg); 00907 unlock_kernel(); 00908 return err; 00909 #endif 00910 00911 #ifdef CONFIG_DLCI_MODULE 00912 00913 #ifdef CONFIG_KMOD 00914 if (dlci_ioctl_hook == NULL) 00915 request_module("dlci"); 00916 #endif 00917 00918 if (dlci_ioctl_hook) { 00919 lock_kernel(); 00920 err = (*dlci_ioctl_hook)(cmd, (void *) arg); 00921 unlock_kernel(); 00922 return err; 00923 } 00924 #endif 00925 return -ENOPKG; 00926 00927 default: 00928 if ((cmd >= SIOCDEVPRIVATE) && 00929 (cmd <= (SIOCDEVPRIVATE + 15))) 00930 return(dev_ioctl(cmd,(void *) arg)); 00931 00932 #ifdef WIRELESS_EXT 00933 if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST)) 00934 return(dev_ioctl(cmd,(void *) arg)); 00935 #endif /* WIRELESS_EXT */ 00936 00937 if (sk->prot->ioctl==NULL || (err=sk->prot->ioctl(sk, cmd, arg))==-ENOIOCTLCMD) 00938 return(dev_ioctl(cmd,(void *) arg)); 00939 return err; 00940 } 00941 /*NOTREACHED*/ 00942 return(0); 00943 } 00944 00945 struct proto_ops inet_stream_ops = { 00946 family: PF_INET, 00947 00948 release: inet_release, 00949 bind: inet_bind, 00950 connect: inet_stream_connect, 00951 socketpair: sock_no_socketpair, 00952 accept: inet_accept, 00953 getname: inet_getname, 00954 poll: tcp_poll, 00955 ioctl: inet_ioctl, 00956 listen: inet_listen, 00957 shutdown: inet_shutdown, 00958 setsockopt: inet_setsockopt, 00959 getsockopt: inet_getsockopt, 00960 sendmsg: inet_sendmsg, 00961 recvmsg: inet_recvmsg, 00962 mmap: sock_no_mmap, 00963 sendpage: tcp_sendpage 00964 }; 00965 00966 struct proto_ops inet_dgram_ops = { 00967 family: PF_INET, 00968 00969 release: inet_release, 00970 bind: inet_bind, 00971 connect: inet_dgram_connect, 00972 socketpair: sock_no_socketpair, 00973 accept: sock_no_accept, 00974 getname: inet_getname, 00975 poll: datagram_poll, 00976 ioctl: inet_ioctl, 00977 listen: sock_no_listen, 00978 shutdown: inet_shutdown, 00979 setsockopt: inet_setsockopt, 00980 getsockopt: inet_getsockopt, 00981 sendmsg: inet_sendmsg, 00982 recvmsg: inet_recvmsg, 00983 mmap: sock_no_mmap, 00984 sendpage: sock_no_sendpage, 00985 }; 00986 00987 struct net_proto_family inet_family_ops = { 00988 family: PF_INET, 00989 create: inet_create 00990 }; 00991 00992 00993 extern void tcp_init(void); 00994 extern void tcp_v4_init(struct net_proto_family *); 00995 00996 /* Upon startup we insert all the elements in inetsw_array[] into 00997 * the linked list inetsw. 00998 */ 00999 static struct inet_protosw inetsw_array[] = 01000 { 01001 { 01002 type: SOCK_STREAM, 01003 protocol: IPPROTO_TCP, 01004 prot: &tcp_prot, 01005 ops: &inet_stream_ops, 01006 capability: -1, 01007 no_check: 0, 01008 flags: INET_PROTOSW_PERMANENT, 01009 }, 01010 01011 { 01012 type: SOCK_DGRAM, 01013 protocol: IPPROTO_UDP, 01014 prot: &udp_prot, 01015 ops: &inet_dgram_ops, 01016 capability: -1, 01017 no_check: UDP_CSUM_DEFAULT, 01018 flags: INET_PROTOSW_PERMANENT, 01019 }, 01020 01021 01022 { 01023 type: SOCK_RAW, 01024 protocol: IPPROTO_IP, /* wild card */ 01025 prot: &raw_prot, 01026 ops: &inet_dgram_ops, 01027 capability: CAP_NET_RAW, 01028 no_check: UDP_CSUM_DEFAULT, 01029 flags: INET_PROTOSW_REUSE, 01030 } 01031 }; 01032 01033 #define INETSW_ARRAY_LEN (sizeof(inetsw_array) / sizeof(struct inet_protosw)) 01034 01035 void 01036 inet_register_protosw(struct inet_protosw *p) 01037 { 01038 struct list_head *lh; 01039 struct inet_protosw *answer; 01040 int protocol = p->protocol; 01041 struct list_head *last_perm; 01042 01043 br_write_lock_bh(BR_NETPROTO_LOCK); 01044 01045 if (p->type > SOCK_MAX) 01046 goto out_illegal; 01047 01048 /* If we are trying to override a permanent protocol, bail. */ 01049 answer = NULL; 01050 last_perm = &inetsw[p->type]; 01051 list_for_each(lh, &inetsw[p->type]) { 01052 answer = list_entry(lh, struct inet_protosw, list); 01053 01054 /* Check only the non-wild match. */ 01055 if (INET_PROTOSW_PERMANENT & answer->flags) { 01056 if (protocol == answer->protocol) 01057 break; 01058 last_perm = lh; 01059 } 01060 01061 answer = NULL; 01062 } 01063 if (answer) 01064 goto out_permanent; 01065 01066 /* Add the new entry after the last permanent entry if any, so that 01067 * the new entry does not override a permanent entry when matched with 01068 * a wild-card protocol. But it is allowed to override any existing 01069 * non-permanent entry. This means that when we remove this entry, the 01070 * system automatically returns to the old behavior. 01071 */ 01072 list_add(&p->list, last_perm); 01073 out: 01074 br_write_unlock_bh(BR_NETPROTO_LOCK); 01075 return; 01076 01077 out_permanent: 01078 printk(KERN_ERR "Attempt to override permanent protocol %d.\n", 01079 protocol); 01080 goto out; 01081 01082 out_illegal: 01083 printk(KERN_ERR 01084 "Ignoring attempt to register illegal socket type %d.\n", 01085 p->type); 01086 goto out; 01087 } 01088 01089 void 01090 inet_unregister_protosw(struct inet_protosw *p) 01091 { 01092 if (INET_PROTOSW_PERMANENT & p->flags) { 01093 printk(KERN_ERR 01094 "Attempt to unregister permanent protocol %d.\n", 01095 p->protocol); 01096 } else { 01097 br_write_lock_bh(BR_NETPROTO_LOCK); 01098 list_del(&p->list); 01099 br_write_unlock_bh(BR_NETPROTO_LOCK); 01100 } 01101 } 01102 01103 extern void ipfrag_init(void); 01104 01105 /* 01106 * Called by socket.c on kernel startup. 01107 */ 01108 01109 static int __init inet_init(void) 01110 { 01111 struct sk_buff *dummy_skb; 01112 struct inet_protocol *p; 01113 struct inet_protosw *q; 01114 struct list_head *r; 01115 01116 printk(KERN_INFO "NET4: Linux TCP/IP 1.0 for NET4.0\n"); 01117 01118 if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) { 01119 printk(KERN_CRIT "inet_proto_init: panic\n"); 01120 return -EINVAL; 01121 } 01122 01123 /* 01124 * Tell SOCKET that we are alive... 01125 */ 01126 01127 (void) sock_register(&inet_family_ops); 01128 01129 /* 01130 * Add all the protocols. 01131 */ 01132 01133 printk(KERN_INFO "IP Protocols: "); 01134 for (p = inet_protocol_base; p != NULL;) { 01135 struct inet_protocol *tmp = (struct inet_protocol *) p->next; 01136 inet_add_protocol(p); 01137 printk("%s%s",p->name,tmp?", ":"\n"); 01138 p = tmp; 01139 } 01140 01141 /* Register the socket-side information for inet_create. */ 01142 for(r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r) 01143 INIT_LIST_HEAD(r); 01144 01145 for(q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) 01146 inet_register_protosw(q); 01147 01148 /* 01149 * Set the ARP module up 01150 */ 01151 01152 arp_init(); 01153 01154 /* 01155 * Set the IP module up 01156 */ 01157 01158 ip_init(); 01159 01160 tcp_v4_init(&inet_family_ops); 01161 01162 /* Setup TCP slab cache for open requests. */ 01163 tcp_init(); 01164 01165 01166 /* 01167 * Set the ICMP layer up 01168 */ 01169 01170 icmp_init(&inet_family_ops); 01171 01172 /* I wish inet_add_protocol had no constructor hook... 01173 I had to move IPIP from net/ipv4/protocol.c :-( --ANK 01174 */ 01175 #ifdef CONFIG_NET_IPIP 01176 ipip_init(); 01177 #endif 01178 #ifdef CONFIG_NET_IPGRE 01179 ipgre_init(); 01180 #endif 01181 01182 /* 01183 * Initialise the multicast router 01184 */ 01185 #if defined(CONFIG_IP_MROUTE) 01186 ip_mr_init(); 01187 #endif 01188 01189 /* 01190 * Create all the /proc entries. 01191 */ 01192 #ifdef CONFIG_PROC_FS 01193 proc_net_create ("raw", 0, raw_get_info); 01194 proc_net_create ("netstat", 0, netstat_get_info); 01195 proc_net_create ("snmp", 0, snmp_get_info); 01196 proc_net_create ("sockstat", 0, afinet_get_info); 01197 proc_net_create ("tcp", 0, tcp_get_info); 01198 proc_net_create ("udp", 0, udp_get_info); 01199 #endif /* CONFIG_PROC_FS */ 01200 01201 ipfrag_init(); 01202 01203 return 0; 01204 } 01205 module_init(inet_init);

Generated on Wed Dec 1 21:25:29 2004 for Linux 2.4.23 Networking by doxygen 1.3.8