Main Page | Class List | File List | Class Members | File Members

ip_vs_ctl.c

Go to the documentation of this file.
00001 /* 00002 * IPVS An implementation of the IP virtual server support for the 00003 * LINUX operating system. IPVS is now implemented as a module 00004 * over the NetFilter framework. IPVS can be used to build a 00005 * high-performance and highly available server based on a 00006 * cluster of servers. 00007 * 00008 * Version: $Id: ip_vs_ctl.c,v 1.30.2.3 2003/07/29 14:37:12 wensong Exp $ 00009 * 00010 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 00011 * Peter Kese <peter.kese@ijs.si> 00012 * Julian Anastasov <ja@ssi.bg> 00013 * 00014 * This program is free software; you can redistribute it and/or 00015 * modify it under the terms of the GNU General Public License 00016 * as published by the Free Software Foundation; either version 00017 * 2 of the License, or (at your option) any later version. 00018 * 00019 * Changes: 00020 * 00021 */ 00022 00023 #include <linux/config.h> 00024 #include <linux/kernel.h> 00025 #include <linux/module.h> 00026 #include <linux/init.h> 00027 #include <linux/types.h> 00028 #include <linux/errno.h> 00029 #include <linux/fs.h> 00030 #include <linux/sysctl.h> 00031 #include <linux/proc_fs.h> 00032 #include <linux/timer.h> 00033 #include <linux/swap.h> 00034 #include <linux/proc_fs.h> 00035 00036 #include <linux/netfilter.h> 00037 #include <linux/netfilter_ipv4.h> 00038 00039 #include <net/ip.h> 00040 #include <net/sock.h> 00041 00042 #include <asm/uaccess.h> 00043 00044 #include <net/ip_vs.h> 00045 00046 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ 00047 static DECLARE_MUTEX(__ip_vs_mutex); 00048 00049 /* lock for service table */ 00050 rwlock_t __ip_vs_svc_lock = RW_LOCK_UNLOCKED; 00051 00052 /* lock for table with the real services */ 00053 static rwlock_t __ip_vs_rs_lock = RW_LOCK_UNLOCKED; 00054 00055 /* lock for state and timeout tables */ 00056 static rwlock_t __ip_vs_securetcp_lock = RW_LOCK_UNLOCKED; 00057 00058 /* lock for drop entry handling */ 00059 static spinlock_t __ip_vs_dropentry_lock = SPIN_LOCK_UNLOCKED; 00060 00061 /* lock for drop packet handling */ 00062 static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED; 00063 00064 /* 1/rate drop and drop-entry variables */ 00065 int ip_vs_drop_rate = 0; 00066 int ip_vs_drop_counter = 0; 00067 atomic_t ip_vs_dropentry = ATOMIC_INIT(0); 00068 00069 /* number of virtual services */ 00070 static int ip_vs_num_services = 0; 00071 00072 /* sysctl variables */ 00073 static int sysctl_ip_vs_drop_entry = 0; 00074 static int sysctl_ip_vs_drop_packet = 0; 00075 static int sysctl_ip_vs_secure_tcp = 0; 00076 static int sysctl_ip_vs_amemthresh = 2048; 00077 static int sysctl_ip_vs_am_droprate = 10; 00078 int sysctl_ip_vs_cache_bypass = 0; 00079 int sysctl_ip_vs_expire_nodest_conn = 0; 00080 int sysctl_ip_vs_sync_threshold = 3; 00081 int sysctl_ip_vs_nat_icmp_send = 0; 00082 00083 #ifdef CONFIG_IP_VS_DEBUG 00084 static int sysctl_ip_vs_debug_level = 0; 00085 00086 int ip_vs_get_debug_level(void) 00087 { 00088 return sysctl_ip_vs_debug_level; 00089 } 00090 #endif 00091 00092 /* 00093 * update_defense_level is called from timer bh and from sysctl. 00094 */ 00095 static void update_defense_level(void) 00096 { 00097 struct sysinfo i; 00098 int availmem; 00099 int nomem; 00100 00101 /* we only count free and buffered memory (in pages) */ 00102 si_meminfo(&i); 00103 availmem = i.freeram + i.bufferram; 00104 00105 nomem = (availmem < sysctl_ip_vs_amemthresh); 00106 00107 /* drop_entry */ 00108 spin_lock(&__ip_vs_dropentry_lock); 00109 switch (sysctl_ip_vs_drop_entry) { 00110 case 0: 00111 atomic_set(&ip_vs_dropentry, 0); 00112 break; 00113 case 1: 00114 if (nomem) { 00115 atomic_set(&ip_vs_dropentry, 1); 00116 sysctl_ip_vs_drop_entry = 2; 00117 } else { 00118 atomic_set(&ip_vs_dropentry, 0); 00119 } 00120 break; 00121 case 2: 00122 if (nomem) { 00123 atomic_set(&ip_vs_dropentry, 1); 00124 } else { 00125 atomic_set(&ip_vs_dropentry, 0); 00126 sysctl_ip_vs_drop_entry = 1; 00127 }; 00128 break; 00129 case 3: 00130 atomic_set(&ip_vs_dropentry, 1); 00131 break; 00132 } 00133 spin_unlock(&__ip_vs_dropentry_lock); 00134 00135 /* drop_packet */ 00136 spin_lock(&__ip_vs_droppacket_lock); 00137 switch (sysctl_ip_vs_drop_packet) { 00138 case 0: 00139 ip_vs_drop_rate = 0; 00140 break; 00141 case 1: 00142 if (nomem) { 00143 ip_vs_drop_rate = ip_vs_drop_counter 00144 = sysctl_ip_vs_amemthresh / 00145 (sysctl_ip_vs_amemthresh - availmem); 00146 sysctl_ip_vs_drop_packet = 2; 00147 } else { 00148 ip_vs_drop_rate = 0; 00149 } 00150 break; 00151 case 2: 00152 if (nomem) { 00153 ip_vs_drop_rate = ip_vs_drop_counter 00154 = sysctl_ip_vs_amemthresh / 00155 (sysctl_ip_vs_amemthresh - availmem); 00156 } else { 00157 ip_vs_drop_rate = 0; 00158 sysctl_ip_vs_drop_packet = 1; 00159 } 00160 break; 00161 case 3: 00162 ip_vs_drop_rate = sysctl_ip_vs_am_droprate; 00163 break; 00164 } 00165 spin_unlock(&__ip_vs_droppacket_lock); 00166 00167 /* secure_tcp */ 00168 write_lock(&__ip_vs_securetcp_lock); 00169 switch (sysctl_ip_vs_secure_tcp) { 00170 case 0: 00171 ip_vs_secure_tcp_set(0); 00172 break; 00173 case 1: 00174 if (nomem) { 00175 ip_vs_secure_tcp_set(1); 00176 sysctl_ip_vs_secure_tcp = 2; 00177 } else { 00178 ip_vs_secure_tcp_set(0); 00179 } 00180 break; 00181 case 2: 00182 if (nomem) { 00183 ip_vs_secure_tcp_set(1); 00184 } else { 00185 ip_vs_secure_tcp_set(0); 00186 sysctl_ip_vs_secure_tcp = 1; 00187 } 00188 break; 00189 case 3: 00190 ip_vs_secure_tcp_set(1); 00191 break; 00192 } 00193 write_unlock(&__ip_vs_securetcp_lock); 00194 } 00195 00196 00197 /* 00198 * Timer for checking the defense 00199 */ 00200 static struct timer_list defense_timer; 00201 #define DEFENSE_TIMER_PERIOD 1*HZ 00202 00203 static void defense_timer_handler(unsigned long data) 00204 { 00205 update_defense_level(); 00206 if (atomic_read(&ip_vs_dropentry)) 00207 ip_vs_random_dropentry(); 00208 00209 mod_timer(&defense_timer, jiffies + DEFENSE_TIMER_PERIOD); 00210 } 00211 00212 00213 /* 00214 * Hash table: for virtual service lookups 00215 */ 00216 #define IP_VS_SVC_TAB_BITS 8 00217 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) 00218 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 00219 00220 /* the service table hashed by <protocol, addr, port> */ 00221 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 00222 /* the service table hashed by fwmark */ 00223 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 00224 00225 /* 00226 * Hash table: for real service lookups 00227 */ 00228 #define IP_VS_RTAB_BITS 4 00229 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) 00230 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) 00231 00232 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE]; 00233 00234 /* 00235 * Trash for destinations 00236 */ 00237 static LIST_HEAD(ip_vs_dest_trash); 00238 00239 /* 00240 * FTP & NULL virtual service counters 00241 */ 00242 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0); 00243 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); 00244 00245 00246 /* 00247 * Returns hash value for virtual service 00248 */ 00249 static __inline__ unsigned 00250 ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port) 00251 { 00252 register unsigned porth = ntohs(port); 00253 00254 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth) 00255 & IP_VS_SVC_TAB_MASK; 00256 } 00257 00258 /* 00259 * Returns hash value of fwmark for virtual service lookup 00260 */ 00261 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) 00262 { 00263 return fwmark & IP_VS_SVC_TAB_MASK; 00264 } 00265 00266 /* 00267 * Hashes ip_vs_service in the ip_vs_svc_table by <proto,addr,port> 00268 * or in the ip_vs_svc_fwm_table by fwmark. 00269 * Should be called with locked tables. 00270 * Returns bool success. 00271 */ 00272 static int ip_vs_svc_hash(struct ip_vs_service *svc) 00273 { 00274 unsigned hash; 00275 00276 if (svc->flags & IP_VS_SVC_F_HASHED) { 00277 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, " 00278 "called from %p\n", __builtin_return_address(0)); 00279 return 0; 00280 } 00281 00282 if (svc->fwmark == 0) { 00283 /* 00284 * Hash it by <protocol,addr,port> in ip_vs_svc_table 00285 */ 00286 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port); 00287 list_add(&svc->s_list, &ip_vs_svc_table[hash]); 00288 } else { 00289 /* 00290 * Hash it by fwmark in ip_vs_svc_fwm_table 00291 */ 00292 hash = ip_vs_svc_fwm_hashkey(svc->fwmark); 00293 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 00294 } 00295 00296 svc->flags |= IP_VS_SVC_F_HASHED; 00297 /* increase its refcnt because it is referenced by the svc table */ 00298 atomic_inc(&svc->refcnt); 00299 return 1; 00300 } 00301 00302 00303 /* 00304 * Unhashes ip_vs_service from ip_vs_svc_table/ip_vs_svc_fwm_table. 00305 * Should be called with locked tables. 00306 * Returns bool success. 00307 */ 00308 static int ip_vs_svc_unhash(struct ip_vs_service *svc) 00309 { 00310 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 00311 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, " 00312 "called from %p\n", __builtin_return_address(0)); 00313 return 0; 00314 } 00315 00316 if (svc->fwmark == 0) { 00317 /* 00318 * Remove it from the ip_vs_svc_table table. 00319 */ 00320 list_del(&svc->s_list); 00321 } else { 00322 /* 00323 * Remove it from the ip_vs_svc_fwm_table table. 00324 */ 00325 list_del(&svc->f_list); 00326 } 00327 00328 svc->flags &= ~IP_VS_SVC_F_HASHED; 00329 atomic_dec(&svc->refcnt); 00330 return 1; 00331 } 00332 00333 00334 /* 00335 * Get service by {proto,addr,port} in the service table. 00336 */ 00337 static __inline__ struct ip_vs_service * 00338 __ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport) 00339 { 00340 unsigned hash; 00341 struct ip_vs_service *svc; 00342 struct list_head *l,*e; 00343 00344 /* 00345 * Check for "full" addressed entries 00346 */ 00347 hash = ip_vs_svc_hashkey(protocol, vaddr, vport); 00348 00349 l = &ip_vs_svc_table[hash]; 00350 for (e=l->next; e!=l; e=e->next) { 00351 svc = list_entry(e, struct ip_vs_service, s_list); 00352 if ((svc->addr == vaddr) 00353 && (svc->port == vport) 00354 && (svc->protocol == protocol)) { 00355 /* HIT */ 00356 atomic_inc(&svc->usecnt); 00357 return svc; 00358 } 00359 } 00360 00361 return NULL; 00362 } 00363 00364 00365 /* 00366 * Get service by {fwmark} in the service table. 00367 */ 00368 static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) 00369 { 00370 unsigned hash; 00371 struct ip_vs_service *svc; 00372 struct list_head *l,*e; 00373 00374 /* 00375 * Check for "full" addressed entries 00376 */ 00377 hash = ip_vs_svc_fwm_hashkey(fwmark); 00378 00379 l = &ip_vs_svc_fwm_table[hash]; 00380 for (e=l->next; e!=l; e=e->next) { 00381 svc = list_entry(e, struct ip_vs_service, f_list); 00382 if (svc->fwmark == fwmark) { 00383 /* HIT */ 00384 atomic_inc(&svc->usecnt); 00385 return svc; 00386 } 00387 } 00388 00389 return NULL; 00390 } 00391 00392 struct ip_vs_service * 00393 ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport) 00394 { 00395 struct ip_vs_service *svc; 00396 00397 read_lock(&__ip_vs_svc_lock); 00398 00399 /* 00400 * Check the table hashed by fwmark first 00401 */ 00402 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark))) 00403 goto out; 00404 00405 /* 00406 * Check the table hashed by <protocol,addr,port> 00407 * for "full" addressed entries 00408 */ 00409 svc = __ip_vs_service_get(protocol, vaddr, vport); 00410 00411 if (svc == NULL 00412 && protocol == IPPROTO_TCP 00413 && atomic_read(&ip_vs_ftpsvc_counter) 00414 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { 00415 /* 00416 * Check if ftp service entry exists, the packet 00417 * might belong to FTP data connections. 00418 */ 00419 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT); 00420 } 00421 00422 if (svc == NULL 00423 && atomic_read(&ip_vs_nullsvc_counter)) { 00424 /* 00425 * Check if the catch-all port (port zero) exists 00426 */ 00427 svc = __ip_vs_service_get(protocol, vaddr, 0); 00428 } 00429 00430 out: 00431 read_unlock(&__ip_vs_svc_lock); 00432 00433 IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", 00434 fwmark, ip_vs_proto_name(protocol), 00435 NIPQUAD(vaddr), ntohs(vport), 00436 svc?"hit":"not hit"); 00437 00438 return svc; 00439 } 00440 00441 00442 static inline void 00443 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 00444 { 00445 atomic_inc(&svc->refcnt); 00446 dest->svc = svc; 00447 } 00448 00449 static inline void 00450 __ip_vs_unbind_svc(struct ip_vs_dest *dest) 00451 { 00452 struct ip_vs_service *svc = dest->svc; 00453 00454 dest->svc = NULL; 00455 if (atomic_dec_and_test(&svc->refcnt)) 00456 kfree(svc); 00457 } 00458 00459 /* 00460 * Returns hash value for real service 00461 */ 00462 static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port) 00463 { 00464 register unsigned porth = ntohs(port); 00465 00466 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth) 00467 & IP_VS_RTAB_MASK; 00468 } 00469 00470 /* 00471 * Hashes ip_vs_dest in ip_vs_rtable by proto,addr,port. 00472 * should be called with locked tables. 00473 * returns bool success. 00474 */ 00475 static int ip_vs_rs_hash(struct ip_vs_dest *dest) 00476 { 00477 unsigned hash; 00478 00479 if (!list_empty(&dest->d_list)) { 00480 return 0; 00481 } 00482 00483 /* 00484 * Hash by proto,addr,port, 00485 * which are the parameters of the real service. 00486 */ 00487 hash = ip_vs_rs_hashkey(dest->addr, dest->port); 00488 list_add(&dest->d_list, &ip_vs_rtable[hash]); 00489 00490 return 1; 00491 } 00492 00493 /* 00494 * UNhashes ip_vs_dest from ip_vs_rtable. 00495 * should be called with locked tables. 00496 * returns bool success. 00497 */ 00498 static int ip_vs_rs_unhash(struct ip_vs_dest *dest) 00499 { 00500 /* 00501 * Remove it from the ip_vs_rtable table. 00502 */ 00503 if (!list_empty(&dest->d_list)) { 00504 list_del(&dest->d_list); 00505 INIT_LIST_HEAD(&dest->d_list); 00506 } 00507 00508 return 1; 00509 } 00510 00511 /* 00512 * Lookup real service by {proto,addr,port} in the real service table. 00513 */ 00514 struct ip_vs_dest * 00515 ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport) 00516 { 00517 unsigned hash; 00518 struct ip_vs_dest *dest; 00519 struct list_head *l,*e; 00520 00521 /* 00522 * Check for "full" addressed entries 00523 * Return the first found entry 00524 */ 00525 hash = ip_vs_rs_hashkey(daddr, dport); 00526 00527 l = &ip_vs_rtable[hash]; 00528 00529 read_lock(&__ip_vs_rs_lock); 00530 for (e=l->next; e!=l; e=e->next) { 00531 dest = list_entry(e, struct ip_vs_dest, d_list); 00532 if ((dest->addr == daddr) 00533 && (dest->port == dport) 00534 && ((dest->protocol == protocol) || 00535 dest->vfwmark)) { 00536 /* HIT */ 00537 read_unlock(&__ip_vs_rs_lock); 00538 return dest; 00539 } 00540 } 00541 read_unlock(&__ip_vs_rs_lock); 00542 00543 return NULL; 00544 } 00545 00546 /* 00547 * Lookup destination by {addr,port} in the given service 00548 */ 00549 static struct ip_vs_dest * 00550 ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) 00551 { 00552 struct ip_vs_dest *dest; 00553 struct list_head *l, *e; 00554 00555 /* 00556 * Find the destination for the given service 00557 */ 00558 l = &svc->destinations; 00559 for (e=l->next; e!=l; e=e->next) { 00560 dest = list_entry(e, struct ip_vs_dest, n_list); 00561 if ((dest->addr == daddr) && (dest->port == dport)) { 00562 /* HIT */ 00563 return dest; 00564 } 00565 } 00566 00567 return NULL; 00568 } 00569 00570 00571 /* 00572 * Lookup dest by {svc,addr,port} in the destination trash. 00573 * The destination trash is used to hold the destinations that are removed 00574 * from the service table but are still referenced by some conn entries. 00575 * The reason to add the destination trash is when the dest is temporary 00576 * down (either by administrator or by monitor program), the dest can be 00577 * picked back from the trash, the remaining connections to the dest can 00578 * continue, and the counting information of the dest is also useful for 00579 * scheduling. 00580 */ 00581 static struct ip_vs_dest * 00582 ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) 00583 { 00584 struct ip_vs_dest *dest; 00585 struct list_head *l, *e; 00586 00587 /* 00588 * Find the destination in trash 00589 */ 00590 l = &ip_vs_dest_trash; 00591 00592 for (e=l->next; e!=l; e=e->next) { 00593 dest = list_entry(e, struct ip_vs_dest, n_list); 00594 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " 00595 "refcnt=%d\n", 00596 dest->vfwmark, 00597 NIPQUAD(dest->addr), ntohs(dest->port), 00598 atomic_read(&dest->refcnt)); 00599 if (dest->addr == daddr && 00600 dest->port == dport && 00601 dest->vfwmark == svc->fwmark && 00602 dest->protocol == svc->protocol && 00603 (svc->fwmark || 00604 (dest->vaddr == svc->addr && 00605 dest->vport == svc->port))) { 00606 /* HIT */ 00607 return dest; 00608 } 00609 00610 /* 00611 * Try to purge the destination from trash if not referenced 00612 */ 00613 if (atomic_read(&dest->refcnt) == 1) { 00614 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u " 00615 "from trash\n", 00616 dest->vfwmark, 00617 NIPQUAD(dest->addr), ntohs(dest->port)); 00618 e = e->prev; 00619 list_del(&dest->n_list); 00620 __ip_vs_dst_reset(dest); 00621 __ip_vs_unbind_svc(dest); 00622 kfree(dest); 00623 } 00624 } 00625 00626 return NULL; 00627 } 00628 00629 00630 /* 00631 * Clean up all the destinations in the trash 00632 * Called by the ip_vs_control_cleanup() 00633 * 00634 * When the ip_vs_control_clearup is activated by ipvs module exit, 00635 * the service tables must have been flushed and all the connections 00636 * are expired, and the refcnt of each destination in the trash must 00637 * be 1, so we simply release them here. 00638 */ 00639 static void ip_vs_trash_cleanup(void) 00640 { 00641 struct ip_vs_dest *dest; 00642 struct list_head *l; 00643 00644 l = &ip_vs_dest_trash; 00645 00646 while (l->next != l) { 00647 dest = list_entry(l->next, struct ip_vs_dest, n_list); 00648 list_del(&dest->n_list); 00649 __ip_vs_dst_reset(dest); 00650 __ip_vs_unbind_svc(dest); 00651 kfree(dest); 00652 } 00653 } 00654 00655 00656 static inline void 00657 __ip_vs_zero_stats(struct ip_vs_stats *stats) 00658 { 00659 spin_lock_bh(&stats->lock); 00660 memset(stats, 0, (char *)&stats->lock - (char *)stats); 00661 spin_unlock_bh(&stats->lock); 00662 ip_vs_zero_estimator(stats); 00663 } 00664 00665 /* 00666 * Update a destination in the given service 00667 */ 00668 static void __ip_vs_update_dest(struct ip_vs_service *svc, 00669 struct ip_vs_dest *dest, 00670 struct ip_vs_rule_user *ur) 00671 { 00672 int conn_flags; 00673 00674 /* 00675 * Set the weight and the flags 00676 */ 00677 atomic_set(&dest->weight, ur->weight); 00678 00679 conn_flags = ur->conn_flags | IP_VS_CONN_F_INACTIVE; 00680 00681 /* 00682 * Check if local node and update the flags 00683 */ 00684 if (inet_addr_type(ur->daddr) == RTN_LOCAL) { 00685 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) 00686 | IP_VS_CONN_F_LOCALNODE; 00687 } 00688 00689 /* 00690 * Set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading 00691 */ 00692 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { 00693 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 00694 } else { 00695 /* 00696 * Put the real service in ip_vs_rtable if not present. 00697 * For now only for NAT! 00698 */ 00699 write_lock_bh(&__ip_vs_rs_lock); 00700 ip_vs_rs_hash(dest); 00701 write_unlock_bh(&__ip_vs_rs_lock); 00702 } 00703 atomic_set(&dest->conn_flags, conn_flags); 00704 00705 /* bind the service */ 00706 if (!dest->svc) { 00707 __ip_vs_bind_svc(dest, svc); 00708 } else { 00709 if (dest->svc != svc) { 00710 __ip_vs_unbind_svc(dest); 00711 __ip_vs_zero_stats(&dest->stats); 00712 __ip_vs_bind_svc(dest, svc); 00713 } 00714 } 00715 00716 /* set the dest status flags */ 00717 dest->flags |= IP_VS_DEST_F_AVAILABLE; 00718 } 00719 00720 00721 /* 00722 * Create a destination for the given service 00723 */ 00724 static int 00725 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_rule_user *ur, 00726 struct ip_vs_dest **destp) 00727 { 00728 struct ip_vs_dest *dest; 00729 unsigned atype; 00730 00731 EnterFunction(2); 00732 00733 atype = inet_addr_type(ur->daddr); 00734 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 00735 return -EINVAL; 00736 00737 *destp = dest = (struct ip_vs_dest*) 00738 kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); 00739 if (dest == NULL) { 00740 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n"); 00741 return -ENOMEM; 00742 } 00743 memset(dest, 0, sizeof(struct ip_vs_dest)); 00744 00745 dest->protocol = svc->protocol; 00746 dest->vaddr = svc->addr; 00747 dest->vport = svc->port; 00748 dest->vfwmark = svc->fwmark; 00749 dest->addr = ur->daddr; 00750 dest->port = ur->dport; 00751 00752 atomic_set(&dest->activeconns, 0); 00753 atomic_set(&dest->inactconns, 0); 00754 atomic_set(&dest->refcnt, 0); 00755 00756 INIT_LIST_HEAD(&dest->d_list); 00757 dest->dst_lock = SPIN_LOCK_UNLOCKED; 00758 dest->stats.lock = SPIN_LOCK_UNLOCKED; 00759 __ip_vs_update_dest(svc, dest, ur); 00760 ip_vs_new_estimator(&dest->stats); 00761 00762 LeaveFunction(2); 00763 return 0; 00764 } 00765 00766 00767 /* 00768 * Add a destination into an existing service 00769 */ 00770 static int ip_vs_add_dest(struct ip_vs_service *svc, 00771 struct ip_vs_rule_user *ur) 00772 { 00773 struct ip_vs_dest *dest; 00774 __u32 daddr = ur->daddr; 00775 __u16 dport = ur->dport; 00776 int ret; 00777 00778 EnterFunction(2); 00779 00780 if (ur->weight < 0) { 00781 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n"); 00782 return -ERANGE; 00783 } 00784 00785 /* 00786 * Check if the dest already exists in the list 00787 */ 00788 dest = ip_vs_lookup_dest(svc, daddr, dport); 00789 if (dest != NULL) { 00790 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n"); 00791 return -EEXIST; 00792 } 00793 00794 /* 00795 * Check if the dest already exists in the trash and 00796 * is from the same service 00797 */ 00798 dest = ip_vs_trash_get_dest(svc, daddr, dport); 00799 if (dest != NULL) { 00800 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " 00801 "refcnt=%d, service %u/%u.%u.%u.%u:%u\n", 00802 NIPQUAD(daddr), ntohs(dport), 00803 atomic_read(&dest->refcnt), 00804 dest->vfwmark, 00805 NIPQUAD(dest->vaddr), 00806 ntohs(dest->vport)); 00807 __ip_vs_update_dest(svc, dest, ur); 00808 00809 /* 00810 * Get the destination from the trash 00811 */ 00812 list_del(&dest->n_list); 00813 00814 ip_vs_new_estimator(&dest->stats); 00815 00816 write_lock_bh(&__ip_vs_svc_lock); 00817 00818 /* 00819 * Wait until all other svc users go away. 00820 */ 00821 while (atomic_read(&svc->usecnt) > 1) {}; 00822 00823 list_add(&dest->n_list, &svc->destinations); 00824 svc->num_dests++; 00825 00826 /* call the update_service function of its scheduler */ 00827 svc->scheduler->update_service(svc); 00828 00829 write_unlock_bh(&__ip_vs_svc_lock); 00830 return 0; 00831 } 00832 00833 /* 00834 * Allocate and initialize the dest structure 00835 */ 00836 ret = ip_vs_new_dest(svc, ur, &dest); 00837 if (ret) { 00838 return ret; 00839 } 00840 00841 /* 00842 * Add the dest entry into the list 00843 */ 00844 atomic_inc(&dest->refcnt); 00845 00846 write_lock_bh(&__ip_vs_svc_lock); 00847 00848 /* 00849 * Wait until all other svc users go away. 00850 */ 00851 while (atomic_read(&svc->usecnt) > 1) {}; 00852 00853 list_add(&dest->n_list, &svc->destinations); 00854 svc->num_dests++; 00855 00856 /* call the update_service function of its scheduler */ 00857 svc->scheduler->update_service(svc); 00858 00859 write_unlock_bh(&__ip_vs_svc_lock); 00860 00861 LeaveFunction(2); 00862 00863 return 0; 00864 } 00865 00866 00867 /* 00868 * Edit a destination in the given service 00869 */ 00870 static int ip_vs_edit_dest(struct ip_vs_service *svc, 00871 struct ip_vs_rule_user *ur) 00872 { 00873 struct ip_vs_dest *dest; 00874 __u32 daddr = ur->daddr; 00875 __u16 dport = ur->dport; 00876 00877 EnterFunction(2); 00878 00879 if (ur->weight < 0) { 00880 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n"); 00881 return -ERANGE; 00882 } 00883 00884 /* 00885 * Lookup the destination list 00886 */ 00887 dest = ip_vs_lookup_dest(svc, daddr, dport); 00888 if (dest == NULL) { 00889 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n"); 00890 return -ENOENT; 00891 } 00892 00893 __ip_vs_update_dest(svc, dest, ur); 00894 00895 /* call the update_service, because server weight may be changed */ 00896 svc->scheduler->update_service(svc); 00897 00898 LeaveFunction(2); 00899 00900 return 0; 00901 } 00902 00903 00904 /* 00905 * Delete a destination (must be already unlinked from the service) 00906 */ 00907 static void __ip_vs_del_dest(struct ip_vs_dest *dest) 00908 { 00909 ip_vs_kill_estimator(&dest->stats); 00910 00911 /* 00912 * Remove it from the d-linked list with the real services. 00913 */ 00914 write_lock_bh(&__ip_vs_rs_lock); 00915 ip_vs_rs_unhash(dest); 00916 write_unlock_bh(&__ip_vs_rs_lock); 00917 00918 /* 00919 * Decrease the refcnt of the dest, and free the dest 00920 * if nobody refers to it (refcnt=0). Otherwise, throw 00921 * the destination into the trash. 00922 */ 00923 if (atomic_dec_and_test(&dest->refcnt)) { 00924 __ip_vs_dst_reset(dest); 00925 /* simply decrease svc->refcnt here, let the caller check 00926 and release the service if nobody refers to it. 00927 Only user context can release destination and service, 00928 and only one user context can update virtual service at a 00929 time, so the operation here is OK */ 00930 atomic_dec(&dest->svc->refcnt); 00931 kfree(dest); 00932 } else { 00933 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n", 00934 NIPQUAD(dest->addr), ntohs(dest->port), 00935 atomic_read(&dest->refcnt)); 00936 list_add(&dest->n_list, &ip_vs_dest_trash); 00937 atomic_inc(&dest->refcnt); 00938 } 00939 } 00940 00941 00942 /* 00943 * Unlink a destination from the given service 00944 */ 00945 static void __ip_vs_unlink_dest(struct ip_vs_service *svc, 00946 struct ip_vs_dest *dest, 00947 int svcupd) 00948 { 00949 dest->flags &= ~IP_VS_DEST_F_AVAILABLE; 00950 00951 /* 00952 * Remove it from the d-linked destination list. 00953 */ 00954 list_del(&dest->n_list); 00955 svc->num_dests--; 00956 if (svcupd) { 00957 /* 00958 * Call the update_service function of its scheduler 00959 */ 00960 svc->scheduler->update_service(svc); 00961 } 00962 } 00963 00964 00965 /* 00966 * Delete a destination server in the given service 00967 */ 00968 static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_rule_user *ur) 00969 { 00970 struct ip_vs_dest *dest; 00971 __u32 daddr = ur->daddr; 00972 __u16 dport = ur->dport; 00973 00974 EnterFunction(2); 00975 00976 dest = ip_vs_lookup_dest(svc, daddr, dport); 00977 if (dest == NULL) { 00978 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n"); 00979 return -ENOENT; 00980 } 00981 00982 write_lock_bh(&__ip_vs_svc_lock); 00983 00984 /* 00985 * Wait until all other svc users go away. 00986 */ 00987 while (atomic_read(&svc->usecnt) > 1) {}; 00988 00989 /* 00990 * Unlink dest from the service 00991 */ 00992 __ip_vs_unlink_dest(svc, dest, 1); 00993 00994 write_unlock_bh(&__ip_vs_svc_lock); 00995 00996 /* 00997 * Delete the destination 00998 */ 00999 __ip_vs_del_dest(dest); 01000 01001 LeaveFunction(2); 01002 01003 return 0; 01004 } 01005 01006 01007 /* 01008 * Add a service into the service hash table 01009 */ 01010 static int 01011 ip_vs_add_service(struct ip_vs_rule_user *ur, struct ip_vs_service **svc_p) 01012 { 01013 int ret = 0; 01014 struct ip_vs_scheduler *sched; 01015 struct ip_vs_service *svc = NULL; 01016 01017 MOD_INC_USE_COUNT; 01018 01019 /* 01020 * Lookup the scheduler, by 'ur->sched_name' 01021 */ 01022 sched = ip_vs_scheduler_get(ur->sched_name); 01023 if (sched == NULL) { 01024 IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n", 01025 ur->sched_name); 01026 ret = -ENOENT; 01027 goto out_mod_dec; 01028 } 01029 01030 svc = (struct ip_vs_service*) 01031 kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); 01032 if (svc == NULL) { 01033 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n"); 01034 ret = -ENOMEM; 01035 goto out_err; 01036 } 01037 memset(svc, 0, sizeof(struct ip_vs_service)); 01038 01039 svc->protocol = ur->protocol; 01040 svc->addr = ur->vaddr; 01041 svc->port = ur->vport; 01042 svc->fwmark = ur->vfwmark; 01043 svc->flags = ur->vs_flags; 01044 svc->timeout = ur->timeout * HZ; 01045 svc->netmask = ur->netmask; 01046 01047 INIT_LIST_HEAD(&svc->destinations); 01048 svc->sched_lock = RW_LOCK_UNLOCKED; 01049 svc->stats.lock = SPIN_LOCK_UNLOCKED; 01050 01051 /* 01052 * Bind the scheduler 01053 */ 01054 ret = ip_vs_bind_scheduler(svc, sched); 01055 if (ret) { 01056 goto out_err; 01057 } 01058 01059 /* 01060 * Update the virtual service counters 01061 */ 01062 if (svc->port == FTPPORT) 01063 atomic_inc(&ip_vs_ftpsvc_counter); 01064 else if (svc->port == 0) 01065 atomic_inc(&ip_vs_nullsvc_counter); 01066 01067 /* 01068 * I'm the first user of the service 01069 */ 01070 atomic_set(&svc->usecnt, 1); 01071 atomic_set(&svc->refcnt, 0); 01072 01073 ip_vs_new_estimator(&svc->stats); 01074 ip_vs_num_services++; 01075 01076 /* 01077 * Hash the service into the service table 01078 */ 01079 write_lock_bh(&__ip_vs_svc_lock); 01080 ip_vs_svc_hash(svc); 01081 write_unlock_bh(&__ip_vs_svc_lock); 01082 01083 *svc_p = svc; 01084 return 0; 01085 01086 out_err: 01087 if (svc) 01088 kfree(svc); 01089 ip_vs_scheduler_put(sched); 01090 out_mod_dec: 01091 MOD_DEC_USE_COUNT; 01092 return ret; 01093 } 01094 01095 01096 /* 01097 * Edit a service and bind it with a new scheduler 01098 */ 01099 static int ip_vs_edit_service(struct ip_vs_service *svc, 01100 struct ip_vs_rule_user *ur) 01101 { 01102 struct ip_vs_scheduler *sched, *old_sched; 01103 int ret = 0; 01104 01105 /* 01106 * Lookup the scheduler, by 'ur->sched_name' 01107 */ 01108 sched = ip_vs_scheduler_get(ur->sched_name); 01109 if (sched == NULL) { 01110 IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n", 01111 ur->sched_name); 01112 return -ENOENT; 01113 } 01114 01115 write_lock_bh(&__ip_vs_svc_lock); 01116 01117 /* 01118 * Wait until all other svc users go away. 01119 */ 01120 while (atomic_read(&svc->usecnt) > 1) {}; 01121 01122 /* 01123 * Set the flags and timeout value 01124 */ 01125 svc->flags = ur->vs_flags | IP_VS_SVC_F_HASHED; 01126 svc->timeout = ur->timeout * HZ; 01127 svc->netmask = ur->netmask; 01128 01129 old_sched = svc->scheduler; 01130 if (sched != old_sched) { 01131 /* 01132 * Unbind the old scheduler 01133 */ 01134 if ((ret = ip_vs_unbind_scheduler(svc))) { 01135 old_sched = sched; 01136 goto out; 01137 } 01138 01139 /* 01140 * Bind the new scheduler 01141 */ 01142 if ((ret = ip_vs_bind_scheduler(svc, sched))) { 01143 /* 01144 * If ip_vs_bind_scheduler fails, restore the old 01145 * scheduler. 01146 * The main reason of failure is out of memory. 01147 * 01148 * The question is if the old scheduler can be 01149 * restored all the time. TODO: if it cannot be 01150 * restored some time, we must delete the service, 01151 * otherwise the system may crash. 01152 */ 01153 ip_vs_bind_scheduler(svc, old_sched); 01154 old_sched = sched; 01155 } 01156 } 01157 01158 out: 01159 write_unlock_bh(&__ip_vs_svc_lock); 01160 01161 if (old_sched) 01162 ip_vs_scheduler_put(old_sched); 01163 01164 return ret; 01165 } 01166 01167 01168 /* 01169 * Delete a service from the service list 01170 * The service must be unlinked, unlocked and not referenced! 01171 */ 01172 static void __ip_vs_del_service(struct ip_vs_service *svc) 01173 { 01174 struct list_head *l; 01175 struct ip_vs_dest *dest; 01176 struct ip_vs_scheduler *old_sched; 01177 01178 ip_vs_num_services--; 01179 ip_vs_kill_estimator(&svc->stats); 01180 01181 /* 01182 * Unbind scheduler 01183 */ 01184 old_sched = svc->scheduler; 01185 ip_vs_unbind_scheduler(svc); 01186 if (old_sched && old_sched->module) 01187 __MOD_DEC_USE_COUNT(old_sched->module); 01188 01189 /* 01190 * Unlink the whole destination list 01191 */ 01192 l = &svc->destinations; 01193 while (l->next != l) { 01194 dest = list_entry(l->next, struct ip_vs_dest, n_list); 01195 __ip_vs_unlink_dest(svc, dest, 0); 01196 __ip_vs_del_dest(dest); 01197 } 01198 01199 /* 01200 * Update the virtual service counters 01201 */ 01202 if (svc->port == FTPPORT) 01203 atomic_dec(&ip_vs_ftpsvc_counter); 01204 else if (svc->port == 0) 01205 atomic_dec(&ip_vs_nullsvc_counter); 01206 01207 /* 01208 * Free the service if nobody refers to it 01209 */ 01210 if (atomic_read(&svc->refcnt) == 0) 01211 kfree(svc); 01212 MOD_DEC_USE_COUNT; 01213 } 01214 01215 /* 01216 * Delete a service from the service list 01217 */ 01218 static int ip_vs_del_service(struct ip_vs_service *svc) 01219 { 01220 if (svc == NULL) 01221 return -EEXIST; 01222 01223 /* 01224 * Unhash it from the service table 01225 */ 01226 write_lock_bh(&__ip_vs_svc_lock); 01227 01228 ip_vs_svc_unhash(svc); 01229 01230 /* 01231 * Wait until all the svc users go away. 01232 */ 01233 while (atomic_read(&svc->usecnt) > 1) {}; 01234 01235 __ip_vs_del_service(svc); 01236 01237 write_unlock_bh(&__ip_vs_svc_lock); 01238 01239 return 0; 01240 } 01241 01242 01243 /* 01244 * Flush all the virtual services 01245 */ 01246 static int ip_vs_flush(void) 01247 { 01248 int idx; 01249 struct ip_vs_service *svc; 01250 struct list_head *l; 01251 01252 /* 01253 * Flush the service table hashed by <protocol,addr,port> 01254 */ 01255 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 01256 l = &ip_vs_svc_table[idx]; 01257 while (l->next != l) { 01258 svc = list_entry(l->next,struct ip_vs_service,s_list); 01259 write_lock_bh(&__ip_vs_svc_lock); 01260 ip_vs_svc_unhash(svc); 01261 /* 01262 * Wait until all the svc users go away. 01263 */ 01264 while (atomic_read(&svc->usecnt) > 0) {}; 01265 __ip_vs_del_service(svc); 01266 write_unlock_bh(&__ip_vs_svc_lock); 01267 } 01268 } 01269 01270 /* 01271 * Flush the service table hashed by fwmark 01272 */ 01273 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 01274 l = &ip_vs_svc_fwm_table[idx]; 01275 while (l->next != l) { 01276 svc = list_entry(l->next,struct ip_vs_service,f_list); 01277 write_lock_bh(&__ip_vs_svc_lock); 01278 ip_vs_svc_unhash(svc); 01279 /* 01280 * Wait until all the svc users go away. 01281 */ 01282 while (atomic_read(&svc->usecnt) > 0) {}; 01283 __ip_vs_del_service(svc); 01284 write_unlock_bh(&__ip_vs_svc_lock); 01285 } 01286 } 01287 01288 return 0; 01289 } 01290 01291 01292 /* 01293 * Zero counters in a service or all services 01294 */ 01295 static int ip_vs_zero_service(struct ip_vs_service *svc) 01296 { 01297 struct list_head *l; 01298 struct ip_vs_dest *dest; 01299 01300 write_lock_bh(&__ip_vs_svc_lock); 01301 list_for_each (l, &svc->destinations) { 01302 dest = list_entry(l, struct ip_vs_dest, n_list); 01303 __ip_vs_zero_stats(&dest->stats); 01304 } 01305 __ip_vs_zero_stats(&svc->stats); 01306 write_unlock_bh(&__ip_vs_svc_lock); 01307 return 0; 01308 } 01309 01310 static int ip_vs_zero_all(void) 01311 { 01312 int idx; 01313 struct list_head *l; 01314 struct ip_vs_service *svc; 01315 01316 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 01317 list_for_each (l, &ip_vs_svc_table[idx]) { 01318 svc = list_entry(l, struct ip_vs_service, s_list); 01319 ip_vs_zero_service(svc); 01320 } 01321 } 01322 01323 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 01324 list_for_each (l, &ip_vs_svc_fwm_table[idx]) { 01325 svc = list_entry(l, struct ip_vs_service, f_list); 01326 ip_vs_zero_service(svc); 01327 } 01328 } 01329 01330 __ip_vs_zero_stats(&ip_vs_stats); 01331 return 0; 01332 } 01333 01334 01335 static int ip_vs_sysctl_defense_mode(ctl_table *ctl, int write, 01336 struct file * filp, void *buffer, size_t *lenp) 01337 { 01338 int *valp = ctl->data; 01339 int val = *valp; 01340 int ret; 01341 01342 ret = proc_dointvec(ctl, write, filp, buffer, lenp); 01343 if (write && (*valp != val)) { 01344 if ((*valp < 0) || (*valp > 3)) { 01345 /* Restore the correct value */ 01346 *valp = val; 01347 } else { 01348 local_bh_disable(); 01349 update_defense_level(); 01350 local_bh_enable(); 01351 } 01352 } 01353 return ret; 01354 } 01355 01356 01357 /* 01358 * IPVS sysctl table 01359 */ 01360 struct ip_vs_sysctl_table { 01361 struct ctl_table_header *sysctl_header; 01362 ctl_table vs_vars[NET_IPV4_VS_LAST]; 01363 ctl_table vs_dir[2]; 01364 ctl_table ipv4_dir[2]; 01365 ctl_table root_dir[2]; 01366 }; 01367 01368 01369 static struct ip_vs_sysctl_table ipv4_vs_table = { 01370 NULL, 01371 {{NET_IPV4_VS_AMEMTHRESH, "amemthresh", 01372 &sysctl_ip_vs_amemthresh, sizeof(int), 0644, NULL, 01373 &proc_dointvec}, 01374 #ifdef CONFIG_IP_VS_DEBUG 01375 {NET_IPV4_VS_DEBUG_LEVEL, "debug_level", 01376 &sysctl_ip_vs_debug_level, sizeof(int), 0644, NULL, 01377 &proc_dointvec}, 01378 #endif 01379 {NET_IPV4_VS_AMDROPRATE, "am_droprate", 01380 &sysctl_ip_vs_am_droprate, sizeof(int), 0644, NULL, 01381 &proc_dointvec}, 01382 {NET_IPV4_VS_DROP_ENTRY, "drop_entry", 01383 &sysctl_ip_vs_drop_entry, sizeof(int), 0644, NULL, 01384 &ip_vs_sysctl_defense_mode}, 01385 {NET_IPV4_VS_DROP_PACKET, "drop_packet", 01386 &sysctl_ip_vs_drop_packet, sizeof(int), 0644, NULL, 01387 &ip_vs_sysctl_defense_mode}, 01388 {NET_IPV4_VS_SECURE_TCP, "secure_tcp", 01389 &sysctl_ip_vs_secure_tcp, sizeof(int), 0644, NULL, 01390 &ip_vs_sysctl_defense_mode}, 01391 {NET_IPV4_VS_TO_ES, "timeout_established", 01392 &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], 01393 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01394 {NET_IPV4_VS_TO_SS, "timeout_synsent", 01395 &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], 01396 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01397 {NET_IPV4_VS_TO_SR, "timeout_synrecv", 01398 &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], 01399 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01400 {NET_IPV4_VS_TO_FW, "timeout_finwait", 01401 &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], 01402 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01403 {NET_IPV4_VS_TO_TW, "timeout_timewait", 01404 &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], 01405 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01406 {NET_IPV4_VS_TO_CL, "timeout_close", 01407 &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], 01408 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01409 {NET_IPV4_VS_TO_CW, "timeout_closewait", 01410 &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], 01411 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01412 {NET_IPV4_VS_TO_LA, "timeout_lastack", 01413 &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], 01414 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01415 {NET_IPV4_VS_TO_LI, "timeout_listen", 01416 &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], 01417 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01418 {NET_IPV4_VS_TO_SA, "timeout_synack", 01419 &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], 01420 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01421 {NET_IPV4_VS_TO_UDP, "timeout_udp", 01422 &vs_timeout_table_dos.timeout[IP_VS_S_UDP], 01423 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01424 {NET_IPV4_VS_TO_ICMP, "timeout_icmp", 01425 &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], 01426 sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, 01427 {NET_IPV4_VS_CACHE_BYPASS, "cache_bypass", 01428 &sysctl_ip_vs_cache_bypass, sizeof(int), 0644, NULL, 01429 &proc_dointvec}, 01430 {NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn", 01431 &sysctl_ip_vs_expire_nodest_conn, sizeof(int), 0644, NULL, 01432 &proc_dointvec}, 01433 {NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold", 01434 &sysctl_ip_vs_sync_threshold, sizeof(int), 0644, NULL, 01435 &proc_dointvec}, 01436 {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send", 01437 &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL, 01438 &proc_dointvec}, 01439 {0}}, 01440 {{NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table.vs_vars}, 01441 {0}}, 01442 {{NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_vs_table.vs_dir}, 01443 {0}}, 01444 {{CTL_NET, "net", NULL, 0, 0555, ipv4_vs_table.ipv4_dir}, 01445 {0}} 01446 }; 01447 01448 01449 /* 01450 * Write the contents of the VS rule table to a PROCfs file. 01451 * (It is kept just for backward compatibility) 01452 */ 01453 static inline char *ip_vs_fwd_name(unsigned flags) 01454 { 01455 char *fwd; 01456 01457 switch (flags & IP_VS_CONN_F_FWD_MASK) { 01458 case IP_VS_CONN_F_LOCALNODE: 01459 fwd = "Local"; 01460 break; 01461 case IP_VS_CONN_F_TUNNEL: 01462 fwd = "Tunnel"; 01463 break; 01464 case IP_VS_CONN_F_DROUTE: 01465 fwd = "Route"; 01466 break; 01467 default: 01468 fwd = "Masq"; 01469 } 01470 return fwd; 01471 } 01472 01473 static int ip_vs_get_info(char *buf, char **start, off_t offset, int length) 01474 { 01475 int len=0; 01476 off_t pos=0; 01477 char temp[64], temp2[32]; 01478 int idx; 01479 struct ip_vs_service *svc; 01480 struct ip_vs_dest *dest; 01481 struct list_head *l, *e, *p, *q; 01482 01483 /* 01484 * Note: since the length of the buffer is usually the multiple 01485 * of 512, it is good to use fixed record of the divisor of 512, 01486 * so that records won't be truncated at buffer boundary. 01487 */ 01488 pos = 192; 01489 if (pos > offset) { 01490 sprintf(temp, 01491 "IP Virtual Server version %d.%d.%d (size=%d)", 01492 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE); 01493 len += sprintf(buf+len, "%-63s\n", temp); 01494 len += sprintf(buf+len, "%-63s\n", 01495 "Prot LocalAddress:Port Scheduler Flags"); 01496 len += sprintf(buf+len, "%-63s\n", 01497 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn"); 01498 } 01499 01500 read_lock_bh(&__ip_vs_svc_lock); 01501 01502 /* print the service table hashed by <protocol,addr,port> */ 01503 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 01504 l = &ip_vs_svc_table[idx]; 01505 for (e=l->next; e!=l; e=e->next) { 01506 svc = list_entry(e, struct ip_vs_service, s_list); 01507 pos += 64; 01508 if (pos > offset) { 01509 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 01510 sprintf(temp2, "persistent %d %08X", 01511 svc->timeout, 01512 ntohl(svc->netmask)); 01513 else 01514 temp2[0] = '\0'; 01515 01516 sprintf(temp, "%s %08X:%04X %s %s", 01517 ip_vs_proto_name(svc->protocol), 01518 ntohl(svc->addr), 01519 ntohs(svc->port), 01520 svc->scheduler->name, temp2); 01521 len += sprintf(buf+len, "%-63s\n", temp); 01522 if (len >= length) 01523 goto done; 01524 } 01525 01526 p = &svc->destinations; 01527 for (q=p->next; q!=p; q=q->next) { 01528 dest = list_entry(q, struct ip_vs_dest, n_list); 01529 pos += 64; 01530 if (pos <= offset) 01531 continue; 01532 sprintf(temp, 01533 " -> %08X:%04X %-7s %-6d %-10d %-10d", 01534 ntohl(dest->addr), 01535 ntohs(dest->port), 01536 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 01537 atomic_read(&dest->weight), 01538 atomic_read(&dest->activeconns), 01539 atomic_read(&dest->inactconns)); 01540 len += sprintf(buf+len, "%-63s\n", temp); 01541 if (len >= length) 01542 goto done; 01543 } 01544 } 01545 } 01546 01547 /* print the service table hashed by fwmark */ 01548 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 01549 l = &ip_vs_svc_fwm_table[idx]; 01550 for (e=l->next; e!=l; e=e->next) { 01551 svc = list_entry(e, struct ip_vs_service, f_list); 01552 pos += 64; 01553 if (pos > offset) { 01554 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 01555 sprintf(temp2, "persistent %d %08X", 01556 svc->timeout, 01557 ntohl(svc->netmask)); 01558 else 01559 temp2[0] = '\0'; 01560 01561 sprintf(temp, "FWM %08X %s %s", 01562 svc->fwmark, 01563 svc->scheduler->name, temp2); 01564 len += sprintf(buf+len, "%-63s\n", temp); 01565 if (len >= length) 01566 goto done; 01567 } 01568 01569 p = &svc->destinations; 01570 for (q=p->next; q!=p; q=q->next) { 01571 dest = list_entry(q, struct ip_vs_dest, n_list); 01572 pos += 64; 01573 if (pos <= offset) 01574 continue; 01575 sprintf(temp, 01576 " -> %08X:%04X %-7s %-6d %-10d %-10d", 01577 ntohl(dest->addr), 01578 ntohs(dest->port), 01579 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 01580 atomic_read(&dest->weight), 01581 atomic_read(&dest->activeconns), 01582 atomic_read(&dest->inactconns)); 01583 len += sprintf(buf+len, "%-63s\n", temp); 01584 if (len >= length) 01585 goto done; 01586 } 01587 } 01588 } 01589 01590 done: 01591 read_unlock_bh(&__ip_vs_svc_lock); 01592 01593 *start = buf+len-(pos-offset); /* Start of wanted data */ 01594 len = pos-offset; 01595 if (len > length) 01596 len = length; 01597 if (len < 0) 01598 len = 0; 01599 return len; 01600 } 01601 01602 01603 struct ip_vs_stats ip_vs_stats; 01604 01605 static int 01606 ip_vs_stats_get_info(char *buf, char **start, off_t offset, int length) 01607 { 01608 int len=0; 01609 off_t pos=0; 01610 char temp[64]; 01611 01612 pos += 320; 01613 if (pos > offset) { 01614 len += sprintf(buf+len, "%-63s\n%-63s\n", 01615 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 01616 " Total Incoming Outgoing Incoming Outgoing", 01617 " Conns Packets Packets Bytes Bytes"); 01618 01619 spin_lock_bh(&ip_vs_stats.lock); 01620 sprintf(temp, "%8X %8X %8X %8X%08X %8X%08X", 01621 ip_vs_stats.conns, 01622 ip_vs_stats.inpkts, 01623 ip_vs_stats.outpkts, 01624 (__u32)(ip_vs_stats.inbytes>>32), 01625 (__u32)ip_vs_stats.inbytes, 01626 (__u32)(ip_vs_stats.outbytes>>32), 01627 (__u32)ip_vs_stats.outbytes); 01628 len += sprintf(buf+len, "%-62s\n\n", temp); 01629 01630 len += sprintf(buf+len, "%-63s\n", 01631 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 01632 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s"); 01633 sprintf(temp, "%8X %8X %8X %16X %16X", 01634 ip_vs_stats.cps, 01635 ip_vs_stats.inpps, 01636 ip_vs_stats.outpps, 01637 ip_vs_stats.inbps, 01638 ip_vs_stats.outbps); 01639 len += sprintf(buf+len, "%-63s\n", temp); 01640 01641 spin_unlock_bh(&ip_vs_stats.lock); 01642 } 01643 01644 *start = buf+len-(pos-offset); /* Start of wanted data */ 01645 len = pos-offset; 01646 if (len > length) 01647 len = length; 01648 if (len < 0) 01649 len = 0; 01650 return len; 01651 } 01652 01653 01654 /* 01655 * Set timeout values for tcp tcpfin udp in the vs_timeout_table. 01656 */ 01657 static int ip_vs_set_timeouts(struct ip_vs_rule_user *u) 01658 { 01659 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", 01660 u->tcp_timeout, 01661 u->tcp_fin_timeout, 01662 u->udp_timeout); 01663 01664 if (u->tcp_timeout) { 01665 vs_timeout_table.timeout[IP_VS_S_ESTABLISHED] 01666 = u->tcp_timeout * HZ; 01667 } 01668 01669 if (u->tcp_fin_timeout) { 01670 vs_timeout_table.timeout[IP_VS_S_FIN_WAIT] 01671 = u->tcp_fin_timeout * HZ; 01672 } 01673 01674 if (u->udp_timeout) { 01675 vs_timeout_table.timeout[IP_VS_S_UDP] 01676 = u->udp_timeout * HZ; 01677 } 01678 return 0; 01679 } 01680 01681 01682 static int 01683 do_ip_vs_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len) 01684 { 01685 int ret; 01686 struct ip_vs_rule_user *urule; 01687 struct ip_vs_service *svc = NULL; 01688 01689 if (!capable(CAP_NET_ADMIN)) 01690 return -EPERM; 01691 01692 /* 01693 * Check the size of mm, no overflow... 01694 * len > 128000 is a sanity check. 01695 */ 01696 if (len < sizeof(struct ip_vs_rule_user)) { 01697 IP_VS_ERR("set_ctl: len %u < %u\n", 01698 len, sizeof(struct ip_vs_rule_user)); 01699 return -EINVAL; 01700 } else if (len > 128000) { 01701 IP_VS_ERR("set_ctl: len %u > 128000\n", len); 01702 return -EINVAL; 01703 } else if ((urule = kmalloc(len, GFP_KERNEL)) == NULL) { 01704 IP_VS_ERR("set_ctl: no mem for len %u\n", len); 01705 return -ENOMEM; 01706 } else if (copy_from_user(urule, user, len) != 0) { 01707 ret = -EFAULT; 01708 goto out_free; 01709 } 01710 01711 MOD_INC_USE_COUNT; 01712 if (down_interruptible(&__ip_vs_mutex)) { 01713 ret = -ERESTARTSYS; 01714 goto out_dec; 01715 } 01716 01717 if (cmd == IP_VS_SO_SET_FLUSH) { 01718 /* Flush the virtual service */ 01719 ret = ip_vs_flush(); 01720 goto out_unlock; 01721 } else if (cmd == IP_VS_SO_SET_TIMEOUTS) { 01722 /* Set timeout values for (tcp tcpfin udp) */ 01723 ret = ip_vs_set_timeouts(urule); 01724 goto out_unlock; 01725 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { 01726 ret = start_sync_thread(urule->state, urule->mcast_ifn); 01727 goto out_unlock; 01728 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { 01729 ret = stop_sync_thread(); 01730 goto out_unlock; 01731 } else if (cmd == IP_VS_SO_SET_ZERO) { 01732 /* if no service address is set, zero counters in all */ 01733 if (!urule->vfwmark && !urule->vaddr && !urule->vport) { 01734 ret = ip_vs_zero_all(); 01735 goto out_unlock; 01736 } 01737 } 01738 01739 /* 01740 * Check for valid protocol: TCP or UDP. Even for fwmark!=0 01741 */ 01742 if (urule->protocol!=IPPROTO_TCP && urule->protocol!=IPPROTO_UDP) { 01743 IP_VS_INFO("vs_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s", 01744 ntohs(urule->protocol), NIPQUAD(urule->vaddr), 01745 ntohs(urule->vport), urule->sched_name); 01746 ret = -EFAULT; 01747 goto out_unlock; 01748 } 01749 01750 /* 01751 * Lookup the exact service by <protocol, vaddr, vport> or fwmark 01752 */ 01753 if (urule->vfwmark == 0) 01754 svc = __ip_vs_service_get(urule->protocol, 01755 urule->vaddr, urule->vport); 01756 else 01757 svc = __ip_vs_svc_fwm_get(urule->vfwmark); 01758 01759 if (cmd != IP_VS_SO_SET_ADD 01760 && (svc == NULL || svc->protocol != urule->protocol)) { 01761 ret = -ESRCH; 01762 goto out_unlock; 01763 } 01764 01765 switch (cmd) { 01766 case IP_VS_SO_SET_ADD: 01767 if (svc != NULL) 01768 ret = -EEXIST; 01769 else 01770 ret = ip_vs_add_service(urule, &svc); 01771 break; 01772 case IP_VS_SO_SET_EDIT: 01773 ret = ip_vs_edit_service(svc, urule); 01774 break; 01775 case IP_VS_SO_SET_DEL: 01776 ret = ip_vs_del_service(svc); 01777 if (!ret) 01778 goto out_unlock; 01779 break; 01780 case IP_VS_SO_SET_ADDDEST: 01781 ret = ip_vs_add_dest(svc, urule); 01782 break; 01783 case IP_VS_SO_SET_EDITDEST: 01784 ret = ip_vs_edit_dest(svc, urule); 01785 break; 01786 case IP_VS_SO_SET_DELDEST: 01787 ret = ip_vs_del_dest(svc, urule); 01788 break; 01789 case IP_VS_SO_SET_ZERO: 01790 ret = ip_vs_zero_service(svc); 01791 break; 01792 default: 01793 ret = -EINVAL; 01794 } 01795 01796 if (svc) 01797 ip_vs_service_put(svc); 01798 01799 out_unlock: 01800 up(&__ip_vs_mutex); 01801 out_dec: 01802 MOD_DEC_USE_COUNT; 01803 out_free: 01804 kfree(urule); 01805 return ret; 01806 } 01807 01808 01809 static inline void 01810 __ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) 01811 { 01812 spin_lock_bh(&src->lock); 01813 memcpy(dst, src, (char*)&src->lock - (char*)src); 01814 spin_unlock_bh(&src->lock); 01815 } 01816 01817 static inline int 01818 __ip_vs_get_service_entries(const struct ip_vs_get_services *get, 01819 struct ip_vs_get_services *uptr) 01820 { 01821 int idx, count=0; 01822 struct ip_vs_service *svc; 01823 struct list_head *l; 01824 struct ip_vs_service_user entry; 01825 int ret = 0; 01826 01827 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 01828 list_for_each (l, &ip_vs_svc_table[idx]) { 01829 if (count >= get->num_services) 01830 goto out; 01831 svc = list_entry(l, struct ip_vs_service, s_list); 01832 entry.protocol = svc->protocol; 01833 entry.addr = svc->addr; 01834 entry.port = svc->port; 01835 entry.fwmark = svc->fwmark; 01836 strcpy(entry.sched_name, svc->scheduler->name); 01837 entry.flags = svc->flags; 01838 entry.timeout = svc->timeout / HZ; 01839 entry.netmask = svc->netmask; 01840 entry.num_dests = svc->num_dests; 01841 __ip_vs_copy_stats(&entry.stats, &svc->stats); 01842 if (copy_to_user(&uptr->entrytable[count], 01843 &entry, sizeof(entry))) { 01844 ret = -EFAULT; 01845 goto out; 01846 } 01847 count++; 01848 } 01849 } 01850 01851 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 01852 list_for_each (l, &ip_vs_svc_fwm_table[idx]) { 01853 if (count >= get->num_services) 01854 goto out; 01855 svc = list_entry(l, struct ip_vs_service, f_list); 01856 entry.protocol = svc->protocol; 01857 entry.addr = svc->addr; 01858 entry.port = svc->port; 01859 entry.fwmark = svc->fwmark; 01860 strcpy(entry.sched_name, svc->scheduler->name); 01861 entry.flags = svc->flags; 01862 entry.timeout = svc->timeout / HZ; 01863 entry.netmask = svc->netmask; 01864 entry.num_dests = svc->num_dests; 01865 __ip_vs_copy_stats(&entry.stats, &svc->stats); 01866 if (copy_to_user(&uptr->entrytable[count], 01867 &entry, sizeof(entry))) { 01868 ret = -EFAULT; 01869 goto out; 01870 } 01871 count++; 01872 } 01873 } 01874 out: 01875 return ret; 01876 } 01877 01878 static inline int 01879 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, 01880 struct ip_vs_get_dests *uptr) 01881 { 01882 struct ip_vs_service *svc; 01883 int ret = 0; 01884 01885 if (get->fwmark) 01886 svc = __ip_vs_svc_fwm_get(get->fwmark); 01887 else 01888 svc = __ip_vs_service_get(get->protocol, 01889 get->addr, get->port); 01890 if (svc) { 01891 int count = 0; 01892 struct ip_vs_dest *dest; 01893 struct list_head *l, *e; 01894 struct ip_vs_dest_user entry; 01895 01896 l = &svc->destinations; 01897 for (e=l->next; e!=l; e=e->next) { 01898 if (count >= get->num_dests) 01899 break; 01900 dest = list_entry(e, struct ip_vs_dest, n_list); 01901 entry.addr = dest->addr; 01902 entry.port = dest->port; 01903 entry.flags = atomic_read(&dest->conn_flags); 01904 entry.weight = atomic_read(&dest->weight); 01905 entry.activeconns = atomic_read(&dest->activeconns); 01906 entry.inactconns = atomic_read(&dest->inactconns); 01907 __ip_vs_copy_stats(&entry.stats, &dest->stats); 01908 if (copy_to_user(&uptr->entrytable[count], 01909 &entry, sizeof(entry))) { 01910 ret = -EFAULT; 01911 break; 01912 } 01913 count++; 01914 } 01915 ip_vs_service_put(svc); 01916 } else 01917 ret = -ESRCH; 01918 return ret; 01919 } 01920 01921 static inline void 01922 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u) 01923 { 01924 u->tcp_timeout = vs_timeout_table.timeout[IP_VS_S_ESTABLISHED] / HZ; 01925 u->tcp_fin_timeout = vs_timeout_table.timeout[IP_VS_S_FIN_WAIT] / HZ; 01926 u->udp_timeout = vs_timeout_table.timeout[IP_VS_S_UDP] / HZ; 01927 } 01928 01929 static int 01930 do_ip_vs_get_ctl(struct sock *sk, int cmd, void *user, int *len) 01931 { 01932 int ret = 0; 01933 01934 if (!capable(CAP_NET_ADMIN)) 01935 return -EPERM; 01936 01937 if (down_interruptible(&__ip_vs_mutex)) 01938 return -ERESTARTSYS; 01939 01940 switch (cmd) { 01941 case IP_VS_SO_GET_VERSION: 01942 { 01943 char buf[64]; 01944 01945 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", 01946 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE); 01947 if (*len < strlen(buf)+1) { 01948 ret = -EINVAL; 01949 goto out; 01950 } 01951 if (copy_to_user(user, buf, strlen(buf)+1) != 0) { 01952 ret = -EFAULT; 01953 goto out; 01954 } 01955 *len = strlen(buf)+1; 01956 } 01957 break; 01958 01959 case IP_VS_SO_GET_INFO: 01960 { 01961 struct ip_vs_getinfo info; 01962 info.version = IP_VS_VERSION_CODE; 01963 info.size = IP_VS_CONN_TAB_SIZE; 01964 info.num_services = ip_vs_num_services; 01965 if (copy_to_user(user, &info, sizeof(info)) != 0) 01966 ret = -EFAULT; 01967 } 01968 break; 01969 01970 case IP_VS_SO_GET_SERVICES: 01971 { 01972 struct ip_vs_get_services get; 01973 01974 if (*len < sizeof(get)) { 01975 IP_VS_ERR("length: %u < %u\n", *len, sizeof(get)); 01976 ret = -EINVAL; 01977 goto out; 01978 } 01979 if (copy_from_user(&get, user, sizeof(get))) { 01980 ret = -EFAULT; 01981 goto out; 01982 } 01983 if (*len != (sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services)) { 01984 IP_VS_ERR("length: %u != %u\n", *len, 01985 sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services); 01986 ret = -EINVAL; 01987 goto out; 01988 } 01989 ret = __ip_vs_get_service_entries(&get, user); 01990 } 01991 break; 01992 01993 case IP_VS_SO_GET_SERVICE: 01994 { 01995 struct ip_vs_service_user get; 01996 struct ip_vs_service *svc; 01997 01998 if (*len != sizeof(get)) { 01999 IP_VS_ERR("length: %u != %u\n", *len, sizeof(get)); 02000 ret = -EINVAL; 02001 goto out; 02002 } 02003 if (copy_from_user(&get, user, sizeof(get))) { 02004 ret = -EFAULT; 02005 goto out; 02006 } 02007 02008 if (get.fwmark) 02009 svc = __ip_vs_svc_fwm_get(get.fwmark); 02010 else 02011 svc = __ip_vs_service_get(get.protocol, 02012 get.addr, get.port); 02013 if (svc) { 02014 strcpy(get.sched_name, svc->scheduler->name); 02015 get.flags = svc->flags; 02016 get.timeout = svc->timeout / HZ; 02017 get.netmask = svc->netmask; 02018 get.num_dests = svc->num_dests; 02019 __ip_vs_copy_stats(&get.stats, &svc->stats); 02020 if (copy_to_user(user, &get, *len) != 0) 02021 ret = -EFAULT; 02022 ip_vs_service_put(svc); 02023 } else 02024 ret = -ESRCH; 02025 } 02026 break; 02027 02028 case IP_VS_SO_GET_DESTS: 02029 { 02030 struct ip_vs_get_dests get; 02031 02032 if (*len < sizeof(get)) { 02033 IP_VS_ERR("length: %u < %u\n", *len, sizeof(get)); 02034 ret = -EINVAL; 02035 goto out; 02036 } 02037 if (copy_from_user(&get, user, sizeof(get))) { 02038 ret = -EFAULT; 02039 goto out; 02040 } 02041 if (*len != (sizeof(get) + 02042 sizeof(struct ip_vs_dest_user)*get.num_dests)) { 02043 IP_VS_ERR("length: %u != %u\n", *len, 02044 sizeof(get)+sizeof(struct ip_vs_dest_user)*get.num_dests); 02045 ret = -EINVAL; 02046 goto out; 02047 } 02048 ret = __ip_vs_get_dest_entries(&get, user); 02049 } 02050 break; 02051 02052 case IP_VS_SO_GET_TIMEOUTS: 02053 { 02054 struct ip_vs_timeout_user u; 02055 02056 if (*len < sizeof(u)) { 02057 IP_VS_ERR("length: %u < %u\n", *len, sizeof(u)); 02058 ret = -EINVAL; 02059 goto out; 02060 } 02061 __ip_vs_get_timeouts(&u); 02062 if (copy_to_user(user, &u, sizeof(u)) != 0) 02063 ret = -EFAULT; 02064 } 02065 break; 02066 02067 case IP_VS_SO_GET_DAEMON: 02068 { 02069 struct ip_vs_daemon_user u; 02070 02071 if (*len < sizeof(u)) { 02072 IP_VS_ERR("length: %u < %u\n", *len, sizeof(u)); 02073 ret = -EINVAL; 02074 goto out; 02075 } 02076 u.state = ip_vs_sync_state; 02077 strcpy(u.mcast_ifn, ip_vs_mcast_ifn); 02078 if (copy_to_user(user, &u, sizeof(u)) != 0) 02079 ret = -EFAULT; 02080 } 02081 break; 02082 02083 default: 02084 ret = -EINVAL; 02085 } 02086 02087 out: 02088 up(&__ip_vs_mutex); 02089 return ret; 02090 } 02091 02092 02093 static struct nf_sockopt_ops ip_vs_sockopts = { 02094 { NULL, NULL }, PF_INET, 02095 IP_VS_BASE_CTL, IP_VS_SO_SET_MAX+1, do_ip_vs_set_ctl, 02096 IP_VS_BASE_CTL, IP_VS_SO_GET_MAX+1, do_ip_vs_get_ctl 02097 }; 02098 02099 02100 int ip_vs_control_init(void) 02101 { 02102 int ret; 02103 int idx; 02104 02105 EnterFunction(2); 02106 02107 ret = nf_register_sockopt(&ip_vs_sockopts); 02108 if (ret) { 02109 IP_VS_ERR("cannot register sockopt.\n"); 02110 return ret; 02111 } 02112 02113 proc_net_create("ip_vs", 0, ip_vs_get_info); 02114 proc_net_create("ip_vs_stats", 0, ip_vs_stats_get_info); 02115 02116 ipv4_vs_table.sysctl_header = 02117 register_sysctl_table(ipv4_vs_table.root_dir, 0); 02118 /* 02119 * Initilize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable, 02120 * ip_vs_schedulers. 02121 */ 02122 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 02123 INIT_LIST_HEAD(&ip_vs_svc_table[idx]); 02124 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); 02125 } 02126 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { 02127 INIT_LIST_HEAD(&ip_vs_rtable[idx]); 02128 } 02129 02130 memset(&ip_vs_stats, 0, sizeof(ip_vs_stats)); 02131 ip_vs_stats.lock = SPIN_LOCK_UNLOCKED; 02132 ip_vs_new_estimator(&ip_vs_stats); 02133 02134 /* Hook the defense timer */ 02135 init_timer(&defense_timer); 02136 defense_timer.function = defense_timer_handler; 02137 defense_timer.expires = jiffies + DEFENSE_TIMER_PERIOD; 02138 add_timer(&defense_timer); 02139 02140 LeaveFunction(2); 02141 return 0; 02142 } 02143 02144 void ip_vs_control_cleanup(void) 02145 { 02146 EnterFunction(2); 02147 ip_vs_trash_cleanup(); 02148 del_timer_sync(&defense_timer); 02149 ip_vs_kill_estimator(&ip_vs_stats); 02150 unregister_sysctl_table(ipv4_vs_table.sysctl_header); 02151 proc_net_remove("ip_vs_stats"); 02152 proc_net_remove("ip_vs"); 02153 nf_unregister_sockopt(&ip_vs_sockopts); 02154 LeaveFunction(2); 02155 }

Generated on Wed Dec 1 21:25:31 2004 for Linux 2.4.23 Networking by doxygen 1.3.8