00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
#include <linux/config.h>
00028
#include <linux/module.h>
00029
#include <linux/types.h>
00030
#include <linux/kernel.h>
00031
#include <linux/errno.h>
00032
#include <linux/ip.h>
00033
#include <linux/tcp.h>
00034
#include <linux/icmp.h>
00035
00036
#include <net/ip.h>
00037
#include <net/tcp.h>
00038
#include <net/udp.h>
00039
#include <net/icmp.h>
00040
#include <net/route.h>
00041
00042
#include <linux/netfilter.h>
00043
#include <linux/netfilter_ipv4.h>
00044
00045
#include <net/ip_vs.h>
00046
00047
00048
/*
 * Symbols exported by this file for use by other kernel modules.
 * ip_vs_get_debug_level is only exported when CONFIG_IP_VS_DEBUG is set.
 */
EXPORT_SYMBOL(register_ip_vs_scheduler);
EXPORT_SYMBOL(unregister_ip_vs_scheduler);
EXPORT_SYMBOL(ip_vs_skb_replace);
EXPORT_SYMBOL(ip_vs_proto_name);
EXPORT_SYMBOL(ip_vs_conn_new);
EXPORT_SYMBOL(ip_vs_conn_in_get);
EXPORT_SYMBOL(ip_vs_conn_out_get);
EXPORT_SYMBOL(ip_vs_conn_listen);
EXPORT_SYMBOL(ip_vs_conn_put);
#ifdef CONFIG_IP_VS_DEBUG
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
EXPORT_SYMBOL(check_for_ip_vs_out);
00061
00062
00063
/*
 * Access the echo identifier field (un.echo.id) of an ICMP header.
 * The argument is parenthesized so that any pointer expression,
 * e.g. "hdr + 1", expands correctly.
 */
#define icmp_id(icmph)          (((icmph)->un).echo.id)
00065
/*
 * Return a printable name for an IP protocol number.
 *
 * IP, UDP, TCP and ICMP map to string literals.  Any other value is
 * formatted as "IP_<number>" into a function-local static buffer, so
 * the returned pointer for unknown protocols is overwritten by the
 * next such call (callers must not keep it across calls).
 */
const char *
ip_vs_proto_name(unsigned proto)
{
	static char buf[20];

	switch (proto) {
	case IPPROTO_IP:
		return "IP";
	case IPPROTO_UDP:
		return "UDP";
	case IPPROTO_TCP:
		return "TCP";
	case IPPROTO_ICMP:
		return "ICMP";
	default:
		/*
		 * proto is unsigned: print it with %u (not %d) and bound
		 * the write to the buffer size.
		 */
		snprintf(buf, sizeof(buf), "IP_%u", proto);
		return buf;
	}
}
00084
00085
00086
static inline void
00087 ip_vs_in_stats(
struct ip_vs_conn *cp,
struct sk_buff *skb)
00088 {
00089
struct ip_vs_dest *dest = cp->
dest;
00090
if (dest && (dest->flags &
IP_VS_DEST_F_AVAILABLE)) {
00091 spin_lock(&dest->stats.lock);
00092 dest->stats.inpkts++;
00093 dest->stats.inbytes += skb->
len;
00094 spin_unlock(&dest->stats.lock);
00095
00096 spin_lock(&dest->svc->stats.lock);
00097 dest->svc->stats.inpkts++;
00098 dest->svc->stats.inbytes += skb->
len;
00099 spin_unlock(&dest->svc->stats.lock);
00100
00101 spin_lock(&
ip_vs_stats.lock);
00102
ip_vs_stats.inpkts++;
00103
ip_vs_stats.inbytes += skb->
len;
00104 spin_unlock(&
ip_vs_stats.lock);
00105 }
00106 }
00107
00108
00109
static inline void
00110 ip_vs_out_stats(
struct ip_vs_conn *cp,
struct sk_buff *skb)
00111 {
00112
struct ip_vs_dest *dest = cp->
dest;
00113
if (dest && (dest->flags &
IP_VS_DEST_F_AVAILABLE)) {
00114 spin_lock(&dest->stats.lock);
00115 dest->stats.outpkts++;
00116 dest->stats.outbytes += skb->
len;
00117 spin_unlock(&dest->stats.lock);
00118
00119 spin_lock(&dest->svc->stats.lock);
00120 dest->svc->stats.outpkts++;
00121 dest->svc->stats.outbytes += skb->
len;
00122 spin_unlock(&dest->svc->stats.lock);
00123
00124 spin_lock(&
ip_vs_stats.lock);
00125
ip_vs_stats.outpkts++;
00126
ip_vs_stats.outbytes += skb->
len;
00127 spin_unlock(&
ip_vs_stats.lock);
00128 }
00129 }
00130
00131
00132
static inline void
00133 ip_vs_conn_stats(
struct ip_vs_conn *cp,
struct ip_vs_service *svc)
00134 {
00135 spin_lock(&cp->
dest->
stats.
lock);
00136 cp->
dest->
stats.
conns++;
00137 spin_unlock(&cp->
dest->
stats.
lock);
00138
00139 spin_lock(&svc->
stats.
lock);
00140 svc->
stats.
conns++;
00141 spin_unlock(&svc->
stats.
lock);
00142
00143 spin_lock(&
ip_vs_stats.lock);
00144
ip_vs_stats.conns++;
00145 spin_unlock(&
ip_vs_stats.lock);
00146 }
00147
00148
00149
00150
00151
00152
00153
00154
static struct ip_vs_conn *
00155 ip_vs_sched_persist(
struct ip_vs_service *svc,
struct iphdr *iph)
00156 {
00157
struct ip_vs_conn *cp = NULL;
00158
struct ip_vs_dest *dest;
00159
const __u16 *portp;
00160
struct ip_vs_conn *ct;
00161 __u16 dport;
00162 __u32 snet;
00163
00164 portp = (__u16 *)&(((
char *)iph)[iph->ihl*4]);
00165
00166
00167 snet = iph->
saddr & svc->
netmask;
00168
00169
IP_VS_DBG(6,
"P-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u "
00170
"mnet %u.%u.%u.%u\n",
00171 NIPQUAD(iph->
saddr), ntohs(portp[0]),
00172 NIPQUAD(iph->
daddr), ntohs(portp[1]),
00173 NIPQUAD(snet));
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
if (portp[1] == svc->
port) {
00189
00190
if (svc->
port !=
FTPPORT)
00191 ct =
ip_vs_conn_in_get(iph->
protocol, snet, 0,
00192 iph->
daddr, portp[1]);
00193
else
00194 ct =
ip_vs_conn_in_get(iph->
protocol, snet, 0,
00195 iph->
daddr, 0);
00196
00197
if (!ct || !
ip_vs_check_template(ct)) {
00198
00199
00200
00201
00202 dest = svc->
scheduler->
schedule(svc, iph);
00203
if (dest == NULL) {
00204
IP_VS_DBG(1,
"P-schedule: no dest found.\n");
00205
return NULL;
00206 }
00207
00208
00209
00210
00211
00212
00213
00214
if (svc->
port !=
FTPPORT)
00215 ct =
ip_vs_conn_new(iph->
protocol,
00216 snet, 0,
00217 iph->
daddr, portp[1],
00218 dest->addr, dest->port,
00219 0,
00220 dest);
00221
else
00222 ct =
ip_vs_conn_new(iph->
protocol,
00223 snet, 0,
00224 iph->
daddr, 0,
00225 dest->addr, 0,
00226 0,
00227 dest);
00228
if (ct == NULL)
00229
return NULL;
00230
00231 ct->timeout = svc->
timeout;
00232 }
else {
00233
00234 dest = ct->dest;
00235 }
00236 dport = dest->port;
00237 }
else {
00238
00239
00240
00241
00242
00243
00244
if (svc->
fwmark)
00245 ct =
ip_vs_conn_in_get(
IPPROTO_IP, snet, 0,
00246 htonl(svc->
fwmark), 0);
00247
else
00248 ct =
ip_vs_conn_in_get(iph->
protocol, snet, 0,
00249 iph->
daddr, 0);
00250
00251
if (!ct || !
ip_vs_check_template(ct)) {
00252
00253
00254
00255
00256
if (svc->
port)
00257
return NULL;
00258
00259 dest = svc->
scheduler->
schedule(svc, iph);
00260
if (dest == NULL) {
00261
IP_VS_DBG(1,
"P-schedule: no dest found.\n");
00262
return NULL;
00263 }
00264
00265
00266
00267
00268
if (svc->
fwmark)
00269 ct =
ip_vs_conn_new(
IPPROTO_IP,
00270 snet, 0,
00271 htonl(svc->
fwmark), 0,
00272 dest->addr, 0,
00273 0,
00274 dest);
00275
else
00276 ct =
ip_vs_conn_new(iph->
protocol,
00277 snet, 0,
00278 iph->
daddr, 0,
00279 dest->addr, 0,
00280 0,
00281 dest);
00282
if (ct == NULL)
00283
return NULL;
00284
00285 ct->timeout = svc->
timeout;
00286 }
else {
00287
00288 dest = ct->dest;
00289 }
00290 dport = portp[1];
00291 }
00292
00293
00294
00295
00296 cp =
ip_vs_conn_new(iph->
protocol,
00297 iph->
saddr, portp[0],
00298 iph->
daddr, portp[1],
00299 dest->addr, dport,
00300 0,
00301 dest);
00302
if (cp == NULL) {
00303
ip_vs_conn_put(ct);
00304
return NULL;
00305 }
00306
00307
00308
00309
00310
00311
00312 atomic_inc(&dest->inactconns);
00313
00314
00315
00316
00317
ip_vs_control_add(cp, ct);
00318
00319
ip_vs_conn_put(ct);
00320
return cp;
00321 }
00322
00323
00324
00325
00326
00327
00328
00329
static struct ip_vs_conn *
00330 ip_vs_schedule(
struct ip_vs_service *svc,
struct iphdr *iph)
00331 {
00332
struct ip_vs_conn *cp = NULL;
00333
struct ip_vs_dest *dest;
00334
const __u16 *portp;
00335
00336
00337
00338
00339
if (svc->
flags &
IP_VS_SVC_F_PERSISTENT)
00340
return ip_vs_sched_persist(svc, iph);
00341
00342
00343
00344
00345 portp = (__u16 *)&(((
char *)iph)[iph->ihl*4]);
00346
if (!svc->
fwmark && portp[1] != svc->
port) {
00347
if (!svc->
port)
00348
IP_VS_ERR(
"Schedule: port zero only supported "
00349
"in persistent services, "
00350
"check your ipvs configuration\n");
00351
return NULL;
00352 }
00353
00354 dest = svc->
scheduler->
schedule(svc, iph);
00355
if (dest == NULL) {
00356
IP_VS_DBG(1,
"Schedule: no dest found.\n");
00357
return NULL;
00358 }
00359
00360
00361
00362
00363 cp =
ip_vs_conn_new(iph->
protocol,
00364 iph->
saddr, portp[0],
00365 iph->
daddr, portp[1],
00366 dest->addr, dest->port?dest->port:portp[1],
00367 0,
00368 dest);
00369
if (cp == NULL)
00370
return NULL;
00371
00372
00373
00374
00375
00376 atomic_inc(&dest->inactconns);
00377
00378
IP_VS_DBG(6,
"Schedule fwd:%c s:%s c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
00379
"d:%u.%u.%u.%u:%u flg:%X cnt:%d\n",
00380
ip_vs_fwd_tag(cp),
ip_vs_state_name(cp->state),
00381 NIPQUAD(cp->caddr), ntohs(cp->cport),
00382 NIPQUAD(cp->vaddr), ntohs(cp->vport),
00383 NIPQUAD(cp->daddr), ntohs(cp->dport),
00384 cp->flags, atomic_read(&cp->refcnt));
00385
00386
return cp;
00387 }
00388
00389
00390
00391
00392
00393
00394
00395 static int ip_vs_leave(
struct ip_vs_service *svc,
struct sk_buff *skb)
00396 {
00397
struct iphdr *iph = skb->
nh.iph;
00398 __u16 *portp = (__u16 *)&(((
char *)iph)[iph->ihl*4]);
00399
00400
00401
00402
00403
if (
sysctl_ip_vs_cache_bypass && svc->
fwmark
00404 && (
inet_addr_type(iph->daddr) == RTN_UNICAST)) {
00405
int ret;
00406
struct ip_vs_conn *cp;
00407
00408
ip_vs_service_put(svc);
00409
00410
00411
IP_VS_DBG(6,
"ip_vs_leave: create a cache_bypass entry\n");
00412 cp =
ip_vs_conn_new(iph->protocol,
00413 iph->saddr, portp[0],
00414 iph->daddr, portp[1],
00415 0, 0,
00416
IP_VS_CONN_F_BYPASS,
00417 NULL);
00418
if (cp == NULL) {
00419
kfree_skb(skb);
00420
return NF_STOLEN;
00421 }
00422
00423
00424
ip_vs_in_stats(cp, skb);
00425
00426
00427
ip_vs_set_state(cp,
VS_STATE_INPUT, iph, portp);
00428
00429
00430 ret = cp->packet_xmit(skb, cp);
00431
00432 atomic_inc(&cp->in_pkts);
00433
ip_vs_conn_put(cp);
00434
return ret;
00435 }
00436
00437
00438
00439
00440
00441
00442
00443
if ((svc->
port ==
FTPPORT) && (portp[1] !=
FTPPORT)) {
00444
ip_vs_service_put(svc);
00445
return NF_ACCEPT;
00446 }
00447
00448
ip_vs_service_put(svc);
00449
00450
00451
00452
00453
00454
00455
00456
00457
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
00458
kfree_skb(skb);
00459
return NF_STOLEN;
00460 }
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470 static unsigned int ip_vs_post_routing(
unsigned int hooknum,
00471
struct sk_buff **skb_p,
00472
const struct net_device *in,
00473
const struct net_device *out,
00474
int (*okfn)(
struct sk_buff *))
00475 {
00476
struct sk_buff *skb = *skb_p;
00477
00478
if (!(skb->nfcache &
NFC_IPVS_PROPERTY))
00479
return NF_ACCEPT;
00480
00481
00482 (*okfn)(skb);
00483
00484
return NF_STOLEN;
00485 }
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495 static int ip_vs_out_icmp(
struct sk_buff **skb_p)
00496 {
00497
struct sk_buff *skb = *skb_p;
00498
struct iphdr *iph;
00499
struct icmphdr *icmph;
00500
struct iphdr *ciph;
00501 __u16 *pptr;
00502
unsigned short ihl;
00503
unsigned short len;
00504
unsigned short clen, csize;
00505
struct ip_vs_conn *cp;
00506
00507
00508
if (skb->nh.iph->frag_off & __constant_htons(
IP_MF|
IP_OFFSET)) {
00509 skb =
ip_defrag(skb);
00510
if (!skb)
00511
return NF_STOLEN;
00512 *skb_p = skb;
00513 }
00514
00515
if (
skb_is_nonlinear(skb)) {
00516
if (
skb_linearize(skb, GFP_ATOMIC) != 0)
00517
return NF_DROP;
00518
ip_send_check(skb->nh.iph);
00519 }
00520
00521 iph = skb->nh.iph;
00522 ihl = iph->ihl << 2;
00523 icmph = (
struct icmphdr *)((
char *)iph + ihl);
00524 len = ntohs(iph->tot_len) - ihl;
00525
if (len <
sizeof(
struct icmphdr))
00526 return
NF_DROP;
00527
00528
IP_VS_DBG(12,
"outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
00529 icmph->type, ntohs(
icmp_id(icmph)),
00530 NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
00531
00532
00533
00534
00535
00536
00537
00538
00539
if ((icmph->type != ICMP_DEST_UNREACH) &&
00540 (icmph->type != ICMP_SOURCE_QUENCH) &&
00541 (icmph->type != ICMP_TIME_EXCEEDED))
00542
return NF_ACCEPT;
00543
00544
00545 clen = len -
sizeof(
struct icmphdr);
00546
if (clen <
sizeof(
struct iphdr))
00547 return
NF_DROP;
00548 ciph = (
struct iphdr *) (icmph + 1);
00549 csize = ciph->ihl << 2;
00550
if (clen < csize)
00551
return NF_DROP;
00552
00553
00554
if (ciph->protocol !=
IPPROTO_UDP && ciph->protocol !=
IPPROTO_TCP)
00555
return NF_ACCEPT;
00556
00557
00558
if (ciph->frag_off & __constant_htons(
IP_OFFSET))
00559
return NF_ACCEPT;
00560
00561
00562
if (clen < csize +
sizeof(
struct udphdr))
00563 return
NF_DROP;
00564
00565
00566
00567
00568
00569
00570 pptr = (__u16 *)&(((
char *)ciph)[csize]);
00571
00572
00573
if (
ip_compute_csum((
unsigned char *) icmph, len)) {
00574
00575
IP_VS_DBG(1,
"forward ICMP: failed checksum from %d.%d.%d.%d!\n",
00576 NIPQUAD(iph->saddr));
00577
return NF_DROP;
00578 }
00579
00580
IP_VS_DBG(11,
"Handling outgoing ICMP for "
00581
"%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
00582 NIPQUAD(ciph->saddr), ntohs(pptr[0]),
00583 NIPQUAD(ciph->daddr), ntohs(pptr[1]));
00584
00585
00586 cp =
ip_vs_conn_out_get(ciph->protocol, ciph->daddr, pptr[1],
00587 ciph->saddr, pptr[0]);
00588
if (!cp)
00589
return NF_ACCEPT;
00590
00591
if (
IP_VS_FWD_METHOD(cp) != 0) {
00592
IP_VS_ERR(
"shouldn't reach here, because the box is on the"
00593
"half connection in the tun/dr module.\n");
00594 }
00595
00596
00597
00598 iph->saddr = cp->vaddr;
00599
ip_send_check(iph);
00600
00601
00602 ciph->daddr = cp->vaddr;
00603
ip_send_check(ciph);
00604
00605
00606 pptr[1] = cp->vport;
00607
00608
00609 icmph->checksum = 0;
00610 icmph->checksum =
ip_compute_csum((
unsigned char *) icmph, len);
00611 skb->ip_summed =
CHECKSUM_UNNECESSARY;
00612
00613
00614
ip_vs_out_stats(cp, skb);
00615
ip_vs_conn_put(cp);
00616
00617
IP_VS_DBG(11,
"Forwarding correct outgoing ICMP to "
00618
"%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
00619 NIPQUAD(ciph->saddr), ntohs(pptr[0]),
00620 NIPQUAD(ciph->daddr), ntohs(pptr[1]));
00621
00622 skb->nfcache |=
NFC_IPVS_PROPERTY;
00623
00624
return NF_ACCEPT;
00625 }
00626
00627
00628
00629
00630
00631
00632
00633 static unsigned int ip_vs_out(
unsigned int hooknum,
00634
struct sk_buff **skb_p,
00635
const struct net_device *in,
00636
const struct net_device *out,
00637
int (*okfn)(
struct sk_buff *))
00638 {
00639
struct sk_buff *skb = *skb_p;
00640
struct iphdr *iph;
00641
union ip_vs_tphdr h;
00642
struct ip_vs_conn *cp;
00643
int size;
00644
int ihl;
00645
00646
EnterFunction(11);
00647
00648
if (skb->nfcache &
NFC_IPVS_PROPERTY)
00649
return NF_ACCEPT;
00650
00651 iph = skb->nh.iph;
00652
if (iph->protocol ==
IPPROTO_ICMP)
00653
return ip_vs_out_icmp(skb_p);
00654
00655
00656
if (iph->protocol !=
IPPROTO_TCP && iph->protocol !=
IPPROTO_UDP)
00657
return NF_ACCEPT;
00658
00659
00660
if (iph->frag_off & __constant_htons(
IP_MF|
IP_OFFSET)) {
00661 skb =
ip_defrag(skb);
00662
if (!skb)
00663
return NF_STOLEN;
00664 iph = skb->nh.iph;
00665 *skb_p = skb;
00666 }
00667
00668
00669
00670 ihl = iph->ihl << 2;
00671
if (
ip_vs_header_check(skb, iph->protocol, ihl) == -1)
00672
return NF_DROP;
00673
00674 iph = skb->nh.iph;
00675 h.
raw = (
char*) iph + ihl;
00676
00677
00678
00679
00680 cp =
ip_vs_conn_out_get(iph->protocol, iph->saddr, h.
portp[0],
00681 iph->daddr, h.
portp[1]);
00682
if (!cp) {
00683
if (
sysctl_ip_vs_nat_icmp_send &&
00684
ip_vs_lookup_real_service(iph->protocol,
00685 iph->saddr, h.
portp[0])) {
00686
00687
00688
00689
00690
if (!h.
th->rst || iph->protocol !=
IPPROTO_TCP) {
00691
icmp_send(skb, ICMP_DEST_UNREACH,
00692 ICMP_PORT_UNREACH, 0);
00693
kfree_skb(skb);
00694
return NF_STOLEN;
00695 }
00696 }
00697
IP_VS_DBG(12,
"packet for %s %d.%d.%d.%d:%d "
00698
"continue traversal as normal.\n",
00699
ip_vs_proto_name(iph->protocol),
00700 NIPQUAD(iph->daddr),
00701 ntohs(h.
portp[1]));
00702
if (
skb_is_nonlinear(skb))
00703
ip_send_check(iph);
00704
return NF_ACCEPT;
00705 }
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
if (cp->app &&
skb_is_nonlinear(skb)) {
00716
if (
skb_linearize(skb, GFP_ATOMIC) != 0) {
00717
ip_vs_conn_put(cp);
00718
return NF_DROP;
00719 }
00720 iph = skb->nh.iph;
00721 h.
raw = (
char*) iph + ihl;
00722 }
00723
00724 size = skb->len - ihl;
00725
IP_VS_DBG(11,
"O-pkt: %s size=%d\n",
00726
ip_vs_proto_name(iph->protocol), size);
00727
00728
00729
if (cp->app && (iph->protocol !=
IPPROTO_UDP || h.
uh->check != 0)) {
00730
switch (skb->ip_summed) {
00731
case CHECKSUM_NONE:
00732 skb->csum =
csum_partial(h.
raw, size, 0);
00733
case CHECKSUM_HW:
00734
if (
csum_tcpudp_magic(iph->saddr, iph->daddr, size,
00735 iph->protocol, skb->csum)) {
00736
ip_vs_conn_put(cp);
00737
IP_VS_DBG_RL(
"Outgoing failed %s checksum "
00738
"from %d.%d.%d.%d (size=%d)!\n",
00739
ip_vs_proto_name(iph->protocol),
00740 NIPQUAD(iph->saddr),
00741 size);
00742
return NF_DROP;
00743 }
00744
break;
00745
default:
00746
00747
break;
00748 }
00749 }
00750
00751
IP_VS_DBG(11,
"Outgoing %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d\n",
00752
ip_vs_proto_name(iph->protocol),
00753 NIPQUAD(iph->saddr), ntohs(h.
portp[0]),
00754 NIPQUAD(iph->daddr), ntohs(h.
portp[1]));
00755
00756
00757 iph->saddr = cp->vaddr;
00758 h.
portp[0] = cp->vport;
00759
00760
00761
00762
00763
if (
ip_vs_app_pkt_out(cp, skb) != 0) {
00764
00765 iph = skb->nh.iph;
00766 h.
raw = (
char*)iph + ihl;
00767 size = skb->len - ihl;
00768 }
00769
00770
00771
00772
00773
if (!cp->app && (iph->protocol !=
IPPROTO_UDP || h.
uh->check != 0)) {
00774
00775
ip_vs_fast_check_update(&h, cp->daddr, cp->vaddr,
00776 cp->dport, cp->vport, iph->protocol);
00777
if (skb->ip_summed ==
CHECKSUM_HW)
00778 skb->ip_summed =
CHECKSUM_NONE;
00779 }
else {
00780
00781
switch (iph->protocol) {
00782
case IPPROTO_TCP:
00783 h.
th->check = 0;
00784 skb->csum =
csum_partial(h.
raw, size, 0);
00785 h.
th->check =
csum_tcpudp_magic(iph->saddr, iph->daddr,
00786 size, iph->protocol,
00787 skb->csum);
00788
IP_VS_DBG(11,
"O-pkt: %s O-csum=%d (+%d)\n",
00789
ip_vs_proto_name(iph->protocol), h.
th->check,
00790 (
char*)&(h.
th->check) - (
char*)h.
raw);
00791
break;
00792
case IPPROTO_UDP:
00793 h.
uh->check = 0;
00794 skb->csum =
csum_partial(h.
raw, size, 0);
00795 h.
uh->check =
csum_tcpudp_magic(iph->saddr, iph->daddr,
00796 size, iph->protocol,
00797 skb->csum);
00798
if (h.
uh->check == 0)
00799 h.
uh->check = 0xFFFF;
00800
IP_VS_DBG(11,
"O-pkt: %s O-csum=%d (+%d)\n",
00801
ip_vs_proto_name(iph->protocol), h.
uh->check,
00802 (
char*)&(h.
uh->check) - (
char*)h.
raw);
00803
break;
00804 }
00805 }
00806
ip_send_check(iph);
00807
00808
ip_vs_out_stats(cp, skb);
00809
ip_vs_set_state(cp,
VS_STATE_OUTPUT, iph, h.
portp);
00810
ip_vs_conn_put(cp);
00811
00812 skb->nfcache |=
NFC_IPVS_PROPERTY;
00813
00814
LeaveFunction(11);
00815
return NF_ACCEPT;
00816 }
00817
00818
00819
00820
00821
00822
00823
00824
00825 unsigned int check_for_ip_vs_out(
struct sk_buff **skb_p,
00826
int (*okfn)(
struct sk_buff *))
00827 {
00828
unsigned int ret;
00829
00830 ret =
ip_vs_out(
NF_IP_FORWARD, skb_p, NULL, NULL, NULL);
00831
if (ret !=
NF_ACCEPT) {
00832
return ret;
00833 }
else {
00834
00835
00836
if ((*skb_p)->nfcache &
NFC_IPVS_PROPERTY) {
00837 (*okfn)(*skb_p);
00838
return NF_STOLEN;
00839 }
00840 }
00841
return NF_ACCEPT;
00842 }
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852 static int ip_vs_in_icmp(
struct sk_buff **skb_p)
00853 {
00854
struct sk_buff *skb = *skb_p;
00855
struct iphdr *iph;
00856
struct icmphdr *icmph;
00857
struct iphdr *ciph;
00858 __u16 *pptr;
00859
unsigned short len;
00860
unsigned short clen, csize;
00861
struct ip_vs_conn *cp;
00862
struct rtable *rt;
00863
int mtu;
00864
00865
if (
skb_is_nonlinear(skb)) {
00866
if (
skb_linearize(skb, GFP_ATOMIC) != 0)
00867
return NF_DROP;
00868 }
00869
00870 iph = skb->nh.iph;
00871
ip_send_check(iph);
00872 icmph = (
struct icmphdr *)((
char *)iph + (iph->ihl << 2));
00873 len = ntohs(iph->tot_len) - (iph->ihl<<2);
00874
if (len <
sizeof(
struct icmphdr))
00875 return
NF_DROP;
00876
00877
IP_VS_DBG(12,
"icmp in (%d,%d) %u.%u.%u.%u -> %u.%u.%u.%u\n",
00878 icmph->type, ntohs(
icmp_id(icmph)),
00879 NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
00880
00881
if ((icmph->type != ICMP_DEST_UNREACH) &&
00882 (icmph->type != ICMP_SOURCE_QUENCH) &&
00883 (icmph->type != ICMP_TIME_EXCEEDED))
00884
return NF_ACCEPT;
00885
00886
00887
00888
00889
00890 clen = len -
sizeof(
struct icmphdr);
00891
if (clen <
sizeof(
struct iphdr))
00892 return
NF_DROP;
00893 ciph = (
struct iphdr *) (icmph + 1);
00894 csize = ciph->ihl << 2;
00895
if (clen < csize)
00896
return NF_DROP;
00897
00898
00899
if (ciph->protocol !=
IPPROTO_UDP && ciph->protocol !=
IPPROTO_TCP)
00900
return NF_ACCEPT;
00901
00902
00903
if (ciph->frag_off & __constant_htons(
IP_OFFSET))
00904
return NF_ACCEPT;
00905
00906
00907
if (clen < csize +
sizeof(
struct udphdr))
00908 return
NF_DROP;
00909
00910
00911
if (
ip_compute_csum((
unsigned char *) icmph, len)) {
00912
00913
IP_VS_ERR_RL(
"incoming ICMP: failed checksum from "
00914
"%d.%d.%d.%d!\n", NIPQUAD(iph->saddr));
00915
return NF_DROP;
00916 }
00917
00918 pptr = (__u16 *)&(((
char *)ciph)[csize]);
00919
00920
IP_VS_DBG(11,
"Handling incoming ICMP for "
00921
"%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
00922 NIPQUAD(ciph->saddr), ntohs(pptr[0]),
00923 NIPQUAD(ciph->daddr), ntohs(pptr[1]));
00924
00925
00926
00927 cp =
ip_vs_conn_in_get(ciph->protocol,
00928 ciph->daddr, pptr[1],
00929 ciph->saddr, pptr[0]);
00930
if (cp == NULL)
00931
return NF_ACCEPT;
00932
00933
ip_vs_in_stats(cp, skb);
00934
00935
00936
00937
00938
if (
IP_VS_FWD_METHOD(cp) !=
IP_VS_CONN_F_MASQ) {
00939
int ret;
00940
if (cp->packet_xmit)
00941 ret = cp->packet_xmit(skb, cp);
00942
else
00943 ret =
NF_ACCEPT;
00944 atomic_inc(&cp->in_pkts);
00945
ip_vs_conn_put(cp);
00946
return ret;
00947 }
00948
00949
00950
00951
00952
if (!(rt =
__ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
00953
goto tx_error_icmp;
00954
00955
00956 mtu = rt->u.dst.pmtu;
00957
if ((skb->len > mtu) && (iph->frag_off&__constant_htons(
IP_DF))) {
00958
ip_rt_put(rt);
00959
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
00960
IP_VS_DBG_RL(
"ip_vs_in_icmp(): frag needed\n");
00961
goto tx_error;
00962 }
00963
00964
00965
dst_release(skb->dst);
00966 skb->dst = &rt->u.dst;
00967
00968
00969
if (
ip_vs_skb_cow(skb, rt->u.dst.dev->hard_header_len,
00970 &iph, (
unsigned char**)&icmph)) {
00971
ip_vs_conn_put(cp);
00972
return NF_DROP;
00973 }
00974 ciph = (
struct iphdr *) (icmph + 1);
00975 pptr = (__u16 *)&(((
char *)ciph)[csize]);
00976
00977
00978
00979
00980
00981 iph->daddr = cp->daddr;
00982
ip_send_check(iph);
00983
00984
00985 ciph->saddr = cp->daddr;
00986
ip_send_check(ciph);
00987
00988
00989 pptr[0] = cp->dport;
00990
00991
00992 icmph->checksum = 0;
00993 icmph->checksum =
ip_compute_csum((
unsigned char *) icmph, len);
00994 skb->ip_summed =
CHECKSUM_UNNECESSARY;
00995
00996
IP_VS_DBG(11,
"Forwarding incoming ICMP to "
00997
"%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
00998 NIPQUAD(ciph->saddr), ntohs(pptr[0]),
00999 NIPQUAD(ciph->daddr), ntohs(pptr[1]));
01000
01001
#ifdef CONFIG_NETFILTER_DEBUG
01002
skb->nf_debug = 1 <<
NF_IP_LOCAL_OUT;
01003
#endif
01004
ip_send(skb);
01005
ip_vs_conn_put(cp);
01006
return NF_STOLEN;
01007
01008 tx_error_icmp:
01009
dst_link_failure(skb);
01010 tx_error:
01011
dev_kfree_skb(skb);
01012
ip_vs_conn_put(cp);
01013
return NF_STOLEN;
01014 }
01015
01016
01017
01018
01019
01020
01021 static unsigned int ip_vs_in(
unsigned int hooknum,
01022
struct sk_buff **skb_p,
01023
const struct net_device *in,
01024
const struct net_device *out,
01025
int (*okfn)(
struct sk_buff *))
01026 {
01027
struct sk_buff *skb = *skb_p;
01028
struct iphdr *iph = skb->
nh.iph;
01029
union ip_vs_tphdr h;
01030
struct ip_vs_conn *cp;
01031
struct ip_vs_service *svc;
01032
int ihl;
01033
int ret;
01034
01035
01036
01037
01038
01039
if (skb->pkt_type != PACKET_HOST || skb->dev == &
loopback_dev) {
01040
IP_VS_DBG(12,
"packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
01041 skb->pkt_type,
01042 iph->protocol,
01043 NIPQUAD(iph->daddr));
01044
return NF_ACCEPT;
01045 }
01046
01047
if (iph->protocol ==
IPPROTO_ICMP)
01048
return ip_vs_in_icmp(skb_p);
01049
01050
01051
if (iph->protocol !=
IPPROTO_TCP && iph->protocol !=
IPPROTO_UDP)
01052
return NF_ACCEPT;
01053
01054
01055
01056 ihl = iph->ihl << 2;
01057
if (
ip_vs_header_check(skb, iph->protocol, ihl) == -1)
01058
return NF_DROP;
01059 iph = skb->nh.iph;
01060 h.
raw = (
char*) iph + ihl;
01061
01062
01063
01064
01065 cp =
ip_vs_conn_in_get(iph->protocol, iph->saddr, h.
portp[0],
01066 iph->daddr, h.
portp[1]);
01067
01068
if (!cp &&
01069 (h.
th->syn || (iph->protocol!=
IPPROTO_TCP)) &&
01070 (svc =
ip_vs_service_get(skb->nfmark, iph->protocol,
01071 iph->daddr, h.
portp[1]))) {
01072
if (
ip_vs_todrop()) {
01073
01074
01075
01076
01077
ip_vs_service_put(svc);
01078
return NF_DROP;
01079 }
01080
01081
01082
01083
01084
01085 cp =
ip_vs_schedule(svc, iph);
01086
if (!cp)
01087
return ip_vs_leave(svc, skb);
01088
ip_vs_conn_stats(cp, svc);
01089
ip_vs_service_put(svc);
01090 }
01091
01092
if (!cp) {
01093
01094
IP_VS_DBG(12,
"packet for %s %d.%d.%d.%d:%d continue "
01095
"traversal as normal.\n",
01096
ip_vs_proto_name(iph->protocol),
01097 NIPQUAD(iph->daddr),
01098 ntohs(h.
portp[1]));
01099
return NF_ACCEPT;
01100 }
01101
01102
IP_VS_DBG(11,
"Incoming %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d\n",
01103
ip_vs_proto_name(iph->protocol),
01104 NIPQUAD(iph->saddr), ntohs(h.
portp[0]),
01105 NIPQUAD(iph->daddr), ntohs(h.
portp[1]));
01106
01107
01108
if (cp->dest && !(cp->dest->flags &
IP_VS_DEST_F_AVAILABLE)) {
01109
01110
01111
if (
sysctl_ip_vs_expire_nodest_conn) {
01112
01113
ip_vs_conn_expire_now(cp);
01114 }
else {
01115
01116
01117
__ip_vs_conn_put(cp);
01118 }
01119
return NF_DROP;
01120 }
01121
01122
ip_vs_in_stats(cp, skb);
01123
ip_vs_set_state(cp,
VS_STATE_INPUT, iph, h.
portp);
01124
if (cp->packet_xmit)
01125 ret = cp->packet_xmit(skb, cp);
01126
else {
01127
IP_VS_DBG_RL(
"warning: packet_xmit is null");
01128 ret =
NF_ACCEPT;
01129 }
01130
01131
01132
01133 atomic_inc(&cp->in_pkts);
01134
if (
ip_vs_sync_state ==
IP_VS_STATE_MASTER &&
01135 (cp->protocol !=
IPPROTO_TCP ||
01136 cp->state ==
IP_VS_S_ESTABLISHED) &&
01137 (atomic_read(&cp->in_pkts) % 50 ==
sysctl_ip_vs_sync_threshold))
01138
ip_vs_sync_conn(cp);
01139
01140
ip_vs_conn_put(cp);
01141
return ret;
01142 }
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153
01154 static unsigned int ip_vs_forward_icmp(
unsigned int hooknum,
01155
struct sk_buff **skb_p,
01156
const struct net_device *in,
01157
const struct net_device *out,
01158
int (*okfn)(
struct sk_buff *))
01159 {
01160
struct sk_buff *skb = *skb_p;
01161
struct iphdr *iph = skb->
nh.iph;
01162
01163
if (iph->protocol !=
IPPROTO_ICMP)
01164
return NF_ACCEPT;
01165
01166
if (iph->frag_off & __constant_htons(
IP_MF|
IP_OFFSET)) {
01167 skb =
ip_defrag(skb);
01168
if (!skb)
01169
return NF_STOLEN;
01170 *skb_p = skb;
01171 }
01172
01173
return ip_vs_in_icmp(skb_p);
01174 }
01175
01176
01177
01178
01179
01180 static struct nf_hook_ops
ip_vs_in_ops = {
01181 { NULL, NULL },
01182
ip_vs_in, PF_INET,
NF_IP_LOCAL_IN, 100
01183 };
01184
01185
01186 static struct nf_hook_ops
ip_vs_out_ops = {
01187 { NULL, NULL },
01188
ip_vs_out, PF_INET,
NF_IP_FORWARD, 100
01189 };
01190
01191
01192
01193 static struct nf_hook_ops
ip_vs_forward_icmp_ops = {
01194 { NULL, NULL },
01195
ip_vs_forward_icmp, PF_INET,
NF_IP_FORWARD, 99
01196 };
01197
01198
01199 static struct nf_hook_ops
ip_vs_post_routing_ops = {
01200 { NULL, NULL },
01201
ip_vs_post_routing, PF_INET,
NF_IP_POST_ROUTING,
NF_IP_PRI_NAT_SRC-1
01202 };
01203
01204
01205
01206
01207
01208 static int __init
ip_vs_init(
void)
01209 {
01210
int ret;
01211
01212 ret =
ip_vs_control_init();
01213
if (ret < 0) {
01214
IP_VS_ERR(
"can't setup control.\n");
01215
goto cleanup_nothing;
01216 }
01217
01218 ret =
ip_vs_conn_init();
01219
if (ret < 0) {
01220
IP_VS_ERR(
"can't setup connection table.\n");
01221
goto cleanup_control;
01222 }
01223
01224 ret =
ip_vs_app_init();
01225
if (ret < 0) {
01226
IP_VS_ERR(
"can't setup application helper.\n");
01227
goto cleanup_conn;
01228 }
01229
01230 ret =
nf_register_hook(&
ip_vs_in_ops);
01231
if (ret < 0) {
01232
IP_VS_ERR(
"can't register in hook.\n");
01233
goto cleanup_app;
01234 }
01235 ret =
nf_register_hook(&
ip_vs_out_ops);
01236
if (ret < 0) {
01237
IP_VS_ERR(
"can't register out hook.\n");
01238
goto cleanup_inops;
01239 }
01240 ret =
nf_register_hook(&
ip_vs_post_routing_ops);
01241
if (ret < 0) {
01242
IP_VS_ERR(
"can't register post_routing hook.\n");
01243
goto cleanup_outops;
01244 }
01245 ret =
nf_register_hook(&
ip_vs_forward_icmp_ops);
01246
if (ret < 0) {
01247
IP_VS_ERR(
"can't register forward_icmp hook.\n");
01248
goto cleanup_postroutingops;
01249 }
01250
01251
IP_VS_INFO(
"ipvs loaded.\n");
01252
return ret;
01253
01254 cleanup_postroutingops:
01255
nf_unregister_hook(&
ip_vs_post_routing_ops);
01256 cleanup_outops:
01257
nf_unregister_hook(&
ip_vs_out_ops);
01258 cleanup_inops:
01259
nf_unregister_hook(&
ip_vs_in_ops);
01260 cleanup_app:
01261
ip_vs_app_cleanup();
01262 cleanup_conn:
01263
ip_vs_conn_cleanup();
01264 cleanup_control:
01265
ip_vs_control_cleanup();
01266 cleanup_nothing:
01267
return ret;
01268 }
01269
01270 static void __exit
ip_vs_cleanup(
void)
01271 {
01272
nf_unregister_hook(&
ip_vs_forward_icmp_ops);
01273
nf_unregister_hook(&
ip_vs_post_routing_ops);
01274
nf_unregister_hook(&
ip_vs_out_ops);
01275
nf_unregister_hook(&
ip_vs_in_ops);
01276
ip_vs_app_cleanup();
01277
ip_vs_conn_cleanup();
01278
ip_vs_control_cleanup();
01279
IP_VS_INFO(
"ipvs unloaded.\n");
01280 }
01281
01282
module_init(ip_vs_init);
01283
module_exit(ip_vs_cleanup);
01284
MODULE_LICENSE(
"GPL");