00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
#include <linux/config.h>
00064
#include <asm/uaccess.h>
00065
#include <asm/system.h>
00066
#include <asm/bitops.h>
00067
#include <linux/types.h>
00068
#include <linux/kernel.h>
00069
#include <linux/sched.h>
00070
#include <linux/mm.h>
00071
#include <linux/string.h>
00072
#include <linux/socket.h>
00073
#include <linux/sockios.h>
00074
#include <linux/errno.h>
00075
#include <linux/in.h>
00076
#include <linux/inet.h>
00077
#include <linux/netdevice.h>
00078
#include <linux/proc_fs.h>
00079
#include <linux/init.h>
00080
#include <linux/skbuff.h>
00081
#include <linux/rtnetlink.h>
00082
#include <linux/inetdevice.h>
00083
#include <linux/igmp.h>
00084
#include <linux/pkt_sched.h>
00085
#include <linux/mroute.h>
00086
#include <linux/netfilter_ipv4.h>
00087
#include <linux/random.h>
00088
#include <linux/jhash.h>
00089
#include <net/protocol.h>
00090
#include <net/ip.h>
00091
#include <net/route.h>
00092
#include <net/inetpeer.h>
00093
#include <net/sock.h>
00094
#include <net/ip_fib.h>
00095
#include <net/arp.h>
00096
#include <net/tcp.h>
00097
#include <net/icmp.h>
00098
#ifdef CONFIG_SYSCTL
00099
#include <linux/sysctl.h>
00100
#endif
00101
00102 #define IP_MAX_MTU 0xFFF0
00103
00104 #define RT_GC_TIMEOUT (300*HZ)
00105
00106 int ip_rt_min_delay = 2 * HZ;
00107 int ip_rt_max_delay = 10 * HZ;
00108 int ip_rt_max_size;
00109 int ip_rt_gc_timeout =
RT_GC_TIMEOUT;
00110 int ip_rt_gc_interval = 60 * HZ;
00111 int ip_rt_gc_min_interval = HZ / 2;
00112 int ip_rt_redirect_number = 9;
00113 int ip_rt_redirect_load = HZ / 50;
00114 int ip_rt_redirect_silence = ((HZ / 50) << (9 + 1));
00115 int ip_rt_error_cost = HZ;
00116 int ip_rt_error_burst = 5 * HZ;
00117 int ip_rt_gc_elasticity = 8;
00118 int ip_rt_mtu_expires = 10 * 60 * HZ;
00119 int ip_rt_min_pmtu = 512 + 20 + 20;
00120 int ip_rt_min_advmss = 256;
00121 int ip_rt_secret_interval = 10 * 60 * HZ;
00122 static unsigned long rt_deadline;
00123
00124 #define RTprint(a...) printk(KERN_DEBUG a)
00125
00126 static struct timer_list
rt_flush_timer;
00127 static struct timer_list
rt_periodic_timer;
00128 static struct timer_list
rt_secret_timer;
00129
00130
00131
00132
00133
00134
/*
 * Forward declarations of the callbacks installed in ipv4_dst_ops.
 */
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ipv4_dst_reroute(struct dst_entry *dst,
					  struct sk_buff *skb);
static void		 ipv4_dst_destroy(struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void		 ipv4_link_failure(struct sk_buff *skb);
static int		 rt_garbage_collect(void);
00141
00142
00143 struct dst_ops ipv4_dst_ops = {
00144 family: AF_INET,
00145 protocol: __constant_htons(ETH_P_IP),
00146 gc:
rt_garbage_collect,
00147 check:
ipv4_dst_check,
00148 reroute:
ipv4_dst_reroute,
00149 destroy:
ipv4_dst_destroy,
00150 negative_advice:
ipv4_negative_advice,
00151 link_failure:
ipv4_link_failure,
00152 entry_size:
sizeof(
struct rtable),
00153 };
00154
00155 #define ECN_OR_COST(class) TC_PRIO_##class
00156
00157 __u8
ip_tos2prio[16] = {
00158 TC_PRIO_BESTEFFORT,
00159
ECN_OR_COST(FILLER),
00160 TC_PRIO_BESTEFFORT,
00161
ECN_OR_COST(BESTEFFORT),
00162 TC_PRIO_BULK,
00163
ECN_OR_COST(BULK),
00164 TC_PRIO_BULK,
00165
ECN_OR_COST(BULK),
00166 TC_PRIO_INTERACTIVE,
00167
ECN_OR_COST(INTERACTIVE),
00168 TC_PRIO_INTERACTIVE,
00169
ECN_OR_COST(INTERACTIVE),
00170 TC_PRIO_INTERACTIVE_BULK,
00171
ECN_OR_COST(INTERACTIVE_BULK),
00172 TC_PRIO_INTERACTIVE_BULK,
00173
ECN_OR_COST(INTERACTIVE_BULK)
00174 };
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191 struct rt_hash_bucket {
00192 struct rtable *
chain;
00193 rwlock_t
lock;
00194 } __attribute__((
__aligned__(8)));
00195
00196 static struct rt_hash_bucket *
rt_hash_table;
00197 static unsigned rt_hash_mask;
00198 static int rt_hash_log;
00199 static unsigned int rt_hash_rnd;
00200
00201 struct rt_cache_stat rt_cache_stat[NR_CPUS];
00202
00203
static int rt_intern_hash(
unsigned hash,
struct rtable *rth,
00204
struct rtable **res);
00205
00206 static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos)
00207 {
00208
return (jhash_3words(daddr, saddr, (u32) tos,
rt_hash_rnd)
00209 &
rt_hash_mask);
00210 }
00211
00212 static int rt_cache_get_info(
char *buffer,
char **start, off_t offset,
00213
int length)
00214 {
00215
int len = 0;
00216 off_t pos = 128;
00217
char temp[256];
00218
struct rtable *r;
00219
int i;
00220
00221
if (offset < 128) {
00222 sprintf(buffer,
"%-127s\n",
00223
"Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
00224
"Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
00225
"HHUptod\tSpecDst");
00226 len = 128;
00227 }
00228
00229
for (i =
rt_hash_mask; i >= 0; i--) {
00230 read_lock_bh(&
rt_hash_table[i].lock);
00231
for (r =
rt_hash_table[i].
chain; r; r = r->
u.rt_next) {
00232
00233
00234
00235 pos += 128;
00236
00237
if (pos <= offset) {
00238 len = 0;
00239
continue;
00240 }
00241 sprintf(temp,
"%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t"
00242
"%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X",
00243 r->u.dst.dev ? r->u.dst.dev->name :
"*",
00244 (
unsigned long)r->rt_dst,
00245 (
unsigned long)r->rt_gateway,
00246 r->rt_flags,
00247 atomic_read(&r->u.dst.__refcnt),
00248 r->u.dst.__use,
00249 0,
00250 (
unsigned long)r->rt_src,
00251 (r->u.dst.advmss ?
00252 (
int) r->u.dst.advmss + 40 : 0),
00253 r->u.dst.window,
00254 (
int)((r->u.dst.rtt >> 3) + r->u.dst.rttvar),
00255 r->key.tos,
00256 r->u.dst.hh ?
00257 atomic_read(&r->u.dst.hh->hh_refcnt) :
00258 -1,
00259 r->u.dst.hh ?
00260 (r->u.dst.hh->hh_output ==
00261
dev_queue_xmit) : 0,
00262 r->rt_spec_dst);
00263 sprintf(buffer + len,
"%-127s\n", temp);
00264 len += 128;
00265
if (pos >= offset+length) {
00266 read_unlock_bh(&
rt_hash_table[i].lock);
00267
goto done;
00268 }
00269 }
00270 read_unlock_bh(&
rt_hash_table[i].lock);
00271 }
00272
00273 done:
00274 *start = buffer + len - (pos - offset);
00275 len = pos - offset;
00276
if (len > length)
00277 len = length;
00278
return len;
00279 }
00280
00281 static int rt_cache_stat_get_info(
char *buffer,
char **start, off_t offset,
int length)
00282 {
00283
unsigned int dst_entries = atomic_read(&
ipv4_dst_ops.
entries);
00284
int i, lcpu;
00285
int len = 0;
00286
00287
for (lcpu = 0; lcpu < smp_num_cpus; lcpu++) {
00288 i = cpu_logical_map(lcpu);
00289
00290 len += sprintf(buffer+len,
"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
00291 dst_entries,
00292
rt_cache_stat[i].in_hit,
00293
rt_cache_stat[i].in_slow_tot,
00294
rt_cache_stat[i].in_slow_mc,
00295
rt_cache_stat[i].in_no_route,
00296
rt_cache_stat[i].in_brd,
00297
rt_cache_stat[i].in_martian_dst,
00298
rt_cache_stat[i].in_martian_src,
00299
00300
rt_cache_stat[i].out_hit,
00301
rt_cache_stat[i].out_slow_tot,
00302
rt_cache_stat[i].out_slow_mc,
00303
00304
rt_cache_stat[i].gc_total,
00305
rt_cache_stat[i].gc_ignored,
00306
rt_cache_stat[i].gc_goal_miss,
00307
rt_cache_stat[i].gc_dst_overflow,
00308
rt_cache_stat[i].in_hlist_search,
00309
rt_cache_stat[i].out_hlist_search
00310
00311 );
00312 }
00313 len -= offset;
00314
00315
if (len > length)
00316 len = length;
00317
if (len < 0)
00318 len = 0;
00319
00320 *start = buffer + offset;
00321
return len;
00322 }
00323
00324 static __inline__
void rt_free(
struct rtable *rt)
00325 {
00326
dst_free(&rt->
u.dst);
00327 }
00328
00329 static __inline__
void rt_drop(
struct rtable *rt)
00330 {
00331
ip_rt_put(rt);
00332
dst_free(&rt->
u.dst);
00333 }
00334
00335 static __inline__
int rt_fast_clean(
struct rtable *rth)
00336 {
00337
00338
00339
return (rth->
rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
00340 rth->
key.
iif && rth->
u.rt_next;
00341 }
00342
00343 static __inline__
int rt_valuable(
struct rtable *rth)
00344 {
00345
return (rth->
rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
00346 rth->
u.dst.expires;
00347 }
00348
00349 static __inline__
int rt_may_expire(
struct rtable *rth,
unsigned long tmo1,
unsigned long tmo2)
00350 {
00351
unsigned long age;
00352
int ret = 0;
00353
00354
if (atomic_read(&rth->
u.dst.__refcnt))
00355
goto out;
00356
00357 ret = 1;
00358
if (rth->
u.dst.expires &&
00359 time_after_eq(jiffies, rth->
u.dst.expires))
00360
goto out;
00361
00362 age = jiffies - rth->
u.dst.lastuse;
00363 ret = 0;
00364
if ((age <= tmo1 && !
rt_fast_clean(rth)) ||
00365 (age <= tmo2 &&
rt_valuable(rth)))
00366
goto out;
00367 ret = 1;
00368 out:
return ret;
00369 }
00370
00371
00372
00373
00374
00375
00376 static inline u32
rt_score(
struct rtable *rt)
00377 {
00378 u32 score = jiffies - rt->
u.dst.lastuse;
00379
00380 score = ~score & ~(3<<30);
00381
00382
if (
rt_valuable(rt))
00383 score |= (1<<31);
00384
00385
if (!rt->
key.
iif ||
00386 !(rt->
rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
00387 score |= (1<<30);
00388
00389
return score;
00390 }
00391
00392
00393 static void SMP_TIMER_NAME(rt_check_expire)(
unsigned long dummy)
00394 {
00395
static int rover;
00396
int i = rover, t;
00397
struct rtable *rth, **rthp;
00398
unsigned long now = jiffies;
00399
00400
for (t = ip_rt_gc_interval << rt_hash_log; t >= 0;
00401 t -=
ip_rt_gc_timeout) {
00402
unsigned long tmo =
ip_rt_gc_timeout;
00403
00404 i = (i + 1) &
rt_hash_mask;
00405 rthp = &
rt_hash_table[i].
chain;
00406
00407 write_lock(&
rt_hash_table[i].lock);
00408
while ((rth = *rthp) != NULL) {
00409
if (rth->u.dst.expires) {
00410
00411
if (time_before_eq(now, rth->u.dst.expires)) {
00412 tmo >>= 1;
00413 rthp = &rth->u.rt_next;
00414
continue;
00415 }
00416 }
else if (!
rt_may_expire(rth, tmo,
ip_rt_gc_timeout)) {
00417 tmo >>= 1;
00418 rthp = &rth->u.rt_next;
00419
continue;
00420 }
00421
00422
00423 *rthp = rth->u.rt_next;
00424
rt_free(rth);
00425 }
00426 write_unlock(&
rt_hash_table[i].lock);
00427
00428
00429
if (time_after(jiffies, now))
00430
break;
00431 }
00432 rover = i;
00433 mod_timer(&
rt_periodic_timer, now +
ip_rt_gc_interval);
00434 }
00435
00436
SMP_TIMER_DEFINE(rt_check_expire, rt_gc_task);
00437
00438
00439
00440
00441 static void SMP_TIMER_NAME(rt_run_flush)(
unsigned long dummy)
00442 {
00443
int i;
00444
struct rtable *rth, *next;
00445
00446
rt_deadline = 0;
00447
00448 get_random_bytes(&
rt_hash_rnd, 4);
00449
00450
for (i =
rt_hash_mask; i >= 0; i--) {
00451 write_lock_bh(&
rt_hash_table[i].lock);
00452 rth =
rt_hash_table[i].
chain;
00453
if (rth)
00454
rt_hash_table[i].
chain = NULL;
00455 write_unlock_bh(&
rt_hash_table[i].lock);
00456
00457
for (; rth; rth = next) {
00458 next = rth->u.rt_next;
00459
rt_free(rth);
00460 }
00461 }
00462 }
00463
00464
SMP_TIMER_DEFINE(rt_run_flush, rt_cache_flush_task);
00465
00466 static spinlock_t
rt_flush_lock = SPIN_LOCK_UNLOCKED;
00467
00468 void rt_cache_flush(
int delay)
00469 {
00470
unsigned long now = jiffies;
00471
int user_mode = !in_softirq();
00472
00473
if (delay < 0)
00474 delay =
ip_rt_min_delay;
00475
00476 spin_lock_bh(&
rt_flush_lock);
00477
00478
if (del_timer(&
rt_flush_timer) && delay > 0 &&
rt_deadline) {
00479
long tmo = (
long)(
rt_deadline - now);
00480
00481
00482
00483
00484
00485
00486
00487
00488
if (user_mode && tmo <
ip_rt_max_delay-
ip_rt_min_delay)
00489 tmo = 0;
00490
00491
if (delay > tmo)
00492 delay = tmo;
00493 }
00494
00495
if (delay <= 0) {
00496 spin_unlock_bh(&
rt_flush_lock);
00497 SMP_TIMER_NAME(
rt_run_flush)(0);
00498
return;
00499 }
00500
00501
if (
rt_deadline == 0)
00502
rt_deadline = now +
ip_rt_max_delay;
00503
00504 mod_timer(&
rt_flush_timer, now+delay);
00505 spin_unlock_bh(&
rt_flush_lock);
00506 }
00507
00508 static void rt_secret_rebuild(
unsigned long dummy)
00509 {
00510
unsigned long now = jiffies;
00511
00512
rt_cache_flush(0);
00513 mod_timer(&
rt_secret_timer, now +
ip_rt_secret_interval);
00514 }
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529 static int rt_garbage_collect(
void)
00530 {
00531
static unsigned long expire =
RT_GC_TIMEOUT;
00532
static unsigned long last_gc;
00533
static int rover;
00534
static int equilibrium;
00535
struct rtable *rth, **rthp;
00536
unsigned long now = jiffies;
00537
int goal;
00538
00539
00540
00541
00542
00543
00544
rt_cache_stat[smp_processor_id()].
gc_total++;
00545
00546
if (now - last_gc <
ip_rt_gc_min_interval &&
00547 atomic_read(&
ipv4_dst_ops.
entries) <
ip_rt_max_size) {
00548
rt_cache_stat[smp_processor_id()].
gc_ignored++;
00549
goto out;
00550 }
00551
00552
00553 goal = atomic_read(&
ipv4_dst_ops.
entries) -
00554 (
ip_rt_gc_elasticity <<
rt_hash_log);
00555
if (goal <= 0) {
00556
if (equilibrium <
ipv4_dst_ops.
gc_thresh)
00557 equilibrium =
ipv4_dst_ops.
gc_thresh;
00558 goal = atomic_read(&
ipv4_dst_ops.
entries) - equilibrium;
00559
if (goal > 0) {
00560 equilibrium += min_t(
unsigned int, goal / 2,
rt_hash_mask + 1);
00561 goal = atomic_read(&
ipv4_dst_ops.
entries) - equilibrium;
00562 }
00563 }
else {
00564
00565
00566
00567 goal = max_t(
unsigned int, goal / 2,
rt_hash_mask + 1);
00568 equilibrium = atomic_read(&
ipv4_dst_ops.
entries) - goal;
00569 }
00570
00571
if (now - last_gc >=
ip_rt_gc_min_interval)
00572 last_gc = now;
00573
00574
if (goal <= 0) {
00575 equilibrium += goal;
00576
goto work_done;
00577 }
00578
00579
do {
00580
int i, k;
00581
00582
for (i =
rt_hash_mask, k = rover; i >= 0; i--) {
00583
unsigned long tmo = expire;
00584
00585 k = (k + 1) &
rt_hash_mask;
00586 rthp = &
rt_hash_table[k].
chain;
00587 write_lock_bh(&
rt_hash_table[k].lock);
00588
while ((rth = *rthp) != NULL) {
00589
if (!
rt_may_expire(rth, tmo, expire)) {
00590 tmo >>= 1;
00591 rthp = &rth->u.rt_next;
00592
continue;
00593 }
00594 *rthp = rth->u.rt_next;
00595
rt_free(rth);
00596 goal--;
00597 }
00598 write_unlock_bh(&
rt_hash_table[k].lock);
00599
if (goal <= 0)
00600
break;
00601 }
00602 rover = k;
00603
00604
if (goal <= 0)
00605
goto work_done;
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
rt_cache_stat[smp_processor_id()].
gc_goal_miss++;
00617
00618
if (expire == 0)
00619
break;
00620
00621 expire >>= 1;
00622
#if RT_CACHE_DEBUG >= 2
00623
printk(KERN_DEBUG
"expire>> %u %d %d %d\n", expire,
00624 atomic_read(&
ipv4_dst_ops.
entries), goal, i);
00625
#endif
00626
00627
if (atomic_read(&
ipv4_dst_ops.
entries) <
ip_rt_max_size)
00628
goto out;
00629 }
while (!in_softirq() && time_before_eq(jiffies, now));
00630
00631
if (atomic_read(&
ipv4_dst_ops.
entries) <
ip_rt_max_size)
00632
goto out;
00633
if (
net_ratelimit())
00634 printk(KERN_WARNING
"dst cache overflow\n");
00635
rt_cache_stat[smp_processor_id()].
gc_dst_overflow++;
00636
return 1;
00637
00638 work_done:
00639 expire +=
ip_rt_gc_min_interval;
00640
if (expire >
ip_rt_gc_timeout ||
00641 atomic_read(&
ipv4_dst_ops.
entries) <
ipv4_dst_ops.
gc_thresh)
00642 expire =
ip_rt_gc_timeout;
00643
#if RT_CACHE_DEBUG >= 2
00644
printk(KERN_DEBUG
"expire++ %u %d %d %d\n", expire,
00645 atomic_read(&
ipv4_dst_ops.
entries), goal, rover);
00646
#endif
00647
out:
return 0;
00648 }
00649
00650 static int rt_intern_hash(
unsigned hash,
struct rtable *rt,
struct rtable **rp)
00651 {
00652
struct rtable *rth, **rthp;
00653
unsigned long now;
00654
struct rtable *cand, **candp;
00655 u32 min_score;
00656
int chain_length;
00657
int attempts = !in_softirq();
00658
00659 restart:
00660 chain_length = 0;
00661 min_score = ~(u32)0;
00662 cand = NULL;
00663 candp = NULL;
00664 now = jiffies;
00665
00666 rthp = &
rt_hash_table[hash].
chain;
00667
00668 write_lock_bh(&
rt_hash_table[hash].lock);
00669
while ((rth = *rthp) != NULL) {
00670
if (memcmp(&rth->key, &rt->
key,
sizeof(rt->
key)) == 0) {
00671
00672 *rthp = rth->u.rt_next;
00673 rth->u.rt_next =
rt_hash_table[hash].
chain;
00674
rt_hash_table[hash].
chain = rth;
00675
00676 rth->
u.dst.__use++;
00677
dst_hold(&rth->u.dst);
00678 rth->u.dst.lastuse = now;
00679 write_unlock_bh(&
rt_hash_table[hash].lock);
00680
00681
rt_drop(rt);
00682 *rp = rth;
00683
return 0;
00684 }
00685
00686
if (!atomic_read(&rth->u.dst.__refcnt)) {
00687 u32 score =
rt_score(rth);
00688
00689
if (score <= min_score) {
00690 cand = rth;
00691 candp = rthp;
00692 min_score = score;
00693 }
00694 }
00695
00696 chain_length++;
00697
00698 rthp = &rth->u.rt_next;
00699 }
00700
00701
if (cand) {
00702
00703
00704
00705
00706
00707
00708
if (chain_length >
ip_rt_gc_elasticity) {
00709 *candp = cand->u.rt_next;
00710
rt_free(cand);
00711 }
00712 }
00713
00714
00715
00716
00717
if (rt->
rt_type == RTN_UNICAST || rt->
key.
iif == 0) {
00718
int err =
arp_bind_neighbour(&rt->
u.dst);
00719
if (err) {
00720 write_unlock_bh(&
rt_hash_table[hash].lock);
00721
00722
if (err != -ENOBUFS) {
00723
rt_drop(rt);
00724
return err;
00725 }
00726
00727
00728
00729
00730
00731
if (attempts-- > 0) {
00732
int saved_elasticity =
ip_rt_gc_elasticity;
00733
int saved_int =
ip_rt_gc_min_interval;
00734
ip_rt_gc_elasticity = 1;
00735
ip_rt_gc_min_interval = 0;
00736
rt_garbage_collect();
00737
ip_rt_gc_min_interval = saved_int;
00738
ip_rt_gc_elasticity = saved_elasticity;
00739
goto restart;
00740 }
00741
00742
if (
net_ratelimit())
00743 printk(KERN_WARNING
"Neighbour table overflow.\n");
00744
rt_drop(rt);
00745
return -ENOBUFS;
00746 }
00747 }
00748
00749 rt->
u.rt_next =
rt_hash_table[hash].
chain;
00750
#if RT_CACHE_DEBUG >= 2
00751
if (rt->
u.rt_next) {
00752
struct rtable *trt;
00753 printk(KERN_DEBUG
"rt_cache @%02x: %u.%u.%u.%u", hash,
00754 NIPQUAD(rt->
rt_dst));
00755
for (trt = rt->
u.rt_next; trt; trt = trt->u.rt_next)
00756 printk(
" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst));
00757 printk(
"\n");
00758 }
00759
#endif
00760
rt_hash_table[hash].
chain = rt;
00761 write_unlock_bh(&
rt_hash_table[hash].lock);
00762 *rp = rt;
00763
return 0;
00764 }
00765
00766 void rt_bind_peer(
struct rtable *rt,
int create)
00767 {
00768
static spinlock_t rt_peer_lock = SPIN_LOCK_UNLOCKED;
00769
struct inet_peer *peer;
00770
00771 peer =
inet_getpeer(rt->
rt_dst, create);
00772
00773 spin_lock_bh(&rt_peer_lock);
00774
if (rt->
peer == NULL) {
00775 rt->
peer = peer;
00776 peer = NULL;
00777 }
00778 spin_unlock_bh(&rt_peer_lock);
00779
if (peer)
00780
inet_putpeer(peer);
00781 }
00782
00783
00784
00785
00786
00787
00788
00789
00790 static void ip_select_fb_ident(
struct iphdr *iph)
00791 {
00792
static spinlock_t ip_fb_id_lock = SPIN_LOCK_UNLOCKED;
00793
static u32 ip_fallback_id;
00794 u32 salt;
00795
00796 spin_lock_bh(&ip_fb_id_lock);
00797 salt = secure_ip_id(ip_fallback_id ^ iph->
daddr);
00798 iph->
id = htons(salt & 0xFFFF);
00799 ip_fallback_id = salt;
00800 spin_unlock_bh(&ip_fb_id_lock);
00801 }
00802
00803 void __ip_select_ident(
struct iphdr *iph,
struct dst_entry *dst)
00804 {
00805
struct rtable *rt = (
struct rtable *) dst;
00806
00807
if (rt) {
00808
if (rt->peer == NULL)
00809
rt_bind_peer(rt, 1);
00810
00811
00812
00813
00814
if (rt->peer) {
00815 iph->
id = htons(
inet_getid(rt->peer));
00816
return;
00817 }
00818 }
else
00819 printk(KERN_DEBUG
"rt_bind_peer(0) @%p\n",
NET_CALLER(iph));
00820
00821
ip_select_fb_ident(iph);
00822 }
00823
00824 static void rt_del(
unsigned hash,
struct rtable *rt)
00825 {
00826
struct rtable **rthp;
00827
00828 write_lock_bh(&
rt_hash_table[hash].lock);
00829
ip_rt_put(rt);
00830
for (rthp = &
rt_hash_table[hash].
chain; *rthp;
00831 rthp = &(*rthp)->
u.rt_next)
00832
if (*rthp == rt) {
00833 *rthp = rt->
u.rt_next;
00834
rt_free(rt);
00835
break;
00836 }
00837 write_unlock_bh(&
rt_hash_table[hash].lock);
00838 }
00839
00840 void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
00841 u32 saddr, u8 tos,
struct net_device *dev)
00842 {
00843
int i, k;
00844
struct in_device *in_dev =
in_dev_get(dev);
00845
struct rtable *rth, **rthp;
00846 u32 skeys[2] = { saddr, 0 };
00847
int ikeys[2] = { dev->
ifindex, 0 };
00848
00849 tos &=
IPTOS_RT_MASK;
00850
00851
if (!in_dev)
00852
return;
00853
00854
if (new_gw == old_gw || !
IN_DEV_RX_REDIRECTS(in_dev)
00855 ||
MULTICAST(new_gw) ||
BADCLASS(new_gw) ||
ZERONET(new_gw))
00856
goto reject_redirect;
00857
00858
if (!
IN_DEV_SHARED_MEDIA(in_dev)) {
00859
if (!
inet_addr_onlink(in_dev, new_gw, old_gw))
00860
goto reject_redirect;
00861
if (
IN_DEV_SEC_REDIRECTS(in_dev) &&
ip_fib_check_default(new_gw, dev))
00862
goto reject_redirect;
00863 }
else {
00864
if (
inet_addr_type(new_gw) != RTN_UNICAST)
00865
goto reject_redirect;
00866 }
00867
00868
for (i = 0; i < 2; i++) {
00869
for (k = 0; k < 2; k++) {
00870
unsigned hash =
rt_hash_code(daddr,
00871 skeys[i] ^ (ikeys[k] << 5),
00872 tos);
00873
00874 rthp=&
rt_hash_table[hash].
chain;
00875
00876 read_lock(&
rt_hash_table[hash].lock);
00877
while ((rth = *rthp) != NULL) {
00878
struct rtable *rt;
00879
00880
if (rth->key.dst != daddr ||
00881 rth->key.src != skeys[i] ||
00882 rth->key.tos != tos ||
00883 rth->key.oif != ikeys[k] ||
00884 rth->key.iif != 0) {
00885 rthp = &rth->u.rt_next;
00886
continue;
00887 }
00888
00889
if (rth->rt_dst != daddr ||
00890 rth->rt_src != saddr ||
00891 rth->u.dst.error ||
00892 rth->rt_gateway != old_gw ||
00893 rth->u.dst.dev != dev)
00894
break;
00895
00896
dst_hold(&rth->u.dst);
00897 read_unlock(&
rt_hash_table[hash].lock);
00898
00899 rt =
dst_alloc(&
ipv4_dst_ops);
00900
if (rt == NULL) {
00901
ip_rt_put(rth);
00902
in_dev_put(in_dev);
00903
return;
00904 }
00905
00906
00907 *rt = *rth;
00908 rt->u.dst.__use = 1;
00909 atomic_set(&rt->u.dst.__refcnt, 1);
00910
if (rt->u.dst.dev)
00911
dev_hold(rt->u.dst.dev);
00912 rt->u.dst.lastuse = jiffies;
00913 rt->u.dst.neighbour = NULL;
00914 rt->u.dst.hh = NULL;
00915 rt->u.dst.obsolete = 0;
00916
00917 rt->rt_flags |= RTCF_REDIRECTED;
00918
00919
00920 rt->rt_gateway = new_gw;
00921
00922
00923
dst_confirm(&rth->u.dst);
00924
00925
if (rt->peer)
00926 atomic_inc(&rt->peer->refcnt);
00927
00928
if (
arp_bind_neighbour(&rt->u.dst) ||
00929 !(rt->u.dst.neighbour->nud_state &
00930
NUD_VALID)) {
00931
if (rt->u.dst.neighbour)
00932
neigh_event_send(rt->u.dst.neighbour, NULL);
00933
ip_rt_put(rth);
00934
rt_drop(rt);
00935
goto do_next;
00936 }
00937
00938
rt_del(hash, rth);
00939
if (!
rt_intern_hash(hash, rt, &rt))
00940
ip_rt_put(rt);
00941
goto do_next;
00942 }
00943 read_unlock(&
rt_hash_table[hash].lock);
00944 do_next:
00945 ;
00946 }
00947 }
00948
in_dev_put(in_dev);
00949
return;
00950
00951 reject_redirect:
00952
#ifdef CONFIG_IP_ROUTE_VERBOSE
00953
if (
IN_DEV_LOG_MARTIANS(in_dev) &&
net_ratelimit())
00954 printk(KERN_INFO
"Redirect from %u.%u.%u.%u on %s about "
00955
"%u.%u.%u.%u ignored.\n"
00956
" Advised path = %u.%u.%u.%u -> %u.%u.%u.%u, "
00957
"tos %02x\n",
00958 NIPQUAD(old_gw), dev->
name, NIPQUAD(new_gw),
00959 NIPQUAD(saddr), NIPQUAD(daddr), tos);
00960
#endif
00961
in_dev_put(in_dev);
00962 }
00963
00964 static struct dst_entry *
ipv4_negative_advice(
struct dst_entry *dst)
00965 {
00966
struct rtable *rt = (
struct rtable*)dst;
00967
struct dst_entry *ret = dst;
00968
00969
if (rt) {
00970
if (dst->obsolete) {
00971
ip_rt_put(rt);
00972 ret = NULL;
00973 }
else if ((rt->rt_flags & RTCF_REDIRECTED) ||
00974 rt->u.dst.expires) {
00975
unsigned hash =
rt_hash_code(rt->key.dst,
00976 rt->key.src ^
00977 (rt->key.oif << 5),
00978 rt->key.tos);
00979
#if RT_CACHE_DEBUG >= 1
00980
printk(KERN_DEBUG
"ip_rt_advice: redirect to "
00981
"%u.%u.%u.%u/%02x dropped\n",
00982 NIPQUAD(rt->rt_dst), rt->key.tos);
00983
#endif
00984
rt_del(hash, rt);
00985 ret = NULL;
00986 }
00987 }
00988
return ret;
00989 }
00990
00991
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007 void ip_rt_send_redirect(
struct sk_buff *skb)
01008 {
01009
struct rtable *rt = (
struct rtable*)skb->
dst;
01010
struct in_device *in_dev =
in_dev_get(rt->u.dst.dev);
01011
01012
if (!in_dev)
01013
return;
01014
01015
if (!
IN_DEV_TX_REDIRECTS(in_dev))
01016
goto out;
01017
01018
01019
01020
01021
if (time_after(jiffies, rt->u.dst.rate_last +
ip_rt_redirect_silence))
01022 rt->u.dst.rate_tokens = 0;
01023
01024
01025
01026
01027
if (rt->u.dst.rate_tokens >=
ip_rt_redirect_number) {
01028 rt->u.dst.rate_last = jiffies;
01029
goto out;
01030 }
01031
01032
01033
01034
01035
if (time_after(jiffies,
01036 (rt->u.dst.rate_last +
01037 (
ip_rt_redirect_load << rt->u.dst.rate_tokens)))) {
01038
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
01039 rt->u.dst.rate_last = jiffies;
01040 ++rt->u.dst.rate_tokens;
01041
#ifdef CONFIG_IP_ROUTE_VERBOSE
01042
if (
IN_DEV_LOG_MARTIANS(in_dev) &&
01043 rt->u.dst.rate_tokens ==
ip_rt_redirect_number &&
01044
net_ratelimit())
01045 printk(KERN_WARNING
"host %u.%u.%u.%u/if%d ignores "
01046
"redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n",
01047 NIPQUAD(rt->rt_src), rt->rt_iif,
01048 NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
01049
#endif
01050
}
01051 out:
01052
in_dev_put(in_dev);
01053 }
01054
01055 static int ip_error(
struct sk_buff *skb)
01056 {
01057
struct rtable *rt = (
struct rtable*)skb->
dst;
01058
unsigned long now;
01059
int code;
01060
01061
switch (rt->u.dst.error) {
01062
case EINVAL:
01063
default:
01064
goto out;
01065
case EHOSTUNREACH:
01066 code = ICMP_HOST_UNREACH;
01067
break;
01068
case ENETUNREACH:
01069 code = ICMP_NET_UNREACH;
01070
break;
01071
case EACCES:
01072 code = ICMP_PKT_FILTERED;
01073
break;
01074 }
01075
01076 now = jiffies;
01077 rt->u.dst.
rate_tokens += now - rt->u.dst.rate_last;
01078
if (rt->u.dst.rate_tokens >
ip_rt_error_burst)
01079 rt->u.dst.rate_tokens =
ip_rt_error_burst;
01080 rt->u.dst.rate_last = now;
01081
if (rt->u.dst.rate_tokens >=
ip_rt_error_cost) {
01082 rt->u.dst.rate_tokens -=
ip_rt_error_cost;
01083
icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
01084 }
01085
01086 out:
kfree_skb(skb);
01087
return 0;
01088 }
01089
01090
01091
01092
01093
01094
/*
 * Candidate MTU values in strictly descending order.  The table is only
 * read, so it is declared const.
 */
static const unsigned short mtu_plateau[] =
	{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };

/*
 * Return the largest plateau value strictly smaller than @old_mtu, or 68
 * when @old_mtu is not larger than any table entry.
 */
static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
{
	/* Unsigned index: the bound is a sizeof expression (size_t). */
	unsigned int i;

	for (i = 0; i < sizeof(mtu_plateau) / sizeof(mtu_plateau[0]); i++)
		if (old_mtu > mtu_plateau[i])
			return mtu_plateau[i];
	return 68;
}
01107
01108 unsigned short ip_rt_frag_needed(
struct iphdr *iph,
unsigned short new_mtu)
01109 {
01110
int i;
01111
unsigned short old_mtu = ntohs(iph->
tot_len);
01112
struct rtable *rth;
01113 u32 skeys[2] = { iph->
saddr, 0, };
01114 u32 daddr = iph->
daddr;
01115 u8 tos = iph->
tos &
IPTOS_RT_MASK;
01116
unsigned short est_mtu = 0;
01117
01118
if (
ipv4_config.no_pmtu_disc)
01119
return 0;
01120
01121
for (i = 0; i < 2; i++) {
01122
unsigned hash =
rt_hash_code(daddr, skeys[i], tos);
01123
01124 read_lock(&
rt_hash_table[hash].lock);
01125
for (rth =
rt_hash_table[hash].
chain; rth;
01126 rth = rth->
u.rt_next) {
01127
if (rth->key.dst == daddr &&
01128 rth->key.src == skeys[i] &&
01129 rth->rt_dst == daddr &&
01130 rth->rt_src == iph->
saddr &&
01131 rth->key.tos == tos &&
01132 rth->key.iif == 0 &&
01133 !(rth->u.dst.mxlock & (1 << RTAX_MTU))) {
01134
unsigned short mtu = new_mtu;
01135
01136
if (new_mtu < 68 || new_mtu >= old_mtu) {
01137
01138
01139
if (mtu == 0 &&
01140 old_mtu >= rth->u.dst.pmtu &&
01141 old_mtu >= 68 + (iph->ihl << 2))
01142 old_mtu -= iph->ihl << 2;
01143
01144 mtu =
guess_mtu(old_mtu);
01145 }
01146
if (mtu <= rth->u.dst.pmtu) {
01147
if (mtu < rth->u.dst.pmtu) {
01148
dst_confirm(&rth->u.dst);
01149
if (mtu <
ip_rt_min_pmtu) {
01150 mtu =
ip_rt_min_pmtu;
01151 rth->u.dst.mxlock |=
01152 (1 << RTAX_MTU);
01153 }
01154 rth->u.dst.pmtu = mtu;
01155
dst_set_expires(&rth->u.dst,
01156
ip_rt_mtu_expires);
01157 }
01158 est_mtu = mtu;
01159 }
01160 }
01161 }
01162 read_unlock(&
rt_hash_table[hash].lock);
01163 }
01164
return est_mtu ? : new_mtu;
01165 }
01166
01167 void ip_rt_update_pmtu(
struct dst_entry *dst,
unsigned mtu)
01168 {
01169
if (dst->
pmtu > mtu && mtu >= 68 &&
01170 !(dst->
mxlock & (1 << RTAX_MTU))) {
01171
if (mtu <
ip_rt_min_pmtu) {
01172 mtu =
ip_rt_min_pmtu;
01173 dst->
mxlock |= (1 << RTAX_MTU);
01174 }
01175 dst->
pmtu = mtu;
01176
dst_set_expires(dst,
ip_rt_mtu_expires);
01177 }
01178 }
01179
01180 static struct dst_entry *
ipv4_dst_check(
struct dst_entry *dst, u32 cookie)
01181 {
01182
dst_release(dst);
01183
return NULL;
01184 }
01185
01186 static struct dst_entry *
ipv4_dst_reroute(
struct dst_entry *dst,
01187
struct sk_buff *skb)
01188 {
01189
return NULL;
01190 }
01191
01192 static void ipv4_dst_destroy(
struct dst_entry *dst)
01193 {
01194
struct rtable *rt = (
struct rtable *) dst;
01195
struct inet_peer *peer = rt->peer;
01196
01197
if (peer) {
01198 rt->peer = NULL;
01199
inet_putpeer(peer);
01200 }
01201 }
01202
01203 static void ipv4_link_failure(
struct sk_buff *skb)
01204 {
01205
struct rtable *rt;
01206
01207
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
01208
01209 rt = (
struct rtable *) skb->
dst;
01210
if (rt)
01211
dst_set_expires(&rt->u.dst, 0);
01212 }
01213
01214 static int ip_rt_bug(
struct sk_buff *skb)
01215 {
01216 printk(KERN_DEBUG
"ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
01217 NIPQUAD(skb->
nh.iph->saddr), NIPQUAD(skb->
nh.iph->daddr),
01218 skb->
dev ? skb->
dev->
name :
"?");
01219
kfree_skb(skb);
01220
return 0;
01221 }
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232 void ip_rt_get_source(u8 *addr,
struct rtable *rt)
01233 {
01234 u32 src;
01235
struct fib_result res;
01236
01237
if (rt->
key.
iif == 0)
01238 src = rt->
rt_src;
01239
else if (
fib_lookup(&rt->
key, &res) == 0) {
01240
#ifdef CONFIG_IP_ROUTE_NAT
01241
if (res.
type == RTN_NAT)
01242 src =
inet_select_addr(rt->
u.dst.dev, rt->
rt_gateway,
01243 RT_SCOPE_UNIVERSE);
01244
else
01245
#endif
01246
src =
FIB_RES_PREFSRC(res);
01247
fib_res_put(&res);
01248 }
else
01249 src =
inet_select_addr(rt->
u.dst.dev, rt->
rt_gateway,
01250 RT_SCOPE_UNIVERSE);
01251 memcpy(addr, &src, 4);
01252 }
01253
01254
#ifdef CONFIG_NET_CLS_ROUTE
/*
 * Merge @tag into the route's tclassid, one 16-bit half at a time: a half
 * is only filled in when it is currently zero.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	struct dst_entry *dst = &rt->u.dst;

	if ((dst->tclassid & 0xFFFF) == 0)
		dst->tclassid |= tag & 0xFFFF;
	if ((dst->tclassid & 0xFFFF0000) == 0)
		dst->tclassid |= tag & 0xFFFF0000;
}
#endif
01263
01264 static void rt_set_nexthop(
struct rtable *rt,
struct fib_result *res, u32 itag)
01265 {
01266
struct fib_info *fi = res->
fi;
01267
01268
if (fi) {
01269
if (
FIB_RES_GW(*res) &&
01270
FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
01271 rt->
rt_gateway =
FIB_RES_GW(*res);
01272 memcpy(&rt->
u.dst.mxlock, fi->fib_metrics,
01273
sizeof(fi->fib_metrics));
01274
if (fi->fib_mtu == 0) {
01275 rt->
u.dst.pmtu = rt->
u.dst.dev->mtu;
01276
if (rt->
u.dst.mxlock & (1 << RTAX_MTU) &&
01277 rt->
rt_gateway != rt->
rt_dst &&
01278 rt->
u.dst.pmtu > 576)
01279 rt->
u.dst.pmtu = 576;
01280 }
01281
#ifdef CONFIG_NET_CLS_ROUTE
01282
rt->
u.dst.tclassid =
FIB_RES_NH(*res).nh_tclassid;
01283 #endif
01284 }
else
01285 rt->
u.dst.pmtu = rt->
u.dst.dev->mtu;
01286
01287
if (rt->
u.dst.pmtu >
IP_MAX_MTU)
01288 rt->
u.dst.pmtu =
IP_MAX_MTU;
01289
if (rt->
u.dst.advmss == 0)
01290 rt->
u.dst.advmss = max_t(
unsigned int, rt->
u.dst.dev->mtu - 40,
01291
ip_rt_min_advmss);
01292
if (rt->
u.dst.advmss > 65535 - 40)
01293 rt->
u.dst.advmss = 65535 - 40;
01294
01295
#ifdef CONFIG_NET_CLS_ROUTE
01296
#ifdef CONFIG_IP_MULTIPLE_TABLES
01297
set_class_tag(rt, fib_rules_tclass(res));
01298
#endif
01299
set_class_tag(rt, itag);
01300
#endif
01301
rt->
rt_type = res->
type;
01302 }
01303
01304 static int ip_route_input_mc(
struct sk_buff *skb, u32 daddr, u32 saddr,
01305 u8 tos,
struct net_device *dev,
int our)
01306 {
01307
unsigned hash;
01308
struct rtable *rth;
01309 u32 spec_dst;
01310
struct in_device *in_dev =
in_dev_get(dev);
01311 u32 itag = 0;
01312
01313
01314
01315
if (in_dev == NULL)
01316
return -EINVAL;
01317
01318
if (
MULTICAST(saddr) ||
BADCLASS(saddr) ||
LOOPBACK(saddr) ||
01319 skb->
protocol != htons(ETH_P_IP))
01320
goto e_inval;
01321
01322
if (
ZERONET(saddr)) {
01323
if (!
LOCAL_MCAST(daddr))
01324
goto e_inval;
01325 spec_dst =
inet_select_addr(dev, 0, RT_SCOPE_LINK);
01326 }
else if (
fib_validate_source(saddr, 0, tos, 0,
01327 dev, &spec_dst, &itag) < 0)
01328
goto e_inval;
01329
01330 rth =
dst_alloc(&
ipv4_dst_ops);
01331
if (!rth)
01332
goto e_nobufs;
01333
01334 rth->u.dst.output=
ip_rt_bug;
01335
01336 atomic_set(&rth->u.dst.__refcnt, 1);
01337 rth->u.dst.flags=
DST_HOST;
01338 rth->key.dst = daddr;
01339 rth->rt_dst = daddr;
01340 rth->key.tos = tos;
01341
#ifdef CONFIG_IP_ROUTE_FWMARK
01342
rth->key.fwmark = skb->nfmark;
01343
#endif
01344
rth->key.src = saddr;
01345 rth->rt_src = saddr;
01346
#ifdef CONFIG_IP_ROUTE_NAT
01347
rth->rt_dst_map = daddr;
01348 rth->rt_src_map = saddr;
01349
#endif
01350
#ifdef CONFIG_NET_CLS_ROUTE
01351
rth->u.
dst.tclassid = itag;
01352
#endif
01353
rth->rt_iif =
01354 rth->key.iif = dev->
ifindex;
01355 rth->u.dst.dev = &
loopback_dev;
01356
dev_hold(rth->u.dst.dev);
01357 rth->key.oif = 0;
01358 rth->rt_gateway = daddr;
01359 rth->rt_spec_dst= spec_dst;
01360 rth->rt_type = RTN_MULTICAST;
01361 rth->rt_flags = RTCF_MULTICAST;
01362
if (our) {
01363 rth->u.dst.input=
ip_local_deliver;
01364 rth->rt_flags |= RTCF_LOCAL;
01365 }
01366
01367
#ifdef CONFIG_IP_MROUTE
01368
if (!
LOCAL_MCAST(daddr) &&
IN_DEV_MFORWARD(in_dev))
01369 rth->u.dst.input =
ip_mr_input;
01370
#endif
01371
rt_cache_stat[smp_processor_id()].
in_slow_mc++;
01372
01373
in_dev_put(in_dev);
01374 hash =
rt_hash_code(daddr, saddr ^ (dev->
ifindex << 5), tos);
01375
return rt_intern_hash(hash, rth, (
struct rtable**) &skb->
dst);
01376
01377 e_nobufs:
01378
in_dev_put(in_dev);
01379
return -ENOBUFS;
01380
01381 e_inval:
01382
in_dev_put(in_dev);
01383
return -EINVAL;
01384 }
01385
01386
01387
01388
01389
01390
01391
01392
01393
01394
01395
01396 int ip_route_input_slow(
struct sk_buff *skb, u32 daddr, u32 saddr,
01397 u8 tos,
struct net_device *dev)
01398 {
01399
struct rt_key key;
01400
struct fib_result res;
01401
struct in_device *in_dev =
in_dev_get(dev);
01402
struct in_device *out_dev = NULL;
01403
unsigned flags = 0;
01404 u32 itag = 0;
01405
struct rtable * rth;
01406
unsigned hash;
01407 u32 spec_dst;
01408
int err = -EINVAL;
01409
int free_res = 0;
01410
01411
01412
01413
if (!in_dev)
01414
goto out;
01415
01416 key.
dst = daddr;
01417 key.
src = saddr;
01418 key.
tos = tos;
01419
#ifdef CONFIG_IP_ROUTE_FWMARK
01420
key.fwmark = skb->nfmark;
01421
#endif
01422
key.
iif = dev->
ifindex;
01423 key.
oif = 0;
01424 key.
scope = RT_SCOPE_UNIVERSE;
01425
01426 hash =
rt_hash_code(daddr, saddr ^ (key.
iif << 5), tos);
01427
01428
01429
01430
01431
01432
if (
MULTICAST(saddr) ||
BADCLASS(saddr) ||
LOOPBACK(saddr))
01433
goto martian_source;
01434
01435
if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0))
01436
goto brd_input;
01437
01438
01439
01440
01441
if (
ZERONET(saddr))
01442
goto martian_source;
01443
01444
if (
BADCLASS(daddr) ||
ZERONET(daddr) ||
LOOPBACK(daddr))
01445
goto martian_destination;
01446
01447
01448
01449
01450
if ((err =
fib_lookup(&key, &res)) != 0) {
01451
if (!
IN_DEV_FORWARD(in_dev))
01452
goto e_inval;
01453
goto no_route;
01454 }
01455 free_res = 1;
01456
01457
rt_cache_stat[smp_processor_id()].
in_slow_tot++;
01458
01459
#ifdef CONFIG_IP_ROUTE_NAT
01460
01461
01462
01463
01464
if (1) {
01465 u32 src_map = saddr;
01466
if (res.r)
01467 src_map =
fib_rules_policy(saddr, &res, &flags);
01468
01469
if (res.
type == RTN_NAT) {
01470 key.
dst =
fib_rules_map_destination(daddr, &res);
01471
fib_res_put(&res);
01472 free_res = 0;
01473
if (
fib_lookup(&key, &res))
01474
goto e_inval;
01475 free_res = 1;
01476
if (res.
type != RTN_UNICAST)
01477
goto e_inval;
01478 flags |= RTCF_DNAT;
01479 }
01480 key.
src = src_map;
01481 }
01482
#endif
01483
01484
if (res.
type == RTN_BROADCAST)
01485
goto brd_input;
01486
01487
if (res.
type == RTN_LOCAL) {
01488
int result;
01489 result =
fib_validate_source(saddr, daddr, tos,
01490
loopback_dev.
ifindex,
01491 dev, &spec_dst, &itag);
01492
if (result < 0)
01493
goto martian_source;
01494
if (result)
01495 flags |= RTCF_DIRECTSRC;
01496 spec_dst = daddr;
01497
goto local_input;
01498 }
01499
01500
if (!
IN_DEV_FORWARD(in_dev))
01501
goto e_inval;
01502
if (res.
type != RTN_UNICAST)
01503
goto martian_destination;
01504
01505
#ifdef CONFIG_IP_ROUTE_MULTIPATH
01506
if (res.
fi->
fib_nhs > 1 && key.
oif == 0)
01507
fib_select_multipath(&key, &res);
01508
#endif
01509
out_dev =
in_dev_get(
FIB_RES_DEV(res));
01510
if (out_dev == NULL) {
01511
if (
net_ratelimit())
01512 printk(KERN_CRIT
"Bug in ip_route_input_slow(). "
01513
"Please, report\n");
01514
goto e_inval;
01515 }
01516
01517 err =
fib_validate_source(saddr, daddr, tos,
FIB_RES_OIF(res), dev,
01518 &spec_dst, &itag);
01519
if (err < 0)
01520
goto martian_source;
01521
01522
if (err)
01523 flags |= RTCF_DIRECTSRC;
01524
01525
if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
01526 (
IN_DEV_SHARED_MEDIA(out_dev) ||
01527
inet_addr_onlink(out_dev, saddr,
FIB_RES_GW(res))))
01528 flags |= RTCF_DOREDIRECT;
01529
01530
if (skb->
protocol != htons(ETH_P_IP)) {
01531
01532
01533
01534
if (out_dev == in_dev && !(flags & RTCF_DNAT))
01535
goto e_inval;
01536 }
01537
01538 rth =
dst_alloc(&
ipv4_dst_ops);
01539
if (!rth)
01540
goto e_nobufs;
01541
01542 atomic_set(&rth->u.dst.__refcnt, 1);
01543 rth->u.dst.flags=
DST_HOST;
01544 rth->key.dst = daddr;
01545 rth->rt_dst = daddr;
01546 rth->key.tos = tos;
01547
#ifdef CONFIG_IP_ROUTE_FWMARK
01548
rth->key.fwmark = skb->nfmark;
01549
#endif
01550
rth->key.src = saddr;
01551 rth->rt_src = saddr;
01552 rth->rt_gateway = daddr;
01553
#ifdef CONFIG_IP_ROUTE_NAT
01554
rth->rt_src_map = key.
src;
01555 rth->rt_dst_map = key.
dst;
01556
if (flags&RTCF_DNAT)
01557 rth->rt_gateway = key.
dst;
01558
#endif
01559
rth->rt_iif =
01560 rth->key.iif = dev->
ifindex;
01561 rth->u.dst.dev = out_dev->dev;
01562
dev_hold(rth->u.dst.dev);
01563 rth->key.oif = 0;
01564 rth->rt_spec_dst= spec_dst;
01565
01566 rth->u.dst.input =
ip_forward;
01567 rth->u.dst.output =
ip_output;
01568
01569
rt_set_nexthop(rth, &res, itag);
01570
01571 rth->rt_flags = flags;
01572
01573
#ifdef CONFIG_NET_FASTROUTE
01574
if (netdev_fastroute && !(flags&(RTCF_NAT|RTCF_MASQ|RTCF_DOREDIRECT))) {
01575
struct net_device *odev = rth->u.dst.dev;
01576
if (odev != dev &&
01577 dev->
accept_fastpath &&
01578 odev->mtu >= dev->
mtu &&
01579 dev->
accept_fastpath(dev, &rth->u.dst) == 0)
01580 rth->rt_flags |= RTCF_FAST;
01581 }
01582
#endif
01583
01584 intern:
01585 err =
rt_intern_hash(hash, rth, (
struct rtable**)&skb->
dst);
01586 done:
01587
in_dev_put(in_dev);
01588
if (out_dev)
01589
in_dev_put(out_dev);
01590
if (free_res)
01591
fib_res_put(&res);
01592 out:
return err;
01593
01594 brd_input:
01595
if (skb->
protocol != htons(ETH_P_IP))
01596
goto e_inval;
01597
01598
if (
ZERONET(saddr))
01599 spec_dst =
inet_select_addr(dev, 0, RT_SCOPE_LINK);
01600
else {
01601 err =
fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
01602 &itag);
01603
if (err < 0)
01604
goto martian_source;
01605
if (err)
01606 flags |= RTCF_DIRECTSRC;
01607 }
01608 flags |= RTCF_BROADCAST;
01609 res.
type = RTN_BROADCAST;
01610
rt_cache_stat[smp_processor_id()].
in_brd++;
01611
01612 local_input:
01613 rth =
dst_alloc(&
ipv4_dst_ops);
01614
if (!rth)
01615
goto e_nobufs;
01616
01617 rth->u.dst.output=
ip_rt_bug;
01618
01619 atomic_set(&rth->u.dst.__refcnt, 1);
01620 rth->u.dst.flags=
DST_HOST;
01621 rth->key.dst = daddr;
01622 rth->rt_dst = daddr;
01623 rth->key.tos = tos;
01624
#ifdef CONFIG_IP_ROUTE_FWMARK
01625
rth->key.fwmark = skb->nfmark;
01626
#endif
01627
rth->key.src = saddr;
01628 rth->rt_src = saddr;
01629
#ifdef CONFIG_IP_ROUTE_NAT
01630
rth->rt_dst_map = key.
dst;
01631 rth->rt_src_map = key.
src;
01632
#endif
01633
#ifdef CONFIG_NET_CLS_ROUTE
01634
rth->u.dst.tclassid = itag;
01635
#endif
01636
rth->rt_iif =
01637 rth->key.iif = dev->
ifindex;
01638 rth->u.dst.dev = &
loopback_dev;
01639
dev_hold(rth->u.dst.dev);
01640 rth->key.oif = 0;
01641 rth->rt_gateway = daddr;
01642 rth->rt_spec_dst= spec_dst;
01643 rth->u.dst.input=
ip_local_deliver;
01644 rth->rt_flags = flags|RTCF_LOCAL;
01645
if (res.
type == RTN_UNREACHABLE) {
01646 rth->u.dst.input=
ip_error;
01647 rth->u.dst.error= -err;
01648 rth->rt_flags &= ~RTCF_LOCAL;
01649 }
01650 rth->rt_type = res.
type;
01651
goto intern;
01652
01653 no_route:
01654
rt_cache_stat[smp_processor_id()].
in_no_route++;
01655 spec_dst =
inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
01656 res.
type = RTN_UNREACHABLE;
01657
goto local_input;
01658
01659
01660
01661
01662 martian_destination:
01663
rt_cache_stat[smp_processor_id()].
in_martian_dst++;
01664
#ifdef CONFIG_IP_ROUTE_VERBOSE
01665
if (
IN_DEV_LOG_MARTIANS(in_dev) &&
net_ratelimit())
01666 printk(KERN_WARNING
"martian destination %u.%u.%u.%u from "
01667
"%u.%u.%u.%u, dev %s\n",
01668 NIPQUAD(daddr), NIPQUAD(saddr), dev->
name);
01669
#endif
01670
e_inval:
01671 err = -EINVAL;
01672
goto done;
01673
01674 e_nobufs:
01675 err = -ENOBUFS;
01676
goto done;
01677
01678 martian_source:
01679
01680
rt_cache_stat[smp_processor_id()].
in_martian_src++;
01681
#ifdef CONFIG_IP_ROUTE_VERBOSE
01682
if (
IN_DEV_LOG_MARTIANS(in_dev) &&
net_ratelimit()) {
01683
01684
01685
01686
01687 printk(KERN_WARNING
"martian source %u.%u.%u.%u from "
01688
"%u.%u.%u.%u, on dev %s\n",
01689 NIPQUAD(daddr), NIPQUAD(saddr), dev->
name);
01690
if (dev->
hard_header_len) {
01691
int i;
01692
unsigned char *p = skb->
mac.raw;
01693 printk(KERN_WARNING
"ll header: ");
01694
for (i = 0; i < dev->
hard_header_len; i++, p++) {
01695 printk(
"%02x", *p);
01696
if (i < (dev->
hard_header_len - 1))
01697 printk(
":");
01698 }
01699 printk(
"\n");
01700 }
01701 }
01702
#endif
01703
goto e_inval;
01704 }
01705
01712 int ip_route_input(
struct sk_buff *skb, u32 daddr, u32 saddr,
01713 u8 tos,
struct net_device *dev)
01714 {
01715
struct rtable * rth;
01716
unsigned hash;
01717
int iif = dev->
ifindex;
01718
01719 tos &=
IPTOS_RT_MASK;
01720 hash =
rt_hash_code(daddr, saddr ^ (iif << 5), tos);
01721
01722 read_lock(&
rt_hash_table[hash].lock);
01723
for (rth =
rt_hash_table[hash].
chain; rth; rth = rth->
u.rt_next) {
01724
if (rth->key.dst == daddr &&
01725 rth->key.src == saddr &&
01726 rth->key.iif == iif &&
01727 rth->key.oif == 0 &&
01728
#ifdef CONFIG_IP_ROUTE_FWMARK
01729
rth->key.fwmark == skb->nfmark &&
01730
#endif
01731
rth->key.tos == tos) {
01732 rth->u.dst.lastuse = jiffies;
01733
dst_hold(&rth->u.dst);
01734 rth->u.dst.__use++;
01735
rt_cache_stat[smp_processor_id()].
in_hit++;
01736 read_unlock(&
rt_hash_table[hash].lock);
01737 skb->
dst = (
struct dst_entry*)rth;
01738
return 0;
01739 }
01740
rt_cache_stat[smp_processor_id()].
in_hlist_search++;
01741 }
01742 read_unlock(&
rt_hash_table[hash].lock);
01743
01744
01745
01746
01747
01748
01749
01750
01751
01752
01753
01754
01755
if (
MULTICAST(daddr)) {
01756
struct in_device *in_dev;
01757
01758 read_lock(&
inetdev_lock);
01759
if ((in_dev =
__in_dev_get(dev)) != NULL) {
01760
int our =
ip_check_mc(in_dev, daddr, saddr);
01761
if (our
01762
#ifdef CONFIG_IP_MROUTE
01763
|| (!
LOCAL_MCAST(daddr) &&
IN_DEV_MFORWARD(in_dev))
01764
#endif
01765
) {
01766 read_unlock(&
inetdev_lock);
01767
return ip_route_input_mc(skb, daddr, saddr,
01768 tos, dev, our);
01769 }
01770 }
01771 read_unlock(&
inetdev_lock);
01772
return -EINVAL;
01773 }
01774
return ip_route_input_slow(skb, daddr, saddr, tos, dev);
01775 }
01776
01777
01778
01779
01780
01781 int ip_route_output_slow(
struct rtable **rp,
const struct rt_key *oldkey)
01782 {
01783
struct rt_key key;
01784
struct fib_result res;
01785
unsigned flags = 0;
01786
struct rtable *rth;
01787
struct net_device *dev_out = NULL;
01788
unsigned hash;
01789
int free_res = 0;
01790
int err;
01791 u32 tos;
01792
01793 tos = oldkey->
tos & (
IPTOS_RT_MASK |
RTO_ONLINK);
01794 key.
dst = oldkey->
dst;
01795 key.
src = oldkey->
src;
01796 key.
tos = tos &
IPTOS_RT_MASK;
01797 key.
iif =
loopback_dev.
ifindex;
01798 key.
oif = oldkey->
oif;
01799
#ifdef CONFIG_IP_ROUTE_FWMARK
01800
key.fwmark = oldkey->fwmark;
01801
#endif
01802
key.
scope = (tos &
RTO_ONLINK) ? RT_SCOPE_LINK :
01803 RT_SCOPE_UNIVERSE;
01804 res.
fi = NULL;
01805
#ifdef CONFIG_IP_MULTIPLE_TABLES
01806
res.r = NULL;
01807
#endif
01808
01809
if (oldkey->
src) {
01810 err = -EINVAL;
01811
if (
MULTICAST(oldkey->
src) ||
01812
BADCLASS(oldkey->
src) ||
01813
ZERONET(oldkey->
src))
01814
goto out;
01815
01816
01817 dev_out =
ip_dev_find(oldkey->
src);
01818
if (dev_out == NULL)
01819
goto out;
01820
01821
01822
01823
01824
01825
01826
01827
01828
01829
if (oldkey->
oif == 0
01830 && (
MULTICAST(oldkey->
dst) || oldkey->
dst == 0xFFFFFFFF)) {
01831
01832
01833
01834
01835
01836
01837
01838
01839
01840
01841
01842
01843
01844
01845
01846 key.
oif = dev_out->ifindex;
01847
goto make_route;
01848 }
01849
if (dev_out)
01850
dev_put(dev_out);
01851 dev_out = NULL;
01852 }
01853
if (oldkey->
oif) {
01854 dev_out =
dev_get_by_index(oldkey->
oif);
01855 err = -ENODEV;
01856
if (dev_out == NULL)
01857
goto out;
01858
if (
__in_dev_get(dev_out) == NULL) {
01859
dev_put(dev_out);
01860
goto out;
01861 }
01862 err = -ENETDOWN;
01863
if (!(dev_out->flags&IFF_UP)) {
01864
dev_put(dev_out);
01865
goto out;
01866 }
01867
01868
if (
LOCAL_MCAST(oldkey->
dst) || oldkey->
dst == 0xFFFFFFFF) {
01869
if (!key.
src)
01870 key.
src =
inet_select_addr(dev_out, 0,
01871 RT_SCOPE_LINK);
01872
goto make_route;
01873 }
01874
if (!key.
src) {
01875
if (
MULTICAST(oldkey->
dst))
01876 key.
src =
inet_select_addr(dev_out, 0,
01877 key.
scope);
01878
else if (!oldkey->
dst)
01879 key.
src =
inet_select_addr(dev_out, 0,
01880 RT_SCOPE_HOST);
01881 }
01882 }
01883
01884
if (!key.
dst) {
01885 key.
dst = key.
src;
01886
if (!key.
dst)
01887 key.
dst = key.
src = htonl(
INADDR_LOOPBACK);
01888
if (dev_out)
01889
dev_put(dev_out);
01890 dev_out = &
loopback_dev;
01891
dev_hold(dev_out);
01892 key.
oif =
loopback_dev.
ifindex;
01893 res.
type = RTN_LOCAL;
01894 flags |= RTCF_LOCAL;
01895
goto make_route;
01896 }
01897
01898
if (
fib_lookup(&key, &res)) {
01899 res.
fi = NULL;
01900
if (oldkey->
oif) {
01901
01902
01903
01904
01905
01906
01907
01908
01909
01910
01911
01912
01913
01914
01915
01916
01917
01918
01919
if (key.
src == 0)
01920 key.
src =
inet_select_addr(dev_out, 0,
01921 RT_SCOPE_LINK);
01922 res.
type = RTN_UNICAST;
01923
goto make_route;
01924 }
01925
if (dev_out)
01926
dev_put(dev_out);
01927 err = -ENETUNREACH;
01928
goto out;
01929 }
01930 free_res = 1;
01931
01932
if (res.
type == RTN_NAT)
01933
goto e_inval;
01934
01935
if (res.
type == RTN_LOCAL) {
01936
struct in_device *in_dev;
01937 u32 src;
01938
01939
if (dev_out)
01940
dev_put(dev_out);
01941 dev_out =
FIB_RES_DEV(res);
01942 in_dev =
in_dev_get(dev_out);
01943 src = key.
src? :
FIB_RES_PREFSRC(res);
01944
if (in_dev &&
IN_DEV_LOOP(in_dev) && src) {
01945
struct net_device *dev_src;
01946
01947
in_dev_put(in_dev);
01948 in_dev = NULL;
01949 dev_src =
ip_dev_find(src);
01950
if (dev_src && dev_src != dev_out &&
01951 (in_dev =
in_dev_get(dev_src)) &&
01952
IN_DEV_LOOP(in_dev)) {
01953
in_dev_put(in_dev);
01954 dev_out = dev_src;
01955 key.
src = src;
01956 key.
oif = dev_out->ifindex;
01957 res.
type = RTN_UNICAST;
01958
if (res.
fi) {
01959
fib_info_put(res.
fi);
01960 res.
fi = NULL;
01961 }
01962
goto make_route;
01963 }
01964
if (dev_src)
01965
dev_put(dev_src);
01966 }
01967
if (in_dev)
01968
in_dev_put(in_dev);
01969
if (!key.
src)
01970 key.
src = key.
dst;
01971 dev_out = &
loopback_dev;
01972
dev_hold(dev_out);
01973 key.
oif = dev_out->ifindex;
01974
if (res.
fi)
01975
fib_info_put(res.
fi);
01976 res.
fi = NULL;
01977 flags |= RTCF_LOCAL;
01978
goto make_route;
01979 }
01980
01981
#ifdef CONFIG_IP_ROUTE_MULTIPATH
01982
if (res.
fi->
fib_nhs > 1 && key.
oif == 0)
01983
fib_select_multipath(&key, &res);
01984
else
01985
#endif
01986
if (!res.
prefixlen && res.
type == RTN_UNICAST && !key.
oif)
01987
fib_select_default(&key, &res);
01988
01989
if (!key.
src)
01990 key.
src =
FIB_RES_PREFSRC(res);
01991
01992
if (dev_out)
01993
dev_put(dev_out);
01994 dev_out =
FIB_RES_DEV(res);
01995
dev_hold(dev_out);
01996 key.
oif = dev_out->ifindex;
01997
01998 make_route:
01999
if (
LOOPBACK(key.
src) && !(dev_out->flags&IFF_LOOPBACK))
02000
goto e_inval;
02001
02002
if (key.
dst == 0xFFFFFFFF)
02003 res.
type = RTN_BROADCAST;
02004
else if (
MULTICAST(key.
dst))
02005 res.
type = RTN_MULTICAST;
02006
else if (
BADCLASS(key.
dst) ||
ZERONET(key.
dst))
02007
goto e_inval;
02008
02009
if (dev_out->flags & IFF_LOOPBACK)
02010 flags |= RTCF_LOCAL;
02011
02012
if (res.
type == RTN_BROADCAST) {
02013 flags |= RTCF_BROADCAST | RTCF_LOCAL;
02014
if (res.
fi) {
02015
fib_info_put(res.
fi);
02016 res.
fi = NULL;
02017 }
02018 }
else if (res.
type == RTN_MULTICAST) {
02019 flags |= RTCF_MULTICAST|RTCF_LOCAL;
02020 read_lock(&
inetdev_lock);
02021
if (!
__in_dev_get(dev_out) ||
02022 !
ip_check_mc(
__in_dev_get(dev_out),oldkey->
dst,oldkey->
src))
02023 flags &= ~RTCF_LOCAL;
02024 read_unlock(&
inetdev_lock);
02025
02026
02027
02028
02029
if (res.
fi && res.
prefixlen < 4) {
02030
fib_info_put(res.
fi);
02031 res.
fi = NULL;
02032 }
02033 }
02034
02035 rth =
dst_alloc(&
ipv4_dst_ops);
02036
if (!rth)
02037
goto e_nobufs;
02038
02039 atomic_set(&rth->u.dst.__refcnt, 1);
02040 rth->u.dst.flags=
DST_HOST;
02041 rth->key.dst = oldkey->
dst;
02042 rth->key.tos = tos;
02043 rth->key.src = oldkey->
src;
02044 rth->key.iif = 0;
02045 rth->key.oif = oldkey->
oif;
02046
#ifdef CONFIG_IP_ROUTE_FWMARK
02047
rth->key.fwmark = oldkey->fwmark;
02048
#endif
02049
rth->rt_dst = key.
dst;
02050 rth->rt_src = key.
src;
02051
#ifdef CONFIG_IP_ROUTE_NAT
02052
rth->rt_dst_map = key.
dst;
02053 rth->rt_src_map = key.
src;
02054
#endif
02055
rth->rt_iif = oldkey->
oif ? : dev_out->ifindex;
02056 rth->u.dst.dev = dev_out;
02057
dev_hold(dev_out);
02058 rth->rt_gateway = key.
dst;
02059 rth->rt_spec_dst= key.
src;
02060
02061 rth->u.dst.output=
ip_output;
02062
02063
rt_cache_stat[smp_processor_id()].
out_slow_tot++;
02064
02065
if (flags & RTCF_LOCAL) {
02066 rth->u.dst.input =
ip_local_deliver;
02067 rth->rt_spec_dst = key.
dst;
02068 }
02069
if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
02070 rth->rt_spec_dst = key.
src;
02071
if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) {
02072 rth->u.dst.output =
ip_mc_output;
02073
rt_cache_stat[smp_processor_id()].
out_slow_mc++;
02074 }
02075
#ifdef CONFIG_IP_MROUTE
02076
if (res.
type == RTN_MULTICAST) {
02077
struct in_device *in_dev =
in_dev_get(dev_out);
02078
if (in_dev) {
02079
if (
IN_DEV_MFORWARD(in_dev) &&
02080 !
LOCAL_MCAST(oldkey->
dst)) {
02081 rth->u.dst.input =
ip_mr_input;
02082 rth->u.dst.output =
ip_mc_output;
02083 }
02084
in_dev_put(in_dev);
02085 }
02086 }
02087
#endif
02088
}
02089
02090
rt_set_nexthop(rth, &res, 0);
02091
02092 rth->rt_flags = flags;
02093
02094 hash =
rt_hash_code(oldkey->
dst, oldkey->
src ^ (oldkey->
oif << 5), tos);
02095 err =
rt_intern_hash(hash, rth, rp);
02096 done:
02097
if (free_res)
02098
fib_res_put(&res);
02099
if (dev_out)
02100
dev_put(dev_out);
02101 out:
return err;
02102
02103 e_inval:
02104 err = -EINVAL;
02105
goto done;
02106 e_nobufs:
02107 err = -ENOBUFS;
02108
goto done;
02109 }
02110
02111 int ip_route_output_key(
struct rtable **rp,
const struct rt_key *key)
02112 {
02113
unsigned hash;
02114
struct rtable *rth;
02115
02116 hash =
rt_hash_code(key->
dst, key->
src ^ (key->
oif << 5), key->
tos);
02117
02118 read_lock_bh(&
rt_hash_table[hash].lock);
02119
for (rth =
rt_hash_table[hash].
chain; rth; rth = rth->
u.rt_next) {
02120
if (rth->key.dst == key->
dst &&
02121 rth->key.src == key->
src &&
02122 rth->key.iif == 0 &&
02123 rth->key.oif == key->
oif &&
02124
#ifdef CONFIG_IP_ROUTE_FWMARK
02125
rth->key.fwmark == key->fwmark &&
02126
#endif
02127
!((rth->key.tos ^ key->
tos) &
02128 (
IPTOS_RT_MASK |
RTO_ONLINK))) {
02129 rth->u.dst.lastuse = jiffies;
02130
dst_hold(&rth->u.dst);
02131 rth->u.dst.__use++;
02132
rt_cache_stat[smp_processor_id()].
out_hit++;
02133 read_unlock_bh(&
rt_hash_table[hash].lock);
02134 *rp = rth;
02135
return 0;
02136 }
02137
rt_cache_stat[smp_processor_id()].
out_hlist_search++;
02138 }
02139 read_unlock_bh(&
rt_hash_table[hash].lock);
02140
02141
return ip_route_output_slow(rp, key);
02142 }
02143
02144 static int rt_fill_info(
struct sk_buff *skb, u32 pid, u32 seq,
int event,
02145
int nowait)
02146 {
02147
struct rtable *rt = (
struct rtable*)skb->
dst;
02148
struct rtmsg *r;
02149
struct nlmsghdr *nlh;
02150
unsigned char *b = skb->
tail;
02151
struct rta_cacheinfo ci;
02152
#ifdef CONFIG_IP_MROUTE
02153
struct rtattr *eptr;
02154
#endif
02155
nlh =
NLMSG_PUT(skb, pid, seq, event,
sizeof(*r));
02156 r =
NLMSG_DATA(nlh);
02157 nlh->nlmsg_flags = (nowait && pid) ?
NLM_F_MULTI : 0;
02158 r->rtm_family = AF_INET;
02159 r->rtm_dst_len = 32;
02160 r->rtm_src_len = 0;
02161 r->rtm_tos = rt->key.tos;
02162 r->rtm_table = RT_TABLE_MAIN;
02163 r->rtm_type = rt->rt_type;
02164 r->rtm_scope = RT_SCOPE_UNIVERSE;
02165 r->rtm_protocol = RTPROT_UNSPEC;
02166 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
02167
if (rt->rt_flags & RTCF_NOTIFY)
02168 r->rtm_flags |= RTM_F_NOTIFY;
02169 RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
02170
if (rt->key.src) {
02171 r->rtm_src_len = 32;
02172 RTA_PUT(skb, RTA_SRC, 4, &rt->key.src);
02173 }
02174
if (rt->u.dst.dev)
02175 RTA_PUT(skb, RTA_OIF,
sizeof(
int), &rt->u.
dst.
dev->
ifindex);
02176
#ifdef CONFIG_NET_CLS_ROUTE
02177
if (rt->u.dst.tclassid)
02178 RTA_PUT(skb, RTA_FLOW, 4, &rt->u.
dst.tclassid);
02179
#endif
02180
if (rt->key.iif)
02181 RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
02182
else if (rt->rt_src != rt->key.src)
02183 RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
02184
if (rt->rt_dst != rt->rt_gateway)
02185 RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
02186
if (
rtnetlink_put_metrics(skb, &rt->u.
dst.
mxlock) < 0)
02187
goto rtattr_failure;
02188 ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
02189 ci.rta_used = rt->u.dst.__use;
02190 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
02191
if (rt->u.dst.expires)
02192 ci.rta_expires = rt->u.dst.expires - jiffies;
02193
else
02194 ci.rta_expires = 0;
02195 ci.rta_error = rt->u.dst.error;
02196 ci.rta_id = ci.rta_ts = ci.rta_tsage = 0;
02197
if (rt->peer) {
02198 ci.rta_id = rt->peer->ip_id_count;
02199
if (rt->peer->tcp_ts_stamp) {
02200 ci.rta_ts = rt->peer->tcp_ts;
02201 ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
02202 }
02203 }
02204
#ifdef CONFIG_IP_MROUTE
02205
eptr = (
struct rtattr*)skb->
tail;
02206
#endif
02207
RTA_PUT(skb, RTA_CACHEINFO,
sizeof(ci), &ci);
02208
if (rt->key.iif) {
02209
#ifdef CONFIG_IP_MROUTE
02210
u32 dst = rt->rt_dst;
02211
02212
if (
MULTICAST(dst) && !
LOCAL_MCAST(dst) &&
02213
ipv4_devconf.mc_forwarding) {
02214
int err =
ipmr_get_route(skb, r, nowait);
02215
if (err <= 0) {
02216
if (!nowait) {
02217
if (err == 0)
02218
return 0;
02219
goto nlmsg_failure;
02220 }
else {
02221
if (err == -EMSGSIZE)
02222
goto nlmsg_failure;
02223 ((
struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err;
02224 }
02225 }
02226 }
else
02227
#endif
02228
RTA_PUT(skb, RTA_IIF,
sizeof(
int), &rt->key.iif);
02229 }
02230
02231 nlh->nlmsg_len = skb->
tail - b;
02232
return skb->
len;
02233
02234 nlmsg_failure:
02235 rtattr_failure:
02236
skb_trim(skb, b - skb->
data);
02237
return -1;
02238 }
02239
02240 int inet_rtm_getroute(
struct sk_buff *in_skb,
struct nlmsghdr* nlh,
void *arg)
02241 {
02242
struct rtattr **rta = arg;
02243
struct rtmsg *rtm =
NLMSG_DATA(nlh);
02244
struct rtable *rt = NULL;
02245 u32 dst = 0;
02246 u32 src = 0;
02247
int iif = 0;
02248
int err = -ENOBUFS;
02249
struct sk_buff *skb;
02250
02251 skb =
alloc_skb(
NLMSG_GOODSIZE, GFP_KERNEL);
02252
if (!skb)
02253
goto out;
02254
02255
02256
02257
02258 skb->mac.raw = skb->data;
02259
skb_reserve(skb,
MAX_HEADER +
sizeof(
struct iphdr));
02260
02261
if (rta[RTA_SRC - 1])
02262 memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4);
02263
if (rta[RTA_DST - 1])
02264 memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
02265
if (rta[RTA_IIF - 1])
02266 memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]),
sizeof(
int));
02267
02268
if (iif) {
02269
struct net_device *dev =
__dev_get_by_index(iif);
02270 err = -ENODEV;
02271
if (!dev)
02272
goto out_free;
02273 skb->protocol = htons(ETH_P_IP);
02274 skb->dev = dev;
02275 local_bh_disable();
02276 err =
ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
02277 local_bh_enable();
02278 rt = (
struct rtable*)skb->dst;
02279
if (!err && rt->u.dst.error)
02280 err = -rt->u.dst.error;
02281 }
else {
02282
int oif = 0;
02283
if (rta[RTA_OIF - 1])
02284 memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]),
sizeof(
int));
02285 err =
ip_route_output(&rt, dst, src, rtm->rtm_tos, oif);
02286 }
02287
if (err)
02288
goto out_free;
02289
02290 skb->dst = &rt->u.dst;
02291
if (rtm->rtm_flags & RTM_F_NOTIFY)
02292 rt->rt_flags |= RTCF_NOTIFY;
02293
02294
NETLINK_CB(skb).dst_pid =
NETLINK_CB(in_skb).pid;
02295
02296 err =
rt_fill_info(skb,
NETLINK_CB(in_skb).pid, nlh->
nlmsg_seq,
02297 RTM_NEWROUTE, 0);
02298
if (!err)
02299
goto out_free;
02300
if (err < 0) {
02301 err = -EMSGSIZE;
02302
goto out_free;
02303 }
02304
02305 err =
netlink_unicast(
rtnl, skb,
NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
02306
if (err > 0)
02307 err = 0;
02308 out:
return err;
02309
02310 out_free:
02311
kfree_skb(skb);
02312
goto out;
02313 }
02314
02315 int ip_rt_dump(
struct sk_buff *skb,
struct netlink_callback *cb)
02316 {
02317
struct rtable *rt;
02318
int h, s_h;
02319
int idx, s_idx;
02320
02321 s_h = cb->
args[0];
02322 s_idx = idx = cb->
args[1];
02323
for (h = 0; h <=
rt_hash_mask; h++) {
02324
if (h < s_h)
continue;
02325
if (h > s_h)
02326 s_idx = 0;
02327 read_lock_bh(&
rt_hash_table[h].lock);
02328
for (rt =
rt_hash_table[h].
chain, idx = 0; rt;
02329 rt = rt->
u.rt_next, idx++) {
02330
if (idx < s_idx)
02331
continue;
02332 skb->
dst =
dst_clone(&rt->u.dst);
02333
if (
rt_fill_info(skb,
NETLINK_CB(cb->
skb).pid,
02334 cb->
nlh->
nlmsg_seq,
02335 RTM_NEWROUTE, 1) <= 0) {
02336
dst_release(xchg(&skb->
dst, NULL));
02337 read_unlock_bh(&
rt_hash_table[h].lock);
02338
goto done;
02339 }
02340
dst_release(xchg(&skb->
dst, NULL));
02341 }
02342 read_unlock_bh(&
rt_hash_table[h].lock);
02343 }
02344
02345 done:
02346 cb->
args[0] = h;
02347 cb->
args[1] = idx;
02348
return skb->
len;
02349 }
02350
/*
 * Multicast event hook: flush the route cache with a delay argument
 * of 0.  @in_dev is not used.
 */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(0);
}
02355
02356
#ifdef CONFIG_SYSCTL
02357
static int flush_delay;
02358
02359
static int ipv4_sysctl_rtcache_flush(ctl_table *ctl,
int write,
02360
struct file *filp,
void *buffer,
02361 size_t *lenp)
02362 {
02363
if (write) {
02364 proc_dointvec(ctl, write, filp, buffer, lenp);
02365
rt_cache_flush(flush_delay);
02366
return 0;
02367 }
02368
02369
return -EINVAL;
02370 }
02371
02372
/*
 * sysctl strategy routine for the "flush" entry.
 *
 * Expects @newval to point at exactly one user-space int and flushes
 * the route cache with that value as the delay.
 *
 * Returns 0 on success, -EINVAL if @newlen is not sizeof(int), and
 * -EFAULT if the value cannot be read from user space.
 */
static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, int *name,
					      int nlen, void *oldval,
					      size_t *oldlenp, void *newval,
					      size_t newlen, void **context)
{
	int delay;

	if (newlen != sizeof(int))
		return -EINVAL;

	if (get_user(delay, (int *)newval))
		return -EFAULT;

	rt_cache_flush(delay);
	return 0;
}
02385
02386 ctl_table
ipv4_route_table[] = {
02387 {
02388 ctl_name: NET_IPV4_ROUTE_FLUSH,
02389 procname:
"flush",
02390 data: &flush_delay,
02391 maxlen:
sizeof(
int),
02392 mode: 0644,
02393 proc_handler: &ipv4_sysctl_rtcache_flush,
02394 strategy: &ipv4_sysctl_rtcache_flush_strategy,
02395 },
02396 {
02397 ctl_name: NET_IPV4_ROUTE_MIN_DELAY,
02398 procname:
"min_delay",
02399 data: &
ip_rt_min_delay,
02400 maxlen:
sizeof(
int),
02401 mode: 0644,
02402 proc_handler: &proc_dointvec_jiffies,
02403 strategy: &sysctl_jiffies,
02404 },
02405 {
02406 ctl_name: NET_IPV4_ROUTE_MAX_DELAY,
02407 procname:
"max_delay",
02408 data: &
ip_rt_max_delay,
02409 maxlen:
sizeof(
int),
02410 mode: 0644,
02411 proc_handler: &proc_dointvec_jiffies,
02412 strategy: &sysctl_jiffies,
02413 },
02414 {
02415 ctl_name: NET_IPV4_ROUTE_GC_THRESH,
02416 procname:
"gc_thresh",
02417 data: &
ipv4_dst_ops.
gc_thresh,
02418 maxlen:
sizeof(
int),
02419 mode: 0644,
02420 proc_handler: &proc_dointvec,
02421 },
02422 {
02423 ctl_name: NET_IPV4_ROUTE_MAX_SIZE,
02424 procname:
"max_size",
02425 data: &
ip_rt_max_size,
02426 maxlen:
sizeof(
int),
02427 mode: 0644,
02428 proc_handler: &proc_dointvec,
02429 },
02430 {
02431 ctl_name: NET_IPV4_ROUTE_GC_MIN_INTERVAL,
02432 procname:
"gc_min_interval",
02433 data: &
ip_rt_gc_min_interval,
02434 maxlen:
sizeof(
int),
02435 mode: 0644,
02436 proc_handler: &proc_dointvec_jiffies,
02437 strategy: &sysctl_jiffies,
02438 },
02439 {
02440 ctl_name: NET_IPV4_ROUTE_GC_TIMEOUT,
02441 procname:
"gc_timeout",
02442 data: &
ip_rt_gc_timeout,
02443 maxlen:
sizeof(
int),
02444 mode: 0644,
02445 proc_handler: &proc_dointvec_jiffies,
02446 strategy: &sysctl_jiffies,
02447 },
02448 {
02449 ctl_name: NET_IPV4_ROUTE_GC_INTERVAL,
02450 procname:
"gc_interval",
02451 data: &
ip_rt_gc_interval,
02452 maxlen:
sizeof(
int),
02453 mode: 0644,
02454 proc_handler: &proc_dointvec_jiffies,
02455 strategy: &sysctl_jiffies,
02456 },
02457 {
02458 ctl_name: NET_IPV4_ROUTE_REDIRECT_LOAD,
02459 procname:
"redirect_load",
02460 data: &
ip_rt_redirect_load,
02461 maxlen:
sizeof(
int),
02462 mode: 0644,
02463 proc_handler: &proc_dointvec,
02464 },
02465 {
02466 ctl_name: NET_IPV4_ROUTE_REDIRECT_NUMBER,
02467 procname:
"redirect_number",
02468 data: &
ip_rt_redirect_number,
02469 maxlen:
sizeof(
int),
02470 mode: 0644,
02471 proc_handler: &proc_dointvec,
02472 },
02473 {
02474 ctl_name: NET_IPV4_ROUTE_REDIRECT_SILENCE,
02475 procname:
"redirect_silence",
02476 data: &
ip_rt_redirect_silence,
02477 maxlen:
sizeof(
int),
02478 mode: 0644,
02479 proc_handler: &proc_dointvec,
02480 },
02481 {
02482 ctl_name: NET_IPV4_ROUTE_ERROR_COST,
02483 procname:
"error_cost",
02484 data: &
ip_rt_error_cost,
02485 maxlen:
sizeof(
int),
02486 mode: 0644,
02487 proc_handler: &proc_dointvec,
02488 },
02489 {
02490 ctl_name: NET_IPV4_ROUTE_ERROR_BURST,
02491 procname:
"error_burst",
02492 data: &
ip_rt_error_burst,
02493 maxlen:
sizeof(
int),
02494 mode: 0644,
02495 proc_handler: &proc_dointvec,
02496 },
02497 {
02498 ctl_name: NET_IPV4_ROUTE_GC_ELASTICITY,
02499 procname:
"gc_elasticity",
02500 data: &
ip_rt_gc_elasticity,
02501 maxlen:
sizeof(
int),
02502 mode: 0644,
02503 proc_handler: &proc_dointvec,
02504 },
02505 {
02506 ctl_name: NET_IPV4_ROUTE_MTU_EXPIRES,
02507 procname:
"mtu_expires",
02508 data: &
ip_rt_mtu_expires,
02509 maxlen:
sizeof(
int),
02510 mode: 0644,
02511 proc_handler: &proc_dointvec_jiffies,
02512 strategy: &sysctl_jiffies,
02513 },
02514 {
02515 ctl_name: NET_IPV4_ROUTE_MIN_PMTU,
02516 procname:
"min_pmtu",
02517 data: &
ip_rt_min_pmtu,
02518 maxlen:
sizeof(
int),
02519 mode: 0644,
02520 proc_handler: &proc_dointvec,
02521 },
02522 {
02523 ctl_name: NET_IPV4_ROUTE_MIN_ADVMSS,
02524 procname:
"min_adv_mss",
02525 data: &
ip_rt_min_advmss,
02526 maxlen:
sizeof(
int),
02527 mode: 0644,
02528 proc_handler: &proc_dointvec,
02529 },
02530 {
02531 ctl_name: NET_IPV4_ROUTE_SECRET_INTERVAL,
02532 procname:
"secret_interval",
02533 data: &
ip_rt_secret_interval,
02534 maxlen:
sizeof(
int),
02535 mode: 0644,
02536 proc_handler: &proc_dointvec_jiffies,
02537 strategy: &sysctl_jiffies,
02538 },
02539 { 0 }
02540 };
02541
#endif
02542
02543
#ifdef CONFIG_NET_CLS_ROUTE
02544
struct ip_rt_acct *
ip_rt_acct;
02545
02546
02547
02548
02549
#define IP_RT_ACCT_CPU(i) (ip_rt_acct + cpu_logical_map(i) * 256)
02550
02551
static int ip_rt_acct_read(
char *buffer,
char **start, off_t offset,
02552
int length,
int *eof,
void *data)
02553 {
02554
unsigned int i;
02555
02556
if ((offset & 3) || (length & 3))
02557
return -EIO;
02558
02559
if (offset >=
sizeof(
struct ip_rt_acct) * 256) {
02560 *eof = 1;
02561
return 0;
02562 }
02563
02564
if (offset + length >=
sizeof(
struct ip_rt_acct) * 256) {
02565 length =
sizeof(
struct ip_rt_acct) * 256 - offset;
02566 *eof = 1;
02567 }
02568
02569 offset /=
sizeof(u32);
02570
02571
if (length > 0) {
02572 u32 *src = ((u32 *) IP_RT_ACCT_CPU(0)) + offset;
02573 u32 *dst = (u32 *) buffer;
02574
02575
02576 *start = buffer;
02577 memcpy(dst, src, length);
02578
02579
02580
for (i = 1; i < smp_num_cpus; i++) {
02581
unsigned int j;
02582
02583 src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;
02584
02585
for (j = 0; j < length/4; j++)
02586 dst[j] += src[j];
02587 }
02588 }
02589
return length;
02590 }
02591
#endif
02592
02593 void __init
ip_rt_init(
void)
02594 {
02595
int i, order, goal;
02596
02597
rt_hash_rnd = (
int) ((num_physpages ^ (num_physpages>>8)) ^
02598 (jiffies ^ (jiffies >> 7)));
02599
02600
#ifdef CONFIG_NET_CLS_ROUTE
02601
for (order = 0;
02602 (PAGE_SIZE << order) < 256 *
sizeof(
struct ip_rt_acct) * NR_CPUS; order++)
02603 ;
02604
ip_rt_acct = (
struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order);
02605
if (!
ip_rt_acct)
02606 panic(
"IP: failed to allocate ip_rt_acct\n");
02607 memset(
ip_rt_acct, 0, PAGE_SIZE << order);
02608
#endif
02609
02610
ipv4_dst_ops.
kmem_cachep = kmem_cache_create(
"ip_dst_cache",
02611
sizeof(
struct rtable),
02612 0, SLAB_HWCACHE_ALIGN,
02613 NULL, NULL);
02614
02615
if (!
ipv4_dst_ops.
kmem_cachep)
02616 panic(
"IP: failed to allocate ip_dst_cache\n");
02617
02618 goal = num_physpages >> (26 - PAGE_SHIFT);
02619
02620
for (order = 0; (1UL << order) < goal; order++)
02621 ;
02622
02623
do {
02624
rt_hash_mask = (1UL << order) * PAGE_SIZE /
02625
sizeof(
struct rt_hash_bucket);
02626
while (
rt_hash_mask & (
rt_hash_mask - 1))
02627
rt_hash_mask--;
02628
rt_hash_table = (
struct rt_hash_bucket *)
02629 __get_free_pages(GFP_ATOMIC, order);
02630 }
while (
rt_hash_table == NULL && --order > 0);
02631
02632
if (!
rt_hash_table)
02633 panic(
"Failed to allocate IP route cache hash table\n");
02634
02635 printk(KERN_INFO
"IP: routing cache hash table of %u buckets, %ldKbytes\n",
02636
rt_hash_mask,
02637 (
long) (
rt_hash_mask *
sizeof(
struct rt_hash_bucket)) / 1024);
02638
02639
for (
rt_hash_log = 0; (1 <<
rt_hash_log) !=
rt_hash_mask;
rt_hash_log++)
02640 ;
02641
02642
rt_hash_mask--;
02643
for (i = 0; i <=
rt_hash_mask; i++) {
02644
rt_hash_table[i].
lock = RW_LOCK_UNLOCKED;
02645
rt_hash_table[i].
chain = NULL;
02646 }
02647
02648
ipv4_dst_ops.
gc_thresh = (
rt_hash_mask + 1);
02649
ip_rt_max_size = (
rt_hash_mask + 1) * 16;
02650
02651
devinet_init();
02652
ip_fib_init();
02653
02654
rt_flush_timer.function =
rt_run_flush;
02655
rt_periodic_timer.function =
rt_check_expire;
02656
rt_secret_timer.function =
rt_secret_rebuild;
02657
02658
02659
02660
02661
rt_periodic_timer.expires = jiffies +
net_random() %
ip_rt_gc_interval +
02662
ip_rt_gc_interval;
02663 add_timer(&
rt_periodic_timer);
02664
02665
rt_secret_timer.expires = jiffies +
net_random() %
ip_rt_secret_interval +
02666
ip_rt_secret_interval;
02667 add_timer(&
rt_secret_timer);
02668
02669 proc_net_create (
"rt_cache", 0,
rt_cache_get_info);
02670 proc_net_create (
"rt_cache_stat", 0,
rt_cache_stat_get_info);
02671
#ifdef CONFIG_NET_CLS_ROUTE
02672
create_proc_read_entry(
"net/rt_acct", 0, 0, ip_rt_acct_read, NULL);
02673
#endif
02674
}