diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c new file mode 100644 index 00000000..41e5916d --- /dev/null +++ b/arch/arm/boot/compressed/decompress.c @@ -0,0 +1,50 @@ +#define _LINUX_STRING_H_ + +#include /* for inline */ +#include /* for size_t */ +#include /* for NULL */ +#include +#include + +extern unsigned long free_mem_ptr; +extern unsigned long free_mem_end_ptr; +extern void error(char *); + +#define STATIC static +#define STATIC_RW_DATA /* non-static please */ + +#define ARCH_HAS_DECOMP_WDOG + +/* Diagnostic functions */ +#ifdef DEBUG +# define Assert(cond,msg) {if(!(cond)) error(msg);} +# define Trace(x) fprintf x +# define Tracev(x) {if (verbose) fprintf x ;} +# define Tracevv(x) {if (verbose>1) fprintf x ;} +# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} +# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + +#ifdef CONFIG_KERNEL_GZIP +#include "../../../../lib/decompress_inflate.c" +#endif + +#ifdef CONFIG_KERNEL_BZIP2 +#include "../../../../lib/decompress_bunzip2.c" +#endif + +#ifdef CONFIG_KERNEL_LZMA +#include "../../../../lib/decompress_unlzma.c" +#endif + +void do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) +{ + decompress(input, len, NULL, NULL, output, NULL, error); +} diff --git a/arch/arm/boot/compressed/lib1funcs.S b/arch/arm/boot/compressed/lib1funcs.S new file mode 100644 index 00000000..6dc06487 --- /dev/null +++ b/arch/arm/boot/compressed/lib1funcs.S @@ -0,0 +1,348 @@ +/* + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines + * + * Author: Nicolas Pitre + * - contributed to gcc-3.4 on Sep 30, 2003 + * - adapted for the Linux kernel on Oct 2, 2003 + */ + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + + +#include +#include + + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. 
This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif + + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +.endm + + +.macro ARM_DIV2_ORDER divisor, order + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm + + +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif + + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/substractions are left. 
+2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + +ENTRY(__udivsi3) +ENTRY(__aeabi_uidiv) + + subs r2, r1, #1 + moveq pc, lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + mov pc, lr + +11: moveq r0, #1 + movne r0, #0 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + mov pc, lr + +ENDPROC(__udivsi3) +ENDPROC(__aeabi_uidiv) + +ENTRY(__umodsi3) + + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + movls pc, lr + + ARM_MOD_BODY r0, r1, r2, r3 + + mov pc, lr + +ENDPROC(__umodsi3) + +ENTRY(__divsi3) +ENTRY(__aeabi_idiv) + + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + mov pc, lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + mov pc, lr + +ENDPROC(__divsi3) +ENDPROC(__aeabi_idiv) + +ENTRY(__modsi3) + + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + +ENDPROC(__modsi3) + +#ifdef CONFIG_AEABI + +ENTRY(__aeabi_uidivmod) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr + +ENDPROC(__aeabi_uidivmod) + +ENTRY(__aeabi_idivmod) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr + +ENDPROC(__aeabi_idivmod) + +#endif + +Ldiv0: + + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. 
+ ldr pc, [sp], #8 + + diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index d0daeab2..ec600386 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -40,12 +40,12 @@ static inline void atomic_add(int i, atomic_t *v) int result; __asm__ __volatile__("@ atomic_add\n" -"1: ldrex %0, [%2]\n" -" add %0, %0, %3\n" -" strex %1, %0, [%2]\n" +"1: ldrex %0, [%3]\n" +" add %0, %0, %4\n" +" strex %1, %0, [%3]\n" " teq %1, #0\n" " bne 1b" - : "=&r" (result), "=&r" (tmp) + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) : "r" (&v->counter), "Ir" (i) : "cc"); } @@ -58,12 +58,12 @@ static inline int atomic_add_return(int i, atomic_t *v) smp_mb(); __asm__ __volatile__("@ atomic_add_return\n" -"1: ldrex %0, [%2]\n" -" add %0, %0, %3\n" -" strex %1, %0, [%2]\n" +"1: ldrex %0, [%3]\n" +" add %0, %0, %4\n" +" strex %1, %0, [%3]\n" " teq %1, #0\n" " bne 1b" - : "=&r" (result), "=&r" (tmp) + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) : "r" (&v->counter), "Ir" (i) : "cc"); @@ -78,12 +78,12 @@ static inline void atomic_sub(int i, atomic_t *v) int result; __asm__ __volatile__("@ atomic_sub\n" -"1: ldrex %0, [%2]\n" -" sub %0, %0, %3\n" -" strex %1, %0, [%2]\n" +"1: ldrex %0, [%3]\n" +" sub %0, %0, %4\n" +" strex %1, %0, [%3]\n" " teq %1, #0\n" " bne 1b" - : "=&r" (result), "=&r" (tmp) + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) : "r" (&v->counter), "Ir" (i) : "cc"); } @@ -96,12 +96,12 @@ static inline int atomic_sub_return(int i, atomic_t *v) smp_mb(); __asm__ __volatile__("@ atomic_sub_return\n" -"1: ldrex %0, [%2]\n" -" sub %0, %0, %3\n" -" strex %1, %0, [%2]\n" +"1: ldrex %0, [%3]\n" +" sub %0, %0, %4\n" +" strex %1, %0, [%3]\n" " teq %1, #0\n" " bne 1b" - : "=&r" (result), "=&r" (tmp) + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) : "r" (&v->counter), "Ir" (i) : "cc"); @@ -118,11 +118,11 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) do { __asm__ __volatile__("@ atomic_cmpxchg\n" - "ldrex %1, [%2]\n" + "ldrex %1, [%3]\n" "mov %0, #0\n" - "teq %1, %3\n" - "strexeq %0, %4, [%2]\n" - : "=&r" (res), "=&r" (oldval) + "teq %1, %4\n" + "strexeq %0, %5, [%3]\n" + : "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter) : "r" (&ptr->counter), "Ir" (old), "r" (new) : "cc"); } while (res); @@ -137,12 +137,12 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) unsigned long tmp, tmp2; __asm__ __volatile__("@ atomic_clear_mask\n" -"1: ldrex %0, [%2]\n" -" bic %0, %0, %3\n" -" strex %1, %0, [%2]\n" +"1: ldrex %0, [%3]\n" +" bic %0, %0, %4\n" +" strex %1, %0, [%3]\n" " teq %1, #0\n" " bne 1b" - : "=&r" (tmp), "=&r" (tmp2) + : "=&r" (tmp), "=&r" (tmp2), "+Qo" (*addr) : "r" (addr), "Ir" (mask) : "cc"); } @@ -235,6 +235,234 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() smp_mb() #define smp_mb__after_atomic_inc() smp_mb() +#ifndef CONFIG_GENERIC_ATOMIC64 +typedef struct { + u64 __aligned(8) counter; +} atomic64_t; + +#define ATOMIC64_INIT(i) { (i) } + +static inline u64 atomic64_read(atomic64_t *v) +{ + u64 result; + + __asm__ __volatile__("@ atomic64_read\n" +" ldrexd %0, %H0, [%1]" + : "=&r" (result) + : "r" (&v->counter) + ); + + return result; +} + +static inline void atomic64_set(atomic64_t *v, u64 i) +{ + u64 tmp; + + __asm__ __volatile__("@ atomic64_set\n" +"1: ldrexd %0, %H0, [%1]\n" +" strexd %0, %2, %H2, [%1]\n" +" teq %0, #0\n" +" bne 1b" + : "=&r" (tmp) + : "r" (&v->counter), "r" (i) + : "cc"); +} + +static inline void atomic64_add(u64 i, 
atomic64_t *v) +{ + u64 result; + unsigned long tmp; + + __asm__ __volatile__("@ atomic64_add\n" +"1: ldrexd %0, %H0, [%2]\n" +" adds %0, %0, %3\n" +" adc %H0, %H0, %H3\n" +" strexd %1, %0, %H0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (result), "=&r" (tmp) + : "r" (&v->counter), "r" (i) + : "cc"); +} + +static inline u64 atomic64_add_return(u64 i, atomic64_t *v) +{ + u64 result; + unsigned long tmp; + + smp_mb(); + + __asm__ __volatile__("@ atomic64_add_return\n" +"1: ldrexd %0, %H0, [%2]\n" +" adds %0, %0, %3\n" +" adc %H0, %H0, %H3\n" +" strexd %1, %0, %H0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (result), "=&r" (tmp) + : "r" (&v->counter), "r" (i) + : "cc"); + + smp_mb(); + + return result; +} + +static inline void atomic64_sub(u64 i, atomic64_t *v) +{ + u64 result; + unsigned long tmp; + + __asm__ __volatile__("@ atomic64_sub\n" +"1: ldrexd %0, %H0, [%2]\n" +" subs %0, %0, %3\n" +" sbc %H0, %H0, %H3\n" +" strexd %1, %0, %H0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (result), "=&r" (tmp) + : "r" (&v->counter), "r" (i) + : "cc"); +} + +static inline u64 atomic64_sub_return(u64 i, atomic64_t *v) +{ + u64 result; + unsigned long tmp; + + smp_mb(); + + __asm__ __volatile__("@ atomic64_sub_return\n" +"1: ldrexd %0, %H0, [%2]\n" +" subs %0, %0, %3\n" +" sbc %H0, %H0, %H3\n" +" strexd %1, %0, %H0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (result), "=&r" (tmp) + : "r" (&v->counter), "r" (i) + : "cc"); + + smp_mb(); + + return result; +} + +static inline u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old, u64 new) +{ + u64 oldval; + unsigned long res; + + smp_mb(); + + do { + __asm__ __volatile__("@ atomic64_cmpxchg\n" + "ldrexd %1, %H1, [%2]\n" + "mov %0, #0\n" + "teq %1, %3\n" + "teqeq %H1, %H3\n" + "strexdeq %0, %4, %H4, [%2]" + : "=&r" (res), "=&r" (oldval) + : "r" (&ptr->counter), "r" (old), "r" (new) + : "cc"); + } while (res); + + smp_mb(); + + return oldval; +} + +static inline u64 atomic64_xchg(atomic64_t *ptr, u64 new) +{ + u64 result; + unsigned long tmp; + + smp_mb(); + + __asm__ __volatile__("@ atomic64_xchg\n" +"1: ldrexd %0, %H0, [%2]\n" +" strexd %1, %3, %H3, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (result), "=&r" (tmp) + : "r" (&ptr->counter), "r" (new) + : "cc"); + + smp_mb(); + + return result; +} + +static inline u64 atomic64_dec_if_positive(atomic64_t *v) +{ + u64 result; + unsigned long tmp; + + smp_mb(); + + __asm__ __volatile__("@ atomic64_dec_if_positive\n" +"1: ldrexd %0, %H0, [%2]\n" +" subs %0, %0, #1\n" +" sbc %H0, %H0, #0\n" +" teq %H0, #0\n" +" bmi 2f\n" +" strexd %1, %0, %H0, [%2]\n" +" teq %1, #0\n" +" bne 1b\n" +"2:" + : "=&r" (result), "=&r" (tmp) + : "r" (&v->counter) + : "cc"); + + smp_mb(); + + return result; +} + +static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u) +{ + u64 val; + unsigned long tmp; + int ret = 1; + + smp_mb(); + + __asm__ __volatile__("@ atomic64_add_unless\n" +"1: ldrexd %0, %H0, [%3]\n" +" teq %0, %4\n" +" teqeq %H0, %H4\n" +" moveq %1, #0\n" +" beq 2f\n" +" adds %0, %0, %5\n" +" adc %H0, %H0, %H5\n" +" strexd %2, %0, %H0, [%3]\n" +" teq %2, #0\n" +" bne 1b\n" +"2:" + : "=&r" (val), "=&r" (ret), "=&r" (tmp) + : "r" (&v->counter), "r" (u), "r" (a) + : "cc"); + + if (ret) + smp_mb(); + + return ret; +} + +#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) +#define atomic64_inc(v) atomic64_add(1LL, (v)) +#define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) +#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) +#define atomic64_sub_and_test(a, v) 
(atomic64_sub_return((a), (v)) == 0) +#define atomic64_dec(v) atomic64_sub(1LL, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) +#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) + +#else /* !CONFIG_GENERIC_ATOMIC64 */ +#include +#endif #include #endif #endif diff --git a/drivers/net/ppp_deflate.c b/drivers/net/ppp_deflate.c index 222fc1a3..034c1c65 100644 --- a/drivers/net/ppp_deflate.c +++ b/drivers/net/ppp_deflate.c @@ -306,7 +306,7 @@ static void z_decomp_free(void *arg) if (state) { zlib_inflateEnd(&state->strm); - vfree(state->strm.workspace); + kfree(state->strm.workspace); kfree(state); } } @@ -346,7 +346,8 @@ static void *z_decomp_alloc(unsigned char *options, int opt_len) state->w_size = w_size; state->strm.next_out = NULL; - state->strm.workspace = vmalloc(zlib_inflate_workspacesize()); + state->strm.workspace = kmalloc(zlib_inflate_workspacesize(), + GFP_KERNEL|__GFP_REPEAT); if (state->strm.workspace == NULL) goto out_free; diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index 5910df60..92359019 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -756,6 +756,7 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) /* Try to dequeue as many skbs from reorder_q as we can. */ pppol2tp_recv_dequeue(session); + sock_put(sock); return 0; @@ -772,6 +773,7 @@ discard_bad_csum: UDP_INC_STATS_USER(&init_net, UDP_MIB_INERRORS, 0); tunnel->stats.rx_errors++; kfree_skb(skb); + sock_put(sock); return 0; @@ -1178,7 +1180,8 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) /* Calculate UDP checksum if configured to do so */ if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT) skb->ip_summed = CHECKSUM_NONE; - else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + else if ((skb_dst(skb) && skb_dst(skb)->dev) && + (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { skb->ip_summed = CHECKSUM_COMPLETE; csum = skb_checksum(skb, 0, udp_len, 0); uh->check = csum_tcpudp_magic(inet->saddr, inet->daddr, @@ -1657,6 +1660,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel_sock == NULL) goto end; + sock_hold(tunnel_sock); tunnel = tunnel_sock->sk_user_data; } else { tunnel = pppol2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel); diff --git a/drivers/net/pppolac.c b/drivers/net/pppolac.c index af3202a9..c94b8507 100644 --- a/drivers/net/pppolac.c +++ b/drivers/net/pppolac.c @@ -15,12 +15,15 @@ */ /* This driver handles L2TP data packets between a UDP socket and a PPP channel. - * To keep things simple, only one session per socket is permitted. Packets are - * sent via the socket, so it must keep connected to the same address. One must - * not set sequencing in ICCN but let LNS controll it. Currently this driver - * only works on IPv4 due to the lack of UDP encapsulation support in IPv6. */ + * The socket must keep connected, and only one session per socket is permitted. + * Sequencing of outgoing packets is controlled by LNS. Incoming packets with + * sequences are reordered within a sliding window of one second. Currently + * reordering only happens when a packet is received. It is done for simplicity + * since no additional locks or threads are required. This driver only works on + * IPv4 due to the lack of UDP encapsulation support in IPv6. 
*/ #include +#include #include #include #include @@ -53,14 +56,28 @@ static inline union unaligned *unaligned(void *ptr) return (union unaligned *)ptr; } +struct meta { + __u32 sequence; + __u32 timestamp; +}; + +static inline struct meta *skb_meta(struct sk_buff *skb) +{ + return (struct meta *)skb->cb; +} + +/******************************************************************************/ + static int pppolac_recv_core(struct sock *sk_udp, struct sk_buff *skb) { struct sock *sk = (struct sock *)sk_udp->sk_user_data; struct pppolac_opt *opt = &pppox_sk(sk)->proto.lac; + struct meta *meta = skb_meta(skb); + __u32 now = jiffies; __u8 bits; __u8 *ptr; - /* Drop the packet if it is too short. */ + /* Drop the packet if L2TP header is missing. */ if (skb->len < sizeof(struct udphdr) + 6) goto drop; @@ -99,9 +116,12 @@ static int pppolac_recv_core(struct sock *sk_udp, struct sk_buff *skb) if (unaligned(ptr)->u32 != opt->local) goto drop; - /* Check the sequence if it is present. According to RFC 2661 section - * 5.4, the only thing to do is to update opt->sequencing. */ - opt->sequencing = bits & L2TP_SEQUENCE_BIT; + /* Check the sequence if it is present. */ + if (bits & L2TP_SEQUENCE_BIT) { + meta->sequence = ptr[4] << 8 | ptr[5]; + if ((__s16)(meta->sequence - opt->recv_sequence) < 0) + goto drop; + } /* Skip PPP address and control if they are present. */ if (skb->len >= 2 && skb->data[0] == PPP_ADDR && @@ -112,7 +132,54 @@ static int pppolac_recv_core(struct sock *sk_udp, struct sk_buff *skb) if (skb->len >= 1 && skb->data[0] & 1) skb_push(skb, 1)[0] = 0; - /* Finally, deliver the packet to PPP channel. */ + /* Drop the packet if PPP protocol is missing. */ + if (skb->len < 2) + goto drop; + + /* Perform reordering if sequencing is enabled. */ + atomic_set(&opt->sequencing, bits & L2TP_SEQUENCE_BIT); + if (bits & L2TP_SEQUENCE_BIT) { + struct sk_buff *skb1; + + /* Insert the packet into receive queue in order. */ + skb_set_owner_r(skb, sk); + skb_queue_walk(&sk->sk_receive_queue, skb1) { + struct meta *meta1 = skb_meta(skb1); + __s16 order = meta->sequence - meta1->sequence; + if (order == 0) + goto drop; + if (order < 0) { + meta->timestamp = meta1->timestamp; + skb_insert(skb1, skb, &sk->sk_receive_queue); + skb = NULL; + break; + } + } + if (skb) { + meta->timestamp = now; + skb_queue_tail(&sk->sk_receive_queue, skb); + } + + /* Remove packets from receive queue as long as + * 1. the receive buffer is full, + * 2. they are queued longer than one second, or + * 3. there are no missing packets before them. */ + skb_queue_walk_safe(&sk->sk_receive_queue, skb, skb1) { + meta = skb_meta(skb); + if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && + now - meta->timestamp < HZ && + meta->sequence != opt->recv_sequence) + break; + skb_unlink(skb, &sk->sk_receive_queue); + opt->recv_sequence = (__u16)(meta->sequence + 1); + skb_orphan(skb); + ppp_input(&pppox_sk(sk)->chan, skb); + } + return NET_RX_SUCCESS; + } + + /* Flush receive queue if sequencing is disabled. */ + skb_queue_purge(&sk->sk_receive_queue); skb_orphan(skb); ppp_input(&pppox_sk(sk)->chan, skb); return NET_RX_SUCCESS; @@ -163,14 +230,14 @@ static int pppolac_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb->data[1] = PPP_CTRL; /* Install L2TP header. 
*/ - if (opt->sequencing) { + if (atomic_read(&opt->sequencing)) { skb_push(skb, 10); skb->data[0] = L2TP_SEQUENCE_BIT; - skb->data[6] = opt->sequence >> 8; - skb->data[7] = opt->sequence; + skb->data[6] = opt->xmit_sequence >> 8; + skb->data[7] = opt->xmit_sequence; skb->data[8] = 0; skb->data[9] = 0; - opt->sequence++; + opt->xmit_sequence++; } else { skb_push(skb, 6); skb->data[0] = 0; @@ -246,6 +313,7 @@ static int pppolac_connect(struct socket *sock, struct sockaddr *useraddr, po->chan.mtu = PPP_MTU - 80; po->proto.lac.local = unaligned(&addr->local)->u32; po->proto.lac.remote = unaligned(&addr->remote)->u32; + atomic_set(&po->proto.lac.sequencing, 1); po->proto.lac.backlog_rcv = sk_udp->sk_backlog_rcv; error = ppp_register_channel(&po->chan); @@ -283,6 +351,7 @@ static int pppolac_release(struct socket *sock) if (sk->sk_state != PPPOX_NONE) { struct sock *sk_udp = (struct sock *)pppox_sk(sk)->chan.private; lock_sock(sk_udp); + skb_queue_purge(&sk->sk_receive_queue); pppox_unbind_sock(sk); udp_sk(sk_udp)->encap_type = 0; udp_sk(sk_udp)->encap_rcv = NULL; diff --git a/drivers/net/pppopns.c b/drivers/net/pppopns.c index 29809712..fb819844 100644 --- a/drivers/net/pppopns.c +++ b/drivers/net/pppopns.c @@ -16,11 +16,14 @@ /* This driver handles PPTP data packets between a RAW socket and a PPP channel. * The socket is created in the kernel space and connected to the same address - * of the control socket. To keep things simple, packets are always sent with - * sequence but without acknowledgement. This driver should work on both IPv4 - * and IPv6. */ + * of the control socket. Outgoing packets are always sent with sequences but + * without acknowledgements. Incoming packets with sequences are reordered + * within a sliding window of one second. Currently reordering only happens when + * a packet is received. It is done for simplicity since no additional locks or + * threads are required. This driver should work on both IPv4 and IPv6. */ #include +#include #include #include #include @@ -52,21 +55,35 @@ struct header { __u32 sequence; } __attribute__((packed)); +struct meta { + __u32 sequence; + __u32 timestamp; +}; + +static inline struct meta *skb_meta(struct sk_buff *skb) +{ + return (struct meta *)skb->cb; +} + +/******************************************************************************/ + static int pppopns_recv_core(struct sock *sk_raw, struct sk_buff *skb) { struct sock *sk = (struct sock *)sk_raw->sk_user_data; struct pppopns_opt *opt = &pppox_sk(sk)->proto.pns; + struct meta *meta = skb_meta(skb); + __u32 now = jiffies; struct header *hdr; /* Skip transport header */ skb_pull(skb, skb_transport_header(skb) - skb->data); - /* Drop the packet if it is too short. */ + /* Drop the packet if GRE header is missing. */ if (skb->len < GRE_HEADER_SIZE) goto drop; + hdr = (struct header *)skb->data; /* Check the header. */ - hdr = (struct header *)skb->data; if (hdr->type != PPTP_GRE_TYPE || hdr->call != opt->local || (hdr->bits & PPTP_GRE_BITS_MASK) != PPTP_GRE_BITS) goto drop; @@ -81,6 +98,13 @@ static int pppopns_recv_core(struct sock *sk_raw, struct sk_buff *skb) if (skb->len != ntohs(hdr->length)) goto drop; + /* Check the sequence if it is present. */ + if (hdr->bits & PPTP_GRE_SEQ_BIT) { + meta->sequence = ntohl(hdr->sequence); + if ((__s32)(meta->sequence - opt->recv_sequence) < 0) + goto drop; + } + /* Skip PPP address and control if they are present. 
*/ if (skb->len >= 2 && skb->data[0] == PPP_ADDR && skb->data[1] == PPP_CTRL) @@ -90,7 +114,53 @@ static int pppopns_recv_core(struct sock *sk_raw, struct sk_buff *skb) if (skb->len >= 1 && skb->data[0] & 1) skb_push(skb, 1)[0] = 0; - /* Finally, deliver the packet to PPP channel. */ + /* Drop the packet if PPP protocol is missing. */ + if (skb->len < 2) + goto drop; + + /* Perform reordering if sequencing is enabled. */ + if (hdr->bits & PPTP_GRE_SEQ_BIT) { + struct sk_buff *skb1; + + /* Insert the packet into receive queue in order. */ + skb_set_owner_r(skb, sk); + skb_queue_walk(&sk->sk_receive_queue, skb1) { + struct meta *meta1 = skb_meta(skb1); + __s32 order = meta->sequence - meta1->sequence; + if (order == 0) + goto drop; + if (order < 0) { + meta->timestamp = meta1->timestamp; + skb_insert(skb1, skb, &sk->sk_receive_queue); + skb = NULL; + break; + } + } + if (skb) { + meta->timestamp = now; + skb_queue_tail(&sk->sk_receive_queue, skb); + } + + /* Remove packets from receive queue as long as + * 1. the receive buffer is full, + * 2. they are queued longer than one second, or + * 3. there are no missing packets before them. */ + skb_queue_walk_safe(&sk->sk_receive_queue, skb, skb1) { + meta = skb_meta(skb); + if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && + now - meta->timestamp < HZ && + meta->sequence != opt->recv_sequence) + break; + skb_unlink(skb, &sk->sk_receive_queue); + opt->recv_sequence = meta->sequence + 1; + skb_orphan(skb); + ppp_input(&pppox_sk(sk)->chan, skb); + } + return NET_RX_SUCCESS; + } + + /* Flush receive queue if sequencing is disabled. */ + skb_queue_purge(&sk->sk_receive_queue); skb_orphan(skb); ppp_input(&pppox_sk(sk)->chan, skb); return NET_RX_SUCCESS; @@ -151,8 +221,8 @@ static int pppopns_xmit(struct ppp_channel *chan, struct sk_buff *skb) hdr->type = PPTP_GRE_TYPE; hdr->length = htons(length); hdr->call = opt->remote; - hdr->sequence = htonl(opt->sequence); - opt->sequence++; + hdr->sequence = htonl(opt->xmit_sequence); + opt->xmit_sequence++; /* Now send the packet via the delivery queue. */ skb_set_owner_w(skb, sk_raw); @@ -261,6 +331,7 @@ static int pppopns_release(struct socket *sock) if (sk->sk_state != PPPOX_NONE) { struct sock *sk_raw = (struct sock *)pppox_sk(sk)->chan.private; lock_sock(sk_raw); + skb_queue_purge(&sk->sk_receive_queue); pppox_unbind_sock(sk); sk_raw->sk_data_ready = pppox_sk(sk)->proto.pns.data_ready; sk_raw->sk_backlog_rcv = pppox_sk(sk)->proto.pns.backlog_rcv; diff --git a/include/linux/android_aid.h b/include/linux/android_aid.h index 7f16a14c..0f904b3b 100644 --- a/include/linux/android_aid.h +++ b/include/linux/android_aid.h @@ -22,5 +22,7 @@ #define AID_INET 3003 #define AID_NET_RAW 3004 #define AID_NET_ADMIN 3005 +#define AID_NET_BW_STATS 3006 /* read bandwidth statistics */ +#define AID_NET_BW_ACCT 3007 /* change bandwidth statistics accounting */ #endif diff --git a/include/linux/err.h b/include/linux/err.h index ec87f314..1b126426 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -34,6 +34,11 @@ static inline long IS_ERR(const void *ptr) return IS_ERR_VALUE((unsigned long)ptr); } +static inline long IS_ERR_OR_NULL(const void *ptr) +{ + return !ptr || IS_ERR_VALUE((unsigned long)ptr); +} + /** * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type * @ptr: The pointer to cast. 
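
The IS_ERR_OR_NULL() helper added to err.h above folds the two common failure encodings, a NULL pointer and an ERR_PTR()-encoded errno, into a single test. A minimal usage sketch follows; get_widget(), struct widget and widget_put() are hypothetical names used only to illustrate the calling pattern, not part of this patch:

	#include <linux/err.h>
	#include <linux/errno.h>

	/* Hypothetical caller: get_widget() may return a valid pointer, NULL
	 * (not found), or an ERR_PTR()-encoded errno (hard failure). */
	static int use_widget(int id)
	{
		struct widget *w = get_widget(id);

		if (IS_ERR_OR_NULL(w))
			return w ? PTR_ERR(w) : -ENOENT; /* treat NULL as -ENOENT */

		widget_put(w);
		return 0;
	}
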
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index dea7d6b7..8851a016 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -63,6 +63,7 @@ struct tpacket_auxdata __u16 tp_mac; __u16 tp_net; __u16 tp_vlan_tci; + __u16 tp_padding; }; /* Rx ring - header status */ @@ -103,6 +104,7 @@ struct tpacket2_hdr __u32 tp_sec; __u32 tp_nsec; __u16 tp_vlan_tci; + __u16 tp_padding; }; #define TPACKET2_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll)) diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 9f581dab..8a1a2a2d 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -146,19 +146,21 @@ struct pppoe_opt { }; struct pppolac_opt { - __u32 local; - __u32 remote; - __u16 sequence; - __u8 sequencing; - int (*backlog_rcv)(struct sock *sk_udp, struct sk_buff *skb); + __u32 local; + __u32 remote; + __u32 recv_sequence; + __u32 xmit_sequence; + atomic_t sequencing; + int (*backlog_rcv)(struct sock *sk_udp, struct sk_buff *skb); }; struct pppopns_opt { - __u16 local; - __u16 remote; - __u32 sequence; - void (*data_ready)(struct sock *sk_raw, int length); - int (*backlog_rcv)(struct sock *sk_raw, struct sk_buff *skb); + __u16 local; + __u16 remote; + __u32 recv_sequence; + __u32 xmit_sequence; + void (*data_ready)(struct sock *sk_raw, int length); + int (*backlog_rcv)(struct sock *sk_raw, struct sk_buff *skb); }; #include diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 812a5f3c..9d7e8f73 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1164,9 +1164,12 @@ static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, unsigned int offset) { + if (!pskb_may_pull(skb, hlen)) + return NULL; + NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; - return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; + return skb->data + offset; } static inline void *skb_gro_mac_header(struct sk_buff *skb) @@ -1560,6 +1563,8 @@ extern void netif_carrier_on(struct net_device *dev); extern void netif_carrier_off(struct net_device *dev); +extern void netif_notify_peers(struct net_device *dev); + /** * netif_dormant_on - mark device as dormant. 
* @dev: network device @@ -2013,6 +2018,10 @@ static inline u32 dev_ethtool_get_flags(struct net_device *dev) return 0; return dev->ethtool_ops->get_flags(dev); } + +#define MODULE_ALIAS_NETDEV(device) \ + MODULE_ALIAS("netdev-" device) + #endif /* __KERNEL__ */ #endif /* _LINUX_NETDEVICE_H */ diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 2aea5039..48767cd1 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -6,6 +6,7 @@ header-y += nfnetlink_queue.h header-y += xt_CLASSIFY.h header-y += xt_CONNMARK.h header-y += xt_CONNSECMARK.h +header-y += xt_CT.h header-y += xt_DSCP.h header-y += xt_LED.h header-y += xt_MARK.h @@ -15,6 +16,7 @@ header-y += xt_RATEEST.h header-y += xt_SECMARK.h header-y += xt_TCPMSS.h header-y += xt_TCPOPTSTRIP.h +header-y += xt_TEE.h header-y += xt_TPROXY.h header-y += xt_comment.h header-y += xt_connbytes.h diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index a8248ee4..16e72038 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -19,6 +19,9 @@ enum ip_conntrack_info /* >= this indicates reply direction */ IP_CT_IS_REPLY, + IP_CT_ESTABLISHED_REPLY = IP_CT_ESTABLISHED + IP_CT_IS_REPLY, + IP_CT_RELATED_REPLY = IP_CT_RELATED + IP_CT_IS_REPLY, + IP_CT_NEW_REPLY = IP_CT_NEW + IP_CT_IS_REPLY, /* Number of distinct IP_CT types (no NEW in reply dirn). */ IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 }; @@ -73,6 +76,32 @@ enum ip_conntrack_status { /* Connection has fixed timeout. */ IPS_FIXED_TIMEOUT_BIT = 10, IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), + + /* Conntrack is a template */ + IPS_TEMPLATE_BIT = 11, + IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT), + + /* Conntrack is a fake untracked entry */ + IPS_UNTRACKED_BIT = 12, + IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), +}; + +/* Connection tracking event types */ +enum ip_conntrack_events { + IPCT_NEW, /* new conntrack */ + IPCT_RELATED, /* related conntrack */ + IPCT_DESTROY, /* destroyed conntrack */ + IPCT_REPLY, /* connection has seen two-way traffic */ + IPCT_ASSURED, /* connection status has changed to assured */ + IPCT_PROTOINFO, /* protocol information has changed */ + IPCT_HELPER, /* new helper has been set */ + IPCT_MARK, /* new mark has been set */ + IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ + IPCT_SECMARK, /* new security mark has been set */ +}; + +enum ip_conntrack_expect_events { + IPEXP_NEW, /* new expectation */ }; #ifdef __KERNEL__ diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 9f00da28..9f064479 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -75,11 +75,11 @@ struct nfnetlink_subsystem extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); -extern int nfnetlink_has_listeners(unsigned int group); -extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, +extern int nfnetlink_has_listeners(struct net *net, unsigned int group); +extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags); -extern void nfnetlink_set_err(u32 pid, u32 group, int error); -extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); +extern void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); +extern int nfnetlink_unicast(struct sk_buff *skb, struct 
net *net, u_int32_t pid, int flags); extern void nfnl_lock(void); extern void nfnl_unlock(void); diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 812cb153..668bcd5c 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -128,6 +128,7 @@ struct xt_counters_info #define XT_INV_PROTO 0x40 /* Invert the sense of PROTO. */ +#ifndef __KERNEL__ /* fn returns 0 to continue iteration */ #define XT_MATCH_ITERATE(type, e, fn, args...) \ ({ \ @@ -171,17 +172,37 @@ struct xt_counters_info #define XT_ENTRY_ITERATE(type, entries, size, fn, args...) \ XT_ENTRY_ITERATE_CONTINUE(type, entries, size, 0, fn, args) +#endif /* !__KERNEL__ */ + +/* pos is normally a struct ipt_entry/ip6t_entry/etc. */ +#define xt_entry_foreach(pos, ehead, esize) \ + for ((pos) = (typeof(pos))(ehead); \ + (pos) < (typeof(pos))((char *)(ehead) + (esize)); \ + (pos) = (typeof(pos))((char *)(pos) + (pos)->next_offset)) + +/* can only be xt_entry_match, so no use of typeof here */ +#define xt_ematch_foreach(pos, entry) \ + for ((pos) = (struct xt_entry_match *)entry->elems; \ + (pos) < (struct xt_entry_match *)((char *)(entry) + \ + (entry)->target_offset); \ + (pos) = (struct xt_entry_match *)((char *)(pos) + \ + (pos)->u.match_size)) + #ifdef __KERNEL__ #include +#define xt_match_param xt_action_param +#define xt_target_param xt_action_param /** - * struct xt_match_param - parameters for match extensions' match functions + * struct xt_action_param - parameters for matches/targets * + * @match: the match extension + * @target: the target extension + * @matchinfo: per-match data + * @targetinfo: per-target data * @in: input netdevice * @out: output netdevice - * @match: struct xt_match through which this function was invoked - * @matchinfo: per-match data * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data * @hook: hook number given packet came from @@ -189,10 +210,15 @@ struct xt_counters_info * (helpful when match->family == NFPROTO_UNSPEC) * @hotdrop: drop packet if we had inspection problems */ -struct xt_match_param { +struct xt_action_param { + union { + const struct xt_match *match; + const struct xt_target *target; + }; + union { + const void *matchinfo, *targinfo; + }; const struct net_device *in, *out; - const struct xt_match *match; - const void *matchinfo; int fragoff; unsigned int thoff; unsigned int hooknum; @@ -212,6 +238,7 @@ struct xt_match_param { * @hook_mask: via which hooks the new rule is reachable */ struct xt_mtchk_param { + struct net *net; const char *table; const void *entryinfo; const struct xt_match *match; @@ -222,28 +249,12 @@ struct xt_mtchk_param { /* Match destructor parameters */ struct xt_mtdtor_param { + struct net *net; const struct xt_match *match; void *matchinfo; u_int8_t family; }; -/** - * struct xt_target_param - parameters for target extensions' target functions - * - * @hooknum: hook through which this target was invoked - * @target: struct xt_target through which this function was invoked - * @targinfo: per-target data - * - * Other fields see above. - */ -struct xt_target_param { - const struct net_device *in, *out; - const struct xt_target *target; - const void *targinfo; - unsigned int hooknum; - u_int8_t family; -}; - /** * struct xt_tgchk_param - parameters for target extensions' * checkentry functions @@ -254,6 +265,7 @@ struct xt_target_param { * Other fields see above. 
*/ struct xt_tgchk_param { + struct net *net; const char *table; const void *entryinfo; const struct xt_target *target; @@ -264,6 +276,7 @@ struct xt_tgchk_param { /* Target destructor parameters */ struct xt_tgdtor_param { + struct net *net; const struct xt_target *target; void *targinfo; u_int8_t family; @@ -282,10 +295,10 @@ struct xt_match non-linear skb, using skb_header_pointer and skb_ip_make_writable. */ bool (*match)(const struct sk_buff *skb, - const struct xt_match_param *); + const struct xt_action_param *); /* Called when user tries to insert an entry of this type. */ - bool (*checkentry)(const struct xt_mtchk_param *); + int (*checkentry)(const struct xt_mtchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_mtdtor_param *); @@ -320,7 +333,7 @@ struct xt_target must now handle non-linear skbs, using skb_copy_bits and skb_ip_make_writable. */ unsigned int (*target)(struct sk_buff *skb, - const struct xt_target_param *); + const struct xt_action_param *); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be @@ -384,6 +397,13 @@ struct xt_table_info unsigned int hook_entry[NF_INET_NUMHOOKS]; unsigned int underflow[NF_INET_NUMHOOKS]; + /* + * Number of user chains. Since tables cannot have loops, at most + * @stacksize jumps (number of user chains) can possibly be made. + */ + unsigned int stacksize; + unsigned int *stackptr; + void ***jumpstack; /* ipt_entry tables: one per CPU */ /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ void *entries[1]; @@ -419,6 +439,8 @@ extern struct xt_table_info *xt_replace_table(struct xt_table *table, extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision); +extern struct xt_match *xt_request_find_match(u8 af, const char *name, + u8 revision); extern struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); extern int xt_find_revision(u8 af, const char *name, u8 revision, diff --git a/include/linux/netfilter/xt_CONNMARK.h b/include/linux/netfilter/xt_CONNMARK.h index 0a854586..2f2e48ec 100644 --- a/include/linux/netfilter/xt_CONNMARK.h +++ b/include/linux/netfilter/xt_CONNMARK.h @@ -1,26 +1,6 @@ #ifndef _XT_CONNMARK_H_target #define _XT_CONNMARK_H_target -#include - -/* Copyright (C) 2002,2004 MARA Systems AB - * by Henrik Nordstrom - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -enum { - XT_CONNMARK_SET = 0, - XT_CONNMARK_SAVE, - XT_CONNMARK_RESTORE -}; - -struct xt_connmark_tginfo1 { - __u32 ctmark, ctmask, nfmask; - __u8 mode; -}; +#include #endif /*_XT_CONNMARK_H_target*/ diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h new file mode 100644 index 00000000..1b564106 --- /dev/null +++ b/include/linux/netfilter/xt_CT.h @@ -0,0 +1,17 @@ +#ifndef _XT_CT_H +#define _XT_CT_H + +#define XT_CT_NOTRACK 0x1 + +struct xt_ct_target_info { + u_int16_t flags; + u_int16_t zone; + u_int32_t ct_events; + u_int32_t exp_events; + char helper[16]; + + /* Used internally by the kernel */ + struct nf_conn *ct __attribute__((aligned(8))); +}; + +#endif /* _XT_CT_H */ diff --git a/include/linux/netfilter/xt_MARK.h b/include/linux/netfilter/xt_MARK.h index bc9561bd..41c456de 100644 --- a/include/linux/netfilter/xt_MARK.h +++ b/include/linux/netfilter/xt_MARK.h @@ -1,10 +1,6 @@ #ifndef _XT_MARK_H_target #define _XT_MARK_H_target -#include - -struct xt_mark_tginfo2 { - __u32 mark, mask; -}; +#include #endif /*_XT_MARK_H_target */ diff --git a/include/linux/netfilter/xt_TEE.h b/include/linux/netfilter/xt_TEE.h new file mode 100644 index 00000000..5c21d5c8 --- /dev/null +++ b/include/linux/netfilter/xt_TEE.h @@ -0,0 +1,12 @@ +#ifndef _XT_TEE_TARGET_H +#define _XT_TEE_TARGET_H + +struct xt_tee_tginfo { + union nf_inet_addr gw; + char oif[16]; + + /* used internally by the kernel */ + struct xt_tee_priv *priv __attribute__((aligned(8))); +}; + +#endif /* _XT_TEE_TARGET_H */ diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h index 152e8f97..3f3d6936 100644 --- a/include/linux/netfilter/xt_TPROXY.h +++ b/include/linux/netfilter/xt_TPROXY.h @@ -1,5 +1,5 @@ -#ifndef _XT_TPROXY_H_target -#define _XT_TPROXY_H_target +#ifndef _XT_TPROXY_H +#define _XT_TPROXY_H /* TPROXY target is capable of marking the packet to perform * redirection. We can get rid of that whenever we get support for @@ -11,4 +11,11 @@ struct xt_tproxy_target_info { __be16 lport; }; -#endif /* _XT_TPROXY_H_target */ +struct xt_tproxy_target_info_v1 { + u_int32_t mark_mask; + u_int32_t mark_value; + union nf_inet_addr laddr; + __be16 lport; +}; + +#endif /* _XT_TPROXY_H */ diff --git a/include/linux/netfilter/xt_connmark.h b/include/linux/netfilter/xt_connmark.h index 619e47cd..efc17a83 100644 --- a/include/linux/netfilter/xt_connmark.h +++ b/include/linux/netfilter/xt_connmark.h @@ -12,6 +12,17 @@ * (at your option) any later version. */ +enum { + XT_CONNMARK_SET = 0, + XT_CONNMARK_SAVE, + XT_CONNMARK_RESTORE +}; + +struct xt_connmark_tginfo1 { + __u32 ctmark, ctmask, nfmask; + __u8 mode; +}; + struct xt_connmark_mtinfo1 { __u32 mark, mask; __u8 invert; diff --git a/include/linux/netfilter/xt_mark.h b/include/linux/netfilter/xt_mark.h index 6607c8f3..ecadc40d 100644 --- a/include/linux/netfilter/xt_mark.h +++ b/include/linux/netfilter/xt_mark.h @@ -3,6 +3,10 @@ #include +struct xt_mark_tginfo2 { + __u32 mark, mask; +}; + struct xt_mark_mtinfo1 { __u32 mark, mask; __u8 invert; diff --git a/include/linux/netfilter/xt_qtaguid.h b/include/linux/netfilter/xt_qtaguid.h new file mode 100644 index 00000000..ca60fbde --- /dev/null +++ b/include/linux/netfilter/xt_qtaguid.h @@ -0,0 +1,13 @@ +#ifndef _XT_QTAGUID_MATCH_H +#define _XT_QTAGUID_MATCH_H + +/* For now we just replace the xt_owner. + * FIXME: make iptables aware of qtaguid. 
*/ +#include + +#define XT_QTAGUID_UID XT_OWNER_UID +#define XT_QTAGUID_GID XT_OWNER_GID +#define XT_QTAGUID_SOCKET XT_OWNER_SOCKET +#define xt_qtaguid_match_info xt_owner_match_info + +#endif /* _XT_QTAGUID_MATCH_H */ diff --git a/include/linux/netfilter/xt_quota2.h b/include/linux/netfilter/xt_quota2.h new file mode 100644 index 00000000..eadc6903 --- /dev/null +++ b/include/linux/netfilter/xt_quota2.h @@ -0,0 +1,25 @@ +#ifndef _XT_QUOTA_H +#define _XT_QUOTA_H + +enum xt_quota_flags { + XT_QUOTA_INVERT = 1 << 0, + XT_QUOTA_GROW = 1 << 1, + XT_QUOTA_PACKET = 1 << 2, + XT_QUOTA_NO_CHANGE = 1 << 3, + XT_QUOTA_MASK = 0x0F, +}; + +struct xt_quota_counter; + +struct xt_quota_mtinfo2 { + char name[15]; + u_int8_t flags; + + /* Comparison-invariant */ + aligned_u64 quota; + + /* Used internally by the kernel */ + struct xt_quota_counter *master __attribute__((aligned(8))); +}; + +#endif /* _XT_QUOTA_H */ diff --git a/include/linux/netfilter/xt_socket.h b/include/linux/netfilter/xt_socket.h index 6f475b8f..b21f2c23 100644 --- a/include/linux/netfilter/xt_socket.h +++ b/include/linux/netfilter/xt_socket.h @@ -9,4 +9,10 @@ struct xt_socket_mtinfo1 { __u8 flags; }; +void xt_socket_put_sk(struct sock *sk); +struct sock *xt_socket_get4_sk(const struct sk_buff *skb, + struct xt_action_param *par); +struct sock *xt_socket_get6_sk(const struct sk_buff *skb, + struct xt_action_param *par); + #endif /* _XT_SOCKET_H */ diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 6fe3e6aa..29b0633e 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -214,9 +214,11 @@ static __inline__ struct arpt_entry_target *arpt_get_target(struct arpt_entry *e return (void *)e + e->target_offset; } +#ifndef __KERNEL__ /* fn returns 0 to continue iteration */ #define ARPT_ENTRY_ITERATE(entries, size, fn, args...) \ XT_ENTRY_ITERATE(struct arpt_entry, entries, size, fn, ## args) +#endif /* * Main firewall chains definitions and global var's definitions. @@ -264,6 +266,7 @@ struct arpt_error .target.errorname = "ERROR", \ } +extern void *arpt_alloc_initial_table(const struct xt_table *); extern struct xt_table *arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl); @@ -297,14 +300,6 @@ compat_arpt_get_target(struct compat_arpt_entry *e) #define COMPAT_ARPT_ALIGN(s) COMPAT_XT_ALIGN(s) -/* fn returns 0 to continue iteration */ -#define COMPAT_ARPT_ENTRY_ITERATE(entries, size, fn, args...) \ - XT_ENTRY_ITERATE(struct compat_arpt_entry, entries, size, fn, ## args) - -#define COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entries, size, n, fn, args...) 
\ - XT_ENTRY_ITERATE_CONTINUE(struct compat_arpt_entry, entries, size, n, \ - fn, ## args) - #endif /* CONFIG_COMPAT */ #endif /*__KERNEL__*/ #endif /* _ARPTABLES_H */ diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index ea281e6a..4cf4149b 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -302,7 +302,7 @@ struct ebt_table ~(__alignof__(struct ebt_replace)-1)) extern struct ebt_table *ebt_register_table(struct net *net, const struct ebt_table *table); -extern void ebt_unregister_table(struct ebt_table *table); +extern void ebt_unregister_table(struct net *net, struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, struct ebt_table *table); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 61fafc86..94f0b4e7 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -228,6 +228,7 @@ ipt_get_target(struct ipt_entry *e) return (void *)e + e->target_offset; } +#ifndef __KERNEL__ /* fn returns 0 to continue iteration */ #define IPT_MATCH_ITERATE(e, fn, args...) \ XT_MATCH_ITERATE(struct ipt_entry, e, fn, ## args) @@ -235,6 +236,7 @@ ipt_get_target(struct ipt_entry *e) /* fn returns 0 to continue iteration */ #define IPT_ENTRY_ITERATE(entries, size, fn, args...) \ XT_ENTRY_ITERATE(struct ipt_entry, entries, size, fn, ## args) +#endif /* * Main firewall chains definitions and global var's definitions. @@ -247,7 +249,7 @@ extern void ipt_init(void) __init; extern struct xt_table *ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl); -extern void ipt_unregister_table(struct xt_table *table); +extern void ipt_unregister_table(struct net *net, struct xt_table *table); /* Standard entry. */ struct ipt_standard @@ -290,6 +292,7 @@ struct ipt_error .target.errorname = "ERROR", \ } +extern void *ipt_alloc_initial_table(const struct xt_table *); extern unsigned int ipt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, @@ -321,19 +324,6 @@ compat_ipt_get_target(struct compat_ipt_entry *e) #define COMPAT_IPT_ALIGN(s) COMPAT_XT_ALIGN(s) -/* fn returns 0 to continue iteration */ -#define COMPAT_IPT_MATCH_ITERATE(e, fn, args...) \ - XT_MATCH_ITERATE(struct compat_ipt_entry, e, fn, ## args) - -/* fn returns 0 to continue iteration */ -#define COMPAT_IPT_ENTRY_ITERATE(entries, size, fn, args...) \ - XT_ENTRY_ITERATE(struct compat_ipt_entry, entries, size, fn, ## args) - -/* fn returns 0 to continue iteration */ -#define COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entries, size, n, fn, args...) \ - XT_ENTRY_ITERATE_CONTINUE(struct compat_ipt_entry, entries, size, n, \ - fn, ## args) - #endif /* CONFIG_COMPAT */ #endif /*__KERNEL__*/ #endif /* _IPTABLES_H */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index a64e1451..97e649e5 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -288,6 +288,7 @@ ip6t_get_target(struct ip6t_entry *e) return (void *)e + e->target_offset; } +#ifndef __KERNEL__ /* fn returns 0 to continue iteration */ #define IP6T_MATCH_ITERATE(e, fn, args...) 
\ XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args) @@ -295,6 +296,7 @@ ip6t_get_target(struct ip6t_entry *e) /* fn returns 0 to continue iteration */ #define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \ XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args) +#endif /* * Main firewall chains definitions and global var's definitions. @@ -305,10 +307,11 @@ ip6t_get_target(struct ip6t_entry *e) #include extern void ip6t_init(void) __init; +extern void *ip6t_alloc_initial_table(const struct xt_table *); extern struct xt_table *ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl); -extern void ip6t_unregister_table(struct xt_table *table); +extern void ip6t_unregister_table(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, @@ -349,18 +352,6 @@ compat_ip6t_get_target(struct compat_ip6t_entry *e) #define COMPAT_IP6T_ALIGN(s) COMPAT_XT_ALIGN(s) -/* fn returns 0 to continue iteration */ -#define COMPAT_IP6T_MATCH_ITERATE(e, fn, args...) \ - XT_MATCH_ITERATE(struct compat_ip6t_entry, e, fn, ## args) - -/* fn returns 0 to continue iteration */ -#define COMPAT_IP6T_ENTRY_ITERATE(entries, size, fn, args...) \ - XT_ENTRY_ITERATE(struct compat_ip6t_entry, entries, size, fn, ## args) - -#define COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entries, size, n, fn, args...) \ - XT_ENTRY_ITERATE_CONTINUE(struct compat_ip6t_entry, entries, size, n, \ - fn, ## args) - #endif /* CONFIG_COMPAT */ #endif /*__KERNEL__*/ #endif /* _IP6_TABLES_H */ diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 44428d24..5ecdb50b 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -201,6 +201,7 @@ static inline int notifier_to_errno(int ret) #define NETDEV_PRE_UP 0x000D #define NETDEV_BONDING_OLDTYPE 0x000E #define NETDEV_BONDING_NEWTYPE 0x000F +#define NETDEV_NOTIFY_PEERS 0x0013 #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3ebd0b7b..a475f64c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -84,14 +84,122 @@ extern int rcu_scheduler_active; } while (0) #ifdef CONFIG_DEBUG_LOCK_ALLOC + extern struct lockdep_map rcu_lock_map; -# define rcu_read_acquire() \ - lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_acquire() \ + lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) # define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) -#else -# define rcu_read_acquire() do { } while (0) -# define rcu_read_release() do { } while (0) -#endif + +extern struct lockdep_map rcu_bh_lock_map; +# define rcu_read_acquire_bh() \ + lock_acquire(&rcu_bh_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_release_bh() lock_release(&rcu_bh_lock_map, 1, _THIS_IP_) + +extern struct lockdep_map rcu_sched_lock_map; +# define rcu_read_acquire_sched() \ + lock_acquire(&rcu_sched_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_release_sched() \ + lock_release(&rcu_sched_lock_map, 1, _THIS_IP_) + +/** + * rcu_read_lock_held - might we be in RCU read-side critical section? + * + * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in + * an RCU read-side critical section. In absence of CONFIG_PROVE_LOCKING, + * this assumes we are in an RCU read-side critical section unless it can + * prove otherwise. 
+ */ +static inline int rcu_read_lock_held(void) +{ + if (debug_locks) + return lock_is_held(&rcu_lock_map); + return 1; +} + +/** + * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? + * + * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in + * an RCU-bh read-side critical section. In absence of CONFIG_PROVE_LOCKING, + * this assumes we are in an RCU-bh read-side critical section unless it can + * prove otherwise. + */ +static inline int rcu_read_lock_bh_held(void) +{ + if (debug_locks) + return lock_is_held(&rcu_bh_lock_map); + return 1; +} + +/** + * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? + * + * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in an + * RCU-sched read-side critical section. In absence of CONFIG_PROVE_LOCKING, + * this assumes we are in an RCU-sched read-side critical section unless it + * can prove otherwise. Note that disabling of preemption (including + * disabling irqs) counts as an RCU-sched read-side critical section. + */ +static inline int rcu_read_lock_sched_held(void) +{ + int lockdep_opinion = 0; + + if (debug_locks) + lockdep_opinion = lock_is_held(&rcu_sched_lock_map); + return lockdep_opinion || preempt_count() != 0; +} + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +# define rcu_read_acquire() do { } while (0) +# define rcu_read_release() do { } while (0) +# define rcu_read_acquire_bh() do { } while (0) +# define rcu_read_release_bh() do { } while (0) +# define rcu_read_acquire_sched() do { } while (0) +# define rcu_read_release_sched() do { } while (0) + +static inline int rcu_read_lock_held(void) +{ + return 1; +} + +static inline int rcu_read_lock_bh_held(void) +{ + return 1; +} + +static inline int rcu_read_lock_sched_held(void) +{ + return preempt_count() != 0; +} + +#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +#ifdef CONFIG_PROVE_RCU + +extern int rcu_my_thread_group_empty(void); + +/** + * rcu_dereference_check - rcu_dereference with debug checking + * + * Do an rcu_dereference(), but check that the context is correct. + * For example, rcu_dereference_check(gp, rcu_read_lock_held()) to + * ensure that the rcu_dereference_check() executes within an RCU + * read-side critical section. It is also possible to check for + * locks being held, for example, by using lockdep_is_held(). + */ +#define rcu_dereference_check(p, c) \ + ({ \ + if (debug_locks && !(c)) \ + lockdep_rcu_dereference(__FILE__, __LINE__); \ + rcu_dereference_raw(p); \ + }) + +#else /* #ifdef CONFIG_PROVE_RCU */ + +#define rcu_dereference_check(p, c) rcu_dereference_raw(p) + +#endif /* #else #ifdef CONFIG_PROVE_RCU */ /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. @@ -166,7 +274,7 @@ static inline void rcu_read_lock_bh(void) { __rcu_read_lock_bh(); __acquire(RCU_BH); - rcu_read_acquire(); + rcu_read_acquire_bh(); } /* @@ -176,7 +284,7 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { - rcu_read_release(); + rcu_read_release_bh(); __release(RCU_BH); __rcu_read_unlock_bh(); } @@ -194,7 +302,7 @@ static inline void rcu_read_lock_sched(void) { preempt_disable(); __acquire(RCU_SCHED); - rcu_read_acquire(); + rcu_read_acquire_sched(); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. 
*/ @@ -211,7 +319,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void) */ static inline void rcu_read_unlock_sched(void) { - rcu_read_release(); + rcu_read_release_sched(); __release(RCU_SCHED); preempt_enable(); } @@ -225,21 +333,48 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** - * rcu_dereference - fetch an RCU-protected pointer in an - * RCU read-side critical section. This pointer may later - * be safely dereferenced. + * rcu_dereference_raw - fetch an RCU-protected pointer + * + * The caller must be within some flavor of RCU read-side critical + * section, or must be otherwise preventing the pointer from changing, + * for example, by holding an appropriate lock. This pointer may later + * be safely dereferenced. It is the caller's responsibility to have + * done the right thing, as this primitive does no checking of any kind. * * Inserts memory barriers on architectures that require them * (currently only the Alpha), and, more importantly, documents * exactly which pointers are protected by RCU. */ - -#define rcu_dereference(p) ({ \ +#define rcu_dereference_raw(p) ({ \ typeof(p) _________p1 = ACCESS_ONCE(p); \ smp_read_barrier_depends(); \ (_________p1); \ }) +/** + * rcu_dereference - fetch an RCU-protected pointer, checking for RCU + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference(p) \ + rcu_dereference_check(p, rcu_read_lock_held()) + +/** + * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_bh(p) \ + rcu_dereference_check(p, rcu_read_lock_bh_held()) + +/** + * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched + * + * Makes rcu_dereference_check() do the dirty work. 
+ */ +#define rcu_dereference_sched(p) \ + rcu_dereference_check(p, rcu_read_lock_sched_held()) + /** * rcu_assign_pointer - assign (publicize) a pointer to a newly * initialized structure that will be dereferenced by RCU read-side diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index adf2068d..e83a5600 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -754,6 +754,9 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); +#ifdef CONFIG_PROVE_LOCKING +extern int lockdep_rtnl_is_held(void); +#endif /* #ifdef CONFIG_PROVE_LOCKING */ extern void rtnetlink_init(void); extern void __rtnl_unlock(void); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 0f7c3782..45375b41 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -177,7 +177,9 @@ extern int unregister_inet6addr_notifier(struct notifier_block *nb); static inline struct inet6_dev * __in6_dev_get(struct net_device *dev) { - return rcu_dereference(dev->ip6_ptr); + return rcu_dereference_check(dev->ip6_ptr, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); } static inline struct inet6_dev * diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 1614d78c..861045fb 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -10,6 +10,7 @@ extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); extern void wait_for_unix_gc(void); +extern struct sock *unix_get_socket(struct file *filp); #define UNIX_HASH_SIZE 256 @@ -56,6 +57,7 @@ struct unix_sock { spinlock_t lock; unsigned int gc_candidate : 1; unsigned int gc_maybe_cycle : 1; + unsigned char recursion_level; wait_queue_head_t peer_wait; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/include/net/ip.h b/include/net/ip.h index 69db9430..18c47c4d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -341,8 +341,11 @@ enum ip_defrag_users IP_DEFRAG_LOCAL_DELIVER, IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, + __IP_DEFRAG_CONNTRACK_IN_END = IP_DEFRAG_CONNTRACK_IN + USHORT_MAX, IP_DEFRAG_CONNTRACK_OUT, + __IP_DEFRAG_CONNTRACK_OUT_END = IP_DEFRAG_CONNTRACK_OUT + USHORT_MAX, IP_DEFRAG_CONNTRACK_BRIDGE_IN, + __IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX, IP_DEFRAG_VS_IN, IP_DEFRAG_VS_OUT, IP_DEFRAG_VS_FWD diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 639bbf06..76c9bba7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -357,8 +357,11 @@ struct inet_frag_queue; enum ip6_defrag_users { IP6_DEFRAG_LOCAL_DELIVER, IP6_DEFRAG_CONNTRACK_IN, + __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHORT_MAX, IP6_DEFRAG_CONNTRACK_OUT, + __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHORT_MAX, IP6_DEFRAG_CONNTRACK_BRIDGE_IN, + __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX, }; struct ip6_create_arg { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 998c30fc..a2bd2f98 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -908,6 +908,9 @@ enum ieee80211_tkip_key_type { * @IEEE80211_HW_BEACON_FILTER: * Hardware supports dropping of irrelevant beacon frames to * avoid waking up cpu. + * @IEEE80211_HW_REPORTS_TX_ACK_STATUS: + * Hardware can provide ack status reports of Tx frames to + * the stack. 
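/*
 * A minimal sketch of how a driver might advertise the new capability
 * bit at setup time; the driver function and its name are hypothetical,
 * only the flag itself comes from this patch.
 */
#include <net/mac80211.h>

static void example_driver_setup(struct ieee80211_hw *hw)
{
	/* this hardware reports per-frame TX ACK status to mac80211 */
	hw->flags |= IEEE80211_HW_REPORTS_TX_ACK_STATUS;
}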
*/ enum ieee80211_hw_flags { IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, @@ -924,6 +927,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_DYNAMIC_PS = 1<<12, IEEE80211_HW_MFP_CAPABLE = 1<<13, IEEE80211_HW_BEACON_FILTER = 1<<14, + IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<15, }; /** @@ -1696,6 +1700,12 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb); */ void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb); +/* + * The TX headroom reserved by mac80211 for its own tx_status functions. + * This is enough for the radiotap header. + */ +#define IEEE80211_TX_STATUS_HEADROOM 13 + /** * ieee80211_tx_status - transmit status callback * diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index a1202841..457c6114 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -28,6 +28,10 @@ struct ctl_table_header; struct net_generic; struct sock; + +#define NETDEV_HASHBITS 8 +#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) + struct net { atomic_t count; /* To decided when the network * namespace should be freed. @@ -38,7 +42,8 @@ struct net { */ #endif struct list_head list; /* list of network namespaces */ - struct work_struct work; /* work struct for freeing */ + struct list_head cleanup_list; /* namespaces on death row */ + struct list_head exit_list; /* Use only net_mutex */ struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; @@ -76,6 +81,8 @@ struct net { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif + struct sock *nfnl; + struct sock *nfnl_stash; #endif #ifdef CONFIG_XFRM struct netns_xfrm xfrm; @@ -232,6 +239,9 @@ struct pernet_operations { struct list_head list; int (*init)(struct net *net); void (*exit)(struct net *net); + void (*exit_batch)(struct list_head *net_exit_list); + int *id; + size_t size; }; /* diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5cf7270e..21133fb7 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -198,7 +198,8 @@ extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int null extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size); extern struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple); +__nf_conntrack_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); extern void nf_conntrack_hash_insert(struct nf_conn *ct); extern void nf_ct_delete_from_lists(struct nf_conn *ct); @@ -260,18 +261,29 @@ extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, u32 seq); /* Fake conntrack entry for untracked connections */ -extern struct nf_conn nf_conntrack_untracked; +static inline struct nf_conn *nf_ct_untracked_get(void) +{ + extern struct nf_conn nf_conntrack_untracked; + + return &nf_conntrack_untracked; +} +extern void nf_ct_untracked_status_or(unsigned long bits); /* Iterate over all conntracks: if iter returns true, it's deleted. 
*/ extern void nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data); extern void nf_conntrack_free(struct nf_conn *ct); extern struct nf_conn * -nf_conntrack_alloc(struct net *net, +nf_conntrack_alloc(struct net *net, u16 zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp); +static inline int nf_ct_is_template(const struct nf_conn *ct) +{ + return test_bit(IPS_TEMPLATE_BIT, &ct->status); +} + /* It's confirmed if it is, or has been in the hash table. */ static inline int nf_ct_is_confirmed(struct nf_conn *ct) { @@ -283,9 +295,9 @@ static inline int nf_ct_is_dying(struct nf_conn *ct) return test_bit(IPS_DYING_BIT, &ct->status); } -static inline int nf_ct_is_untracked(const struct sk_buff *skb) +static inline int nf_ct_is_untracked(const struct nf_conn *ct) { - return (skb->nfct == &nf_conntrack_untracked.ct_general); + return test_bit(IPS_UNTRACKED_BIT, &ct->status); } extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 5a449b44..9d89a87a 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -49,7 +49,8 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* Find a connection corresponding to a tuple. */ extern struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_conntrack_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); extern int __nf_conntrack_confirm(struct sk_buff *skb); @@ -59,7 +60,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) struct nf_conn *ct = (struct nf_conn *)skb->nfct; int ret = NF_ACCEPT; - if (ct && ct != &nf_conntrack_untracked) { + if (ct && !nf_ct_is_untracked(ct)) { if (!nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) ret = __nf_conntrack_confirm(skb); if (likely(ret == NF_ACCEPT)) diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 4f20d58e..96ba5f7d 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -12,28 +12,12 @@ #include #include -/* Connection tracking event types */ -enum ip_conntrack_events -{ - IPCT_NEW = 0, /* new conntrack */ - IPCT_RELATED = 1, /* related conntrack */ - IPCT_DESTROY = 2, /* destroyed conntrack */ - IPCT_STATUS = 3, /* status has changed */ - IPCT_PROTOINFO = 4, /* protocol information has changed */ - IPCT_HELPER = 5, /* new helper has been set */ - IPCT_MARK = 6, /* new mark has been set */ - IPCT_NATSEQADJ = 7, /* NAT is doing sequence adjustment */ - IPCT_SECMARK = 8, /* new security mark has been set */ -}; - -enum ip_conntrack_expect_events { - IPEXP_NEW = 0, /* new expectation */ -}; - struct nf_conntrack_ecache { - unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ - u32 pid; /* netlink pid of destroyer */ + unsigned long cache; /* bitops want long */ + unsigned long missed; /* missed events */ + u16 ctmask; /* bitmask of ct events to be delivered */ + u16 expmask; /* bitmask of expect events to be delivered */ + u32 pid; /* netlink pid of destroyer */ }; static inline struct nf_conntrack_ecache * @@ -43,14 +27,24 @@ nf_ct_ecache_find(const struct nf_conn *ct) } static inline struct nf_conntrack_ecache * -nf_ct_ecache_ext_add(struct nf_conn *ct, gfp_t gfp) +nf_ct_ecache_ext_add(struct nf_conn *ct, u16 
ctmask, u16 expmask, gfp_t gfp) { struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *e; - if (!net->ct.sysctl_events) + if (!ctmask && !expmask && net->ct.sysctl_events) { + ctmask = ~0; + expmask = ~0; + } + if (!ctmask && !expmask) return NULL; - return nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); + e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); + if (e) { + e->ctmask = ctmask; + e->expmask = expmask; + } + return e; }; #ifdef CONFIG_NF_CONNTRACK_EVENTS @@ -83,6 +77,9 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) if (e == NULL) return; + if (!(e->ctmask & (1 << event))) + return; + set_bit(event, &e->cache); } @@ -93,7 +90,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask, int report) { int ret = 0; - struct net *net = nf_ct_net(ct); struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; @@ -102,9 +98,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask, if (notify == NULL) goto out_unlock; - if (!net->ct.sysctl_events) - goto out_unlock; - e = nf_ct_ecache_find(ct); if (e == NULL) goto out_unlock; @@ -118,6 +111,9 @@ nf_conntrack_eventmask_report(unsigned int eventmask, /* This is a resent of a destroy event? If so, skip missed */ unsigned long missed = e->pid ? 0 : e->missed; + if (!((eventmask | missed) & e->ctmask)) + goto out_unlock; + ret = notify->fcn(eventmask | missed, &item); if (unlikely(ret < 0 || missed)) { spin_lock_bh(&ct->lock); @@ -173,18 +169,19 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event, u32 pid, int report) { - struct net *net = nf_ct_exp_net(exp); struct nf_exp_event_notifier *notify; + struct nf_conntrack_ecache *e; rcu_read_lock(); notify = rcu_dereference(nf_expect_event_cb); if (notify == NULL) goto out_unlock; - if (!net->ct.sysctl_events) + e = nf_ct_ecache_find(exp->master); + if (e == NULL) goto out_unlock; - { + if (e->expmask & (1 << event)) { struct nf_exp_event item = { .exp = exp, .pid = pid, diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index a9652806..896c7671 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -79,13 +79,16 @@ int nf_conntrack_expect_init(struct net *net); void nf_conntrack_expect_fini(struct net *net); struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple); +__nf_ct_expect_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_ct_expect_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_ct_find_expectation(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 7f8fc5d1..a9bd9a54 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -9,6 +9,7 @@ enum nf_ct_ext_id NF_CT_EXT_NAT, NF_CT_EXT_ACCT, NF_CT_EXT_ECACHE, + NF_CT_EXT_ZONE, NF_CT_EXT_NUM, }; @@ -16,6 +17,7 @@ enum nf_ct_ext_id #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter #define NF_CT_EXT_ECACHE_TYPE struct 
nf_conntrack_ecache +#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 1b706800..8307c164 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -41,14 +41,18 @@ struct nf_conntrack_helper }; extern struct nf_conntrack_helper * -__nf_conntrack_helper_find_byname(const char *name); +__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); + +extern struct nf_conntrack_helper * +nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); -extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags); +extern int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags); extern void nf_ct_helper_destroy(struct nf_conn *ct); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 3767fb41..555a3fdb 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -50,8 +50,8 @@ struct nf_conntrack_l4proto /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); - int (*error)(struct net *net, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, + int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum); /* Print out the per-protocol part of the tuple. Return like seq_* */ diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h new file mode 100644 index 00000000..0bbb2bd5 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -0,0 +1,23 @@ +#ifndef _NF_CONNTRACK_ZONES_H +#define _NF_CONNTRACK_ZONES_H + +#include + +#define NF_CT_DEFAULT_ZONE 0 + +struct nf_conntrack_zone { + u16 id; +}; + +static inline u16 nf_ct_zone(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone *nf_ct_zone; + nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); + if (nf_ct_zone) + return nf_ct_zone->id; +#endif + return NF_CT_DEFAULT_ZONE; +} + +#endif /* _NF_CONNTRACK_ZONES_H */ diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index 208b46f4..a41659a5 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -8,25 +8,131 @@ #include #include +#define NFT_LOOKUP_ANY 0 +#define NFT_LOOKUP_LISTENER 1 +#define NFT_LOOKUP_ESTABLISHED 2 + /* look up and get a reference to a matching socket */ -extern struct sock * + + +/* This function is used by the 'TPROXY' target and the 'socket' + * match. The following lookups are supported: + * + * Explicit TProxy target rule + * =========================== + * + * This is used when the user wants to intercept a connection matching + * an explicit iptables rule. 
In this case the sockets are assumed + * matching in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple, it is returned, assuming the redirection + * already took place and we process a packet belonging to an + * established connection + * + * - match: if there's a listening socket matching the redirection + * (e.g. on-port & on-ip of the connection), it is returned, + * regardless if it was bound to 0.0.0.0 or an explicit + * address. The reasoning is that if there's an explicit rule, it + * does not really matter if the listener is bound to an interface + * or to 0. The user already stated that he wants redirection + * (since he added the rule). + * + * "socket" match based redirection (no specific rule) + * =================================================== + * + * There are connections with dynamic endpoints (e.g. FTP data + * connection) that the user is unable to add explicit rules + * for. These are taken care of by a generic "socket" rule. It is + * assumed that the proxy application is trusted to open such + * connections without explicit iptables rule (except of course the + * generic 'socket' rule). In this case the following sockets are + * matched in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple + * + * - match: if there's a non-zero bound listener (possibly with a + * non-local address) We don't accept zero-bound listeners, since + * then local services could intercept traffic going through the + * box. + * + * Please note that there's an overlap between what a TPROXY target + * and a socket match will match. Normally if you have both rules the + * "socket" match will be the first one, effectively all packets + * belonging to established connections going through that one. 
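/*
 * A caller-side sketch of the preference order described above, for a
 * TCP packet being redirected to laddr:lport; the helper and variable
 * names are hypothetical, only nf_tproxy_get_sock_v4() and the
 * NFT_LOOKUP_* constants come from this patch.
 */
#include <linux/ip.h>
#include <linux/tcp.h>
#include <net/netfilter/nf_tproxy_core.h>

static struct sock *
example_tproxy_lookup(struct net *net, const struct iphdr *iph,
		      const struct tcphdr *tcph, __be32 laddr, __be16 lport,
		      const struct net_device *in)
{
	struct sock *sk;

	/* 1: a fully established connection matching the packet tuple */
	sk = nf_tproxy_get_sock_v4(net, iph->protocol,
				   iph->saddr, iph->daddr,
				   tcph->source, tcph->dest,
				   in, NFT_LOOKUP_ESTABLISHED);
	if (sk)
		return sk;

	/* 2: otherwise a listener bound to the redirection address/port */
	return nf_tproxy_get_sock_v4(net, iph->protocol,
				     iph->saddr, laddr,
				     tcph->source, lport,
				     in, NFT_LOOKUP_LISTENER);
}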
+ */ +static inline struct sock * nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, - const struct net_device *in, bool listening); - -static inline void -nf_tproxy_put_sock(struct sock *sk) + const struct net_device *in, int lookup_type) { - /* TIME_WAIT inet sockets have to be handled differently */ - if ((sk->sk_protocol == IPPROTO_TCP) && (sk->sk_state == TCP_TIME_WAIT)) - inet_twsk_put(inet_twsk(sk)); - else - sock_put(sk); + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NFT_LOOKUP_ANY: + sk = __inet_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case NFT_LOOKUP_LISTENER: + sk = inet_lookup_listener(net, &tcp_hashinfo, + daddr, dport, + in->ifindex); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + + break; + case NFT_LOOKUP_ESTABLISHED: + sk = inet_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + break; + } + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk && lookup_type != NFT_LOOKUP_ANY) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = (inet_sk(sk)->rcv_saddr == 0); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NFT_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk); + + return sk; } + /* assign a socket to the skb -- consumes sk */ -int +void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk); #endif diff --git a/include/net/netlink.h b/include/net/netlink.h index a63b2192..c344646b 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -384,7 +384,7 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, * * Returns the first attribute which matches the specified type. 
*/ -static inline struct nlattr *nlmsg_find_attr(struct nlmsghdr *nlh, +static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, int hdrlen, int attrtype) { return nla_find(nlmsg_attrdata(nlh, hdrlen), diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 8be5135f..2c55a7ea 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -107,6 +107,7 @@ typedef enum { SCTP_CMD_T1_RETRAN, /* Mark for retransmission after T1 timeout */ SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */ SCTP_CMD_SEND_MSG, /* Send the whole use message */ + SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ SCTP_CMD_LAST } sctp_verb_t; diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index c1dd8936..76abe6cb 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -278,6 +278,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(unsigned long peer); void sctp_generate_heartbeat_event(unsigned long peer); +void sctp_generate_proto_unreach_event(unsigned long peer); void sctp_ootb_pkt_free(struct sctp_packet *); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0a474568..2eb2154b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -772,6 +772,7 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int off, int len, struct iovec *data); void sctp_chunk_free(struct sctp_chunk *); void *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data); +void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, const void *data); struct sctp_chunk *sctp_chunkify(struct sk_buff *, const struct sctp_association *, struct sock *); @@ -987,7 +988,7 @@ struct sctp_transport { int init_sent_count; /* state : The current state of this destination, - * : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKOWN. + * : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKNOWN. */ int state; @@ -1007,6 +1008,9 @@ struct sctp_transport { /* Heartbeat timer is per destination. */ struct timer_list hb_timer; + /* Timer to handle ICMP proto unreachable envets */ + struct timer_list proto_unreach_timer; + /* Since we're using per-destination retransmission timers * (see above), we're also using per-destination "transmitted" * queues. 
This probably ought to be a private struct diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h new file mode 100644 index 00000000..d97f6892 --- /dev/null +++ b/include/net/secure_seq.h @@ -0,0 +1,20 @@ +#ifndef _NET_SECURE_SEQ +#define _NET_SECURE_SEQ + +#include + +extern __u32 secure_ip_id(__be32 daddr); +extern __u32 secure_ipv6_id(const __be32 daddr[4]); +extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport); +extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport); +extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +extern u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport); + +#endif /* _NET_SECURE_SEQ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b9648890..b005163a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -259,11 +259,21 @@ static inline int between(__u32 seq1, __u32 seq2, __u32 seq3) return seq3 - seq2 >= seq1 - seq2; } -static inline int tcp_too_many_orphans(struct sock *sk, int num) +static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { - return (num > sysctl_tcp_max_orphans) || - (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]); + struct percpu_counter *ocp = sk->sk_prot->orphan_count; + int orphans = percpu_counter_read_positive(ocp); + + if (orphans << shift > sysctl_tcp_max_orphans) { + orphans = percpu_counter_sum_positive(ocp); + if (orphans << shift > sysctl_tcp_max_orphans) + return true; + } + + if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && + atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) + return true; + return false; } /* syncookies: remember time of last synqueue overflow */ @@ -501,8 +511,22 @@ extern unsigned int tcp_current_mss(struct sock *sk); /* Bound MSS / TSO packet size with the half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) { - if (tp->max_window && pktsize > (tp->max_window >> 1)) - return max(tp->max_window >> 1, 68U - tp->tcp_header_len); + int cutoff; + + /* When peer uses tiny windows, there is no use in packetizing + * to sub-MSS pieces for the sake of SWS or making sure there + * are enough packets in the pipe for fast recovery. + * + * On the other hand, for extremely large MSS devices, handling + * smaller than MSS windows in this way does make sense. + */ + if (tp->max_window >= 512) + cutoff = (tp->max_window >> 1); + else + cutoff = tp->max_window; + + if (cutoff && pktsize > cutoff) + return max_t(int, cutoff, 68U - tp->tcp_header_len); else return pktsize; } @@ -1260,7 +1284,7 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) /* This function calculates a "timeout" which is equivalent to the timeout of a - * TCP connection after "boundary" unsucessful, exponentially backed-off + * TCP connection after "boundary" unsuccessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. 
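/*
 * A worked illustration of the comment above (hypothetical helper,
 * ignoring the TCP_RTO_MAX clamp that the real computation applies):
 * with the RTO doubling from TCP_RTO_MIN, 'boundary' unsuccessful
 * retransmissions consume (2^boundary - 1) * TCP_RTO_MIN, e.g.
 * boundary == 3 gives (1 + 2 + 4) * TCP_RTO_MIN.
 */
#include <net/tcp.h>

static inline unsigned int example_backoff_timeout(unsigned int boundary)
{
	return ((1U << boundary) - 1) * TCP_RTO_MIN;
}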
*/ static inline bool retransmits_timed_out(struct sock *sk, diff --git a/include/net/wimax.h b/include/net/wimax.h index 2af7bf83..3b07f6aa 100644 --- a/include/net/wimax.h +++ b/include/net/wimax.h @@ -79,7 +79,7 @@ * drivers have to only report state changes due to external * conditions. * - * All API operations are 'atomic', serialized thorough a mutex in the + * All API operations are 'atomic', serialized through a mutex in the * `struct wimax_dev`. * * EXPORTING TO USER SPACE THROUGH GENERIC NETLINK diff --git a/include/net/x25.h b/include/net/x25.h index 2cda0401..21926a04 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -182,6 +182,10 @@ extern int sysctl_x25_clear_request_timeout; extern int sysctl_x25_ack_holdback_timeout; extern int sysctl_x25_forward; +extern int x25_parse_address_block(struct sk_buff *skb, + struct x25_address *called_addr, + struct x25_address *calling_addr); + extern int x25_addr_ntoa(unsigned char *, struct x25_address *, struct x25_address *); extern int x25_addr_aton(unsigned char *, struct x25_address *, diff --git a/lib/md5.c b/lib/md5.c new file mode 100644 index 00000000..c777180e --- /dev/null +++ b/lib/md5.c @@ -0,0 +1,95 @@ +#include +#include +#include + +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +#define MD5STEP(f, w, x, y, z, in, s) \ + (w += f(x, y, z) + in, w = (w<>(32-s)) + x) + +void md5_transform(__u32 *hash, __u32 const *in) +{ + u32 a, b, c, d; + + a = hash[0]; + b = hash[1]; + c = hash[2]; + d = hash[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, 
d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + hash[0] += a; + hash[1] += b; + hash[2] += c; + hash[3] += d; +} +EXPORT_SYMBOL(md5_transform); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 58525bd7..455f3992 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -477,6 +477,9 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; + if (addr->sin_family != AF_INET) + goto out; + chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 1560ad9b..cf24c6df 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -54,7 +54,6 @@ #include #endif #include -#include #include #include @@ -374,9 +373,6 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); - /* Start persistent interface stat monitoring. Ignores if loopback. 
*/ - create_iface_stat(in_dev); - return 0; } @@ -1036,6 +1032,21 @@ static inline bool inetdev_valid_mtu(unsigned mtu) return mtu >= 68; } +static void inetdev_send_gratuitous_arp(struct net_device *dev, + struct in_device *in_dev) + +{ + struct in_ifaddr *ifa = in_dev->ifa_list; + + if (!ifa) + return; + + arp_send(ARPOP_REQUEST, ETH_P_ARP, + ifa->ifa_address, dev, + ifa->ifa_address, NULL, + dev->dev_addr, NULL); +} + /* Called only under RTNL semaphore */ static int inetdev_event(struct notifier_block *this, unsigned long event, @@ -1088,16 +1099,12 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ip_mc_up(in_dev); /* fall through */ case NETDEV_CHANGEADDR: + if (!IN_DEV_ARP_NOTIFY(in_dev)) + break; + /* fall through */ + case NETDEV_NOTIFY_PEERS: /* Send gratuitous ARP to notify of link change */ - if (IN_DEV_ARP_NOTIFY(in_dev)) { - struct in_ifaddr *ifa = in_dev->ifa_list; - - if (ifa) - arp_send(ARPOP_REQUEST, ETH_P_ARP, - ifa->ifa_address, dev, - ifa->ifa_address, NULL, - dev->dev_addr, NULL); - } + inetdev_send_gratuitous_arp(dev, in_dev); break; case NETDEV_DOWN: ip_mc_down(in_dev); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 169da933..cb1e1cbe 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1899,8 +1899,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct err = -EADDRNOTAVAIL; for (pmc=inet->mc_list; pmc; pmc=pmc->next) { - if (pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr - && pmc->multi.imr_ifindex == imr.imr_ifindex) + if ((pmc->multi.imr_multiaddr.s_addr == + imr.imr_multiaddr.s_addr) && + (pmc->multi.imr_ifindex == imr.imr_ifindex)) break; } if (!pmc) { /* must have a prior join */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index a706a47f..dba56d29 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -436,7 +436,7 @@ static int valid_cc(const void *bc, int len, int cc) return 0; if (cc == len) return 1; - if (op->yes < 4) + if (op->yes < 4 || op->yes & 3) return 0; len -= op->yes; bc += op->yes; @@ -446,11 +446,11 @@ static int valid_cc(const void *bc, int len, int cc) static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) { - const unsigned char *bc = bytecode; + const void *bc = bytecode; int len = bytecode_len; while (len > 0) { - struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc; + const struct inet_diag_bc_op *op = bc; //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { @@ -461,22 +461,20 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) case INET_DIAG_BC_S_LE: case INET_DIAG_BC_D_GE: case INET_DIAG_BC_D_LE: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; case INET_DIAG_BC_JMP: - if (op->no < 4 || op->no > len + 4) + if (op->no < 4 || op->no > len + 4 || op->no & 3) return -EINVAL; if (op->no < len && !valid_cc(bytecode, bytecode_len, len - op->no)) return -EINVAL; break; case INET_DIAG_BC_NOP: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; break; default: return -EINVAL; } + if (op->yes < 4 || op->yes > len + 4 || op->yes & 3) + return -EINVAL; bc += op->yes; len -= op->yes; } @@ -489,9 +487,11 @@ static int inet_csk_diag_dump(struct sock *sk, { struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { + if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { struct inet_diag_entry entry; - struct rtattr *bc = (struct rtattr *)(r + 1); + const struct nlattr *bc = nlmsg_find_attr(cb->nlh, + sizeof(*r), + INET_DIAG_REQ_BYTECODE); 
struct inet_sock *inet = inet_sk(sk); entry.family = sk->sk_family; @@ -511,7 +511,7 @@ static int inet_csk_diag_dump(struct sock *sk, entry.dport = ntohs(inet->dport); entry.userlocks = sk->sk_userlocks; - if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) + if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry)) return 0; } @@ -526,9 +526,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, { struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { + if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { struct inet_diag_entry entry; - struct rtattr *bc = (struct rtattr *)(r + 1); + const struct nlattr *bc = nlmsg_find_attr(cb->nlh, + sizeof(*r), + INET_DIAG_REQ_BYTECODE); entry.family = tw->tw_family; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) @@ -547,7 +549,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, entry.dport = ntohs(tw->tw_dport); entry.userlocks = 0; - if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) + if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry)) return 0; } @@ -617,7 +619,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, struct inet_diag_req *r = NLMSG_DATA(cb->nlh); struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt; - struct rtattr *bc = NULL; + const struct nlattr *bc = NULL; struct inet_sock *inet = inet_sk(sk); int j, s_j; int reqnum, s_reqnum; @@ -637,8 +639,9 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (!lopt || !lopt->qlen) goto out; - if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { - bc = (struct rtattr *)(r + 1); + if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { + bc = nlmsg_find_attr(cb->nlh, sizeof(*r), + INET_DIAG_REQ_BYTECODE); entry.sport = inet->num; entry.userlocks = sk->sk_userlocks; } @@ -671,8 +674,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, &ireq->rmt_addr; entry.dport = ntohs(ireq->rmt_port); - if (!inet_diag_bc_run(RTA_DATA(bc), - RTA_PAYLOAD(bc), &entry)) + if (!inet_diag_bc_run(nla_data(bc), + nla_len(bc), &entry)) continue; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 625cc5f6..d7172674 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -21,6 +21,7 @@ #include #include +#include #include /* diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index b1fbe18f..13b229f4 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -19,6 +19,7 @@ #include #include #include +#include /* * Theory of operations. 
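/*
 * The inet_diag conversion above fetches the optional filter bytecode
 * through the netlink attribute helpers rather than pointer arithmetic
 * on the request; a sketch of the pattern (hypothetical wrapper, not
 * part of this patch).
 */
#include <net/netlink.h>
#include <linux/inet_diag.h>

static const struct nlattr *
example_get_bytecode(const struct nlmsghdr *nlh, int hdrlen)
{
	/* no attributes after the request header means no filter */
	if (!nlmsg_attrlen(nlh, hdrlen))
		return NULL;
	return nlmsg_find_attr(nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
}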
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 14333385..f6145847 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1665,14 +1665,15 @@ static int __init ipgre_init(void) printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); - if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { - printk(KERN_INFO "ipgre init: can't add protocol\n"); - return -EAGAIN; - } - err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); if (err < 0) - goto gen_device_failed; + return err; + + err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); + if (err < 0) { + printk(KERN_INFO "ipgre init: can't add protocol\n"); + goto add_proto_failed; + } err = rtnl_link_register(&ipgre_link_ops); if (err < 0) @@ -1688,9 +1689,9 @@ out: tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); -gen_device_failed: inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); +add_proto_failed: + unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); goto out; } @@ -1698,9 +1699,9 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); + unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); } module_init(ipgre_init); @@ -1708,3 +1709,4 @@ module_exit(ipgre_fini); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); +MODULE_ALIAS_NETDEV("gre0"); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4d50daab..44b79107 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -476,9 +476,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) * we can switch to copy when see the first bad fragment. */ if (skb_has_frags(skb)) { - struct sk_buff *frag; + struct sk_buff *frag, *frag2; int first_len = skb_pagelen(skb); - int truesizes = 0; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || @@ -491,18 +490,18 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) - goto slow_path; + goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) - goto slow_path; + goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; } - truesizes += frag->truesize; + skb->truesize -= frag->truesize; } /* Everything is OK. Generate! 
*/ @@ -512,7 +511,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb_shinfo(skb)->frag_list; skb_frag_list_init(skb); skb->data_len = first_len - skb_headlen(skb); - skb->truesize -= truesizes; skb->len = first_len; iph->tot_len = htons(first_len); iph->frag_off = htons(IP_MF); @@ -564,6 +562,15 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; + +slow_path_clean: + skb_walk_frags(skb, frag2) { + if (frag2 == frag) + break; + frag2->sk = NULL; + frag2->destructor = NULL; + skb->truesize += frag2->truesize; + } } slow_path: @@ -860,8 +867,10 @@ int ip_append_data(struct sock *sk, !exthdrlen) csummode = CHECKSUM_PARTIAL; + skb = skb_peek_tail(&sk->sk_write_queue); + inet->cork.length += length; - if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) && + if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->u.dst.dev->features & NETIF_F_UFO)) { err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, @@ -879,7 +888,7 @@ int ip_append_data(struct sock *sk, * adding appropriate IP header. */ - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + if (!skb) goto alloc_new_skb; while (length > 0) { @@ -1108,7 +1117,8 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, return -EINVAL; inet->cork.length += size; - if ((sk->sk_protocol == IPPROTO_UDP) && + if ((size + skb->len > mtu) && + (sk->sk_protocol == IPPROTO_UDP) && (rt->u.dst.dev->features & NETIF_F_UFO)) { skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ae40ed1b..f37df1a4 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -853,3 +853,4 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETDEV("tunl0"); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 1725dc0e..f53cb8df 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -155,10 +155,10 @@ static int nf_ip_reroute(struct sk_buff *skb, if (entry->hook == NF_INET_LOCAL_OUT) { const struct iphdr *iph = ip_hdr(skb); - if (!(iph->tos == rt_info->tos - && skb->mark == rt_info->mark - && iph->daddr == rt_info->daddr - && iph->saddr == rt_info->saddr)) + if (!(iph->tos == rt_info->tos && + skb->mark == rt_info->mark && + iph->daddr == rt_info->daddr && + iph->saddr == rt_info->saddr)) return ip_route_me_harder(skb, RTN_UNSPEC); } return 0; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 98442f30..06426ab5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -27,6 +27,7 @@ #include #include +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("David S. 
Miller "); @@ -58,6 +59,12 @@ do { \ #define ARP_NF_ASSERT(x) #endif +void *arpt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(arpt, ARPT); +} +EXPORT_SYMBOL_GPL(arpt_alloc_initial_table); + static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, const char *hdr_addr, int len) { @@ -217,7 +224,7 @@ static inline int arp_checkentry(const struct arpt_arp *arp) } static unsigned int -arpt_error(struct sk_buff *skb, const struct xt_target_param *par) +arpt_error(struct sk_buff *skb, const struct xt_action_param *par) { if (net_ratelimit()) printk("arp_tables: error: '%s'\n", @@ -226,7 +233,14 @@ arpt_error(struct sk_buff *skb, const struct xt_target_param *par) return NF_DROP; } -static inline struct arpt_entry *get_entry(void *base, unsigned int offset) +static inline const struct arpt_entry_target * +arpt_get_target_c(const struct arpt_entry *e) +{ + return arpt_get_target((struct arpt_entry *)e); +} + +static inline struct arpt_entry * +get_entry(const void *base, unsigned int offset) { return (struct arpt_entry *)(base + offset); } @@ -251,7 +265,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const char *indev, *outdev; void *table_base; const struct xt_table_info *private; - struct xt_target_param tgpar; + struct xt_action_param acpar; if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; @@ -266,14 +280,14 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); - tgpar.in = in; - tgpar.out = out; - tgpar.hooknum = hook; - tgpar.family = NFPROTO_ARP; + acpar.in = in; + acpar.out = out; + acpar.hooknum = hook; + acpar.family = NFPROTO_ARP; arp = arp_hdr(skb); do { - struct arpt_entry_target *t; + const struct arpt_entry_target *t; int hdr_len; if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { @@ -285,7 +299,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, (2 * skb->dev->addr_len); ADD_COUNTER(e->counters, hdr_len, 1); - t = arpt_get_target(e); + t = arpt_get_target_c(e); /* Standard target? */ if (!t->u.kernel.target->target) { @@ -319,9 +333,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, /* Targets which reenter must return * abs. verdicts */ - tgpar.target = t->u.kernel.target; - tgpar.targinfo = t->data; - verdict = t->u.kernel.target->target(skb, &tgpar); + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; + verdict = t->u.kernel.target->target(skb, &acpar); /* Target might have changed stuff. */ arp = arp_hdr(skb); @@ -351,7 +365,7 @@ static inline bool unconditional(const struct arpt_arp *arp) /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. 
*/ -static int mark_source_chains(struct xt_table_info *newinfo, +static int mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -372,7 +386,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, for (;;) { const struct arpt_standard_target *t - = (void *)arpt_get_target(e); + = (void *)arpt_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { @@ -456,7 +470,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, return 1; } -static inline int check_entry(struct arpt_entry *e, const char *name) +static inline int check_entry(const struct arpt_entry *e, const char *name) { const struct arpt_entry_target *t; @@ -468,7 +482,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name) if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) return -EINVAL; - t = arpt_get_target(e); + t = arpt_get_target_c(e); if (e->target_offset + t->u.target_size > e->next_offset) return -EINVAL; @@ -498,8 +512,7 @@ static inline int check_target(struct arpt_entry *e, const char *name) } static inline int -find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) { struct arpt_entry_target *t; struct xt_target *target; @@ -510,13 +523,11 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, return ret; t = arpt_get_target(e); - target = try_then_request_module(xt_find_target(NFPROTO_ARP, - t->u.user.name, - t->u.user.revision), - "arpt_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("find_check_entry: `%s' not found\n", t->u.user.name); - ret = target ? 
PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto out; } t->u.kernel.target = target; @@ -524,8 +535,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, ret = check_target(e, name); if (ret) goto err; - - (*i)++; return 0; err: module_put(t->u.kernel.target->me); @@ -533,14 +542,14 @@ out: return ret; } -static bool check_underflow(struct arpt_entry *e) +static bool check_underflow(const struct arpt_entry *e) { const struct arpt_entry_target *t; unsigned int verdict; if (!unconditional(&e->arp)) return false; - t = arpt_get_target(e); + t = arpt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct arpt_standard_target *)t)->verdict; @@ -550,12 +559,11 @@ static bool check_underflow(struct arpt_entry *e) static inline int check_entry_size_and_hooks(struct arpt_entry *e, struct xt_table_info *newinfo, - unsigned char *base, - unsigned char *limit, + const unsigned char *base, + const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int valid_hooks, - unsigned int *i) + unsigned int valid_hooks) { unsigned int h; @@ -592,19 +600,14 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; - - (*i)++; return 0; } -static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) +static inline void cleanup_entry(struct arpt_entry *e) { struct xt_tgdtor_param par; struct arpt_entry_target *t; - if (i && (*i)-- == 0) - return 1; - t = arpt_get_target(e); par.target = t->u.kernel.target; par.targinfo = t->data; @@ -612,26 +615,20 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - return 0; } /* Checks and translates the user-supplied table segment (held in * newinfo). */ -static int translate_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info *newinfo, - void *entry0, - unsigned int size, - unsigned int number, - const unsigned int *hook_entries, - const unsigned int *underflows) +static int translate_table(struct xt_table_info *newinfo, void *entry0, + const struct arpt_replace *repl) { + struct arpt_entry *iter; unsigned int i; - int ret; + int ret = 0; - newinfo->size = size; - newinfo->number = number; + newinfo->size = repl->size; + newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { @@ -643,52 +640,64 @@ static int translate_table(const char *name, i = 0; /* Walk through entries, checking offsets. 
*/ - ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry_size_and_hooks, - newinfo, - entry0, - entry0 + size, - hook_entries, underflows, valid_hooks, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + repl->size, repl->hook_entry, repl->underflow, + repl->valid_hooks); + if (ret != 0) + break; + ++i; + if (strcmp(arpt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) return ret; - if (i != number) { + if (i != repl->num_entries) { duprintf("translate_table: %u not %u entries\n", - i, number); + i, repl->num_entries); return -EINVAL; } /* Check hooks all assigned */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, repl->hook_entry[i]); return -EINVAL; } if (newinfo->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, repl->underflow[i]); return -EINVAL; } } - if (!mark_source_chains(newinfo, valid_hooks, entry0)) { + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { duprintf("Looping hook\n"); return -ELOOP; } /* Finally, each sanity check must pass */ i = 0; - ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, repl->name, repl->size); + if (ret != 0) + break; + ++i; + } if (ret != 0) { - ARPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter); + } return ret; } @@ -701,30 +710,10 @@ static int translate_table(const char *name, return ret; } -/* Gets counters. 
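/*
 * The xt_entry_foreach() used throughout the conversion above is
 * conceptually just a walk over variable-sized entries chained by
 * ->next_offset; a simplified sketch of the idea, not the exact
 * x_tables definition.
 */
#define example_entry_foreach(pos, blob, size) \
	for ((pos) = (typeof(pos))(blob); \
	     (pos) < (typeof(pos))((char *)(blob) + (size)); \
	     (pos) = (typeof(pos))((char *)(pos) + (pos)->next_offset))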
*/ -static inline int add_entry_to_counter(const struct arpt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - -static inline int set_entry_to_counter(const struct arpt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { + struct arpt_entry *iter; unsigned int cpu; unsigned int i; unsigned int curcpu; @@ -740,32 +729,32 @@ static void get_counters(const struct xt_table_info *t, curcpu = smp_processor_id(); i = 0; - ARPT_ENTRY_ITERATE(t->entries[curcpu], - t->size, - set_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[curcpu], t->size) { + SET_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; i = 0; xt_info_wrlock(cpu); - ARPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[cpu], t->size) { + ADD_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } xt_info_wrunlock(cpu); } local_bh_enable(); } -static struct xt_counters *alloc_counters(struct xt_table *table) +static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -783,11 +772,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) } static int copy_entries_to_user(unsigned int total_size, - struct xt_table *table, + const struct xt_table *table, void __user *userptr) { unsigned int off, num; - struct arpt_entry *e; + const struct arpt_entry *e; struct xt_counters *counters; struct xt_table_info *private = table->private; int ret = 0; @@ -807,7 +796,7 @@ static int copy_entries_to_user(unsigned int total_size, /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ - struct arpt_entry_target *t; + const struct arpt_entry_target *t; e = (struct arpt_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off @@ -818,7 +807,7 @@ static int copy_entries_to_user(unsigned int total_size, goto free_counters; } - t = arpt_get_target(e); + t = arpt_get_target_c(e); if (copy_to_user(userptr + off + e->target_offset + offsetof(struct arpt_entry_target, u.user.name), @@ -853,18 +842,18 @@ static int compat_standard_to_user(void __user *dst, void *src) return copy_to_user(dst, &cv, sizeof(cv)) ? 
-EFAULT : 0; } -static int compat_calc_entry(struct arpt_entry *e, +static int compat_calc_entry(const struct arpt_entry *e, const struct xt_table_info *info, - void *base, struct xt_table_info *newinfo) + const void *base, struct xt_table_info *newinfo) { - struct arpt_entry_target *t; + const struct arpt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); entry_offset = (void *)e - base; - t = arpt_get_target(e); + t = arpt_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); @@ -885,7 +874,9 @@ static int compat_calc_entry(struct arpt_entry *e, static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { + struct arpt_entry *iter; void *loc_cpu_entry; + int ret; if (!newinfo || !info) return -EINVAL; @@ -894,13 +885,17 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; - return ARPT_ENTRY_ITERATE(loc_cpu_entry, info->size, - compat_calc_entry, info, loc_cpu_entry, - newinfo); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + return ret; + } + return 0; } #endif -static int get_info(struct net *net, void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, + const int *len, int compat) { char name[ARPT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -959,7 +954,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) } static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, - int *len) + const int *len) { int ret; struct arpt_get_entries get; @@ -1010,6 +1005,7 @@ static int __do_replace(struct net *net, const char *name, struct xt_table_info *oldinfo; struct xt_counters *counters; void *loc_cpu_old_entry; + struct arpt_entry *iter; ret = 0; counters = vmalloc_node(num_counters * sizeof(struct xt_counters), @@ -1053,8 +1049,8 @@ static int __do_replace(struct net *net, const char *name, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + cleanup_entry(iter); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, @@ -1073,12 +1069,14 @@ static int __do_replace(struct net *net, const char *name, return ret; } -static int do_replace(struct net *net, void __user *user, unsigned int len) +static int do_replace(struct net *net, const void __user *user, + unsigned int len) { int ret; struct arpt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct arpt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1086,6 +1084,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1099,9 +1098,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, 
tmp.num_entries, - tmp.hook_entry, tmp.underflow); + ret = translate_table(newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -1114,27 +1111,15 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter); free_newinfo: xt_free_table_info(newinfo); return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -static int do_add_counters(struct net *net, void __user *user, unsigned int len, - int compat) +static int do_add_counters(struct net *net, const void __user *user, + unsigned int len, int compat) { unsigned int i, curcpu; struct xt_counters_info tmp; @@ -1147,6 +1132,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; + struct arpt_entry *iter; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1204,11 +1190,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; xt_info_wrlock(curcpu); - ARPT_ENTRY_ITERATE(loc_cpu_entry, - private->size, - add_counter_to_entry, - paddc, - &i); + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } xt_info_wrunlock(curcpu); unlock_up_free: local_bh_enable(); @@ -1221,28 +1206,22 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, } #ifdef CONFIG_COMPAT -static inline int -compat_release_entry(struct compat_arpt_entry *e, unsigned int *i) +static inline void compat_release_entry(struct compat_arpt_entry *e) { struct arpt_entry_target *t; - if (i && (*i)-- == 0) - return 1; - t = compat_arpt_get_target(e); module_put(t->u.kernel.target->me); - return 0; } static inline int check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, struct xt_table_info *newinfo, unsigned int *size, - unsigned char *base, - unsigned char *limit, - unsigned int *hook_entries, - unsigned int *underflows, - unsigned int *i, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, const char *name) { struct arpt_entry_target *t; @@ -1273,14 +1252,12 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, entry_offset = (void *)e - (void *)base; t = compat_arpt_get_target(e); - target = try_then_request_module(xt_find_target(NFPROTO_ARP, - t->u.user.name, - t->u.user.revision), - "arpt_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", t->u.user.name); - ret = target ? 
PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto out; } t->u.kernel.target = target; @@ -1302,8 +1279,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, /* Clear counters and comefrom */ memset(&e->counters, 0, sizeof(e->counters)); e->comefrom = 0; - - (*i)++; return 0; release_target: @@ -1347,19 +1322,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, return ret; } -static inline int compat_check_entry(struct arpt_entry *e, const char *name, - unsigned int *i) -{ - int ret; - - ret = check_target(e, name); - if (ret) - return ret; - - (*i)++; - return 0; -} - static int translate_compat_table(const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, @@ -1372,8 +1334,10 @@ static int translate_compat_table(const char *name, unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; + struct compat_arpt_entry *iter0; + struct arpt_entry *iter1; unsigned int size; - int ret; + int ret = 0; info = *pinfo; entry0 = *pentry0; @@ -1390,13 +1354,14 @@ static int translate_compat_table(const char *name, j = 0; xt_compat_lock(NFPROTO_ARP); /* Walk through entries, checking offsets. */ - ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, - check_compat_entry_size_and_hooks, - info, &size, entry0, - entry0 + total_size, - hook_entries, underflows, &j, name); - if (ret != 0) - goto out_unlock; + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, entry0 + total_size, hook_entries, underflows, + name); + if (ret != 0) + goto out_unlock; + ++j; + } ret = -EINVAL; if (j != number) { @@ -1435,9 +1400,12 @@ static int translate_compat_table(const char *name, entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; size = total_size; - ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, - compat_copy_entry_from_user, - &pos, &size, name, newinfo, entry1); + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, + &size, name, newinfo, entry1); + if (ret != 0) + break; + } xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); if (ret) @@ -1448,13 +1416,32 @@ static int translate_compat_table(const char *name, goto free_newinfo; i = 0; - ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = check_target(iter1, name); + if (ret != 0) + break; + ++i; + } if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. 
+ */ + int skip = i; j -= i; - COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, - compat_release_entry, &j); - ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (j-- == 0) + break; + compat_release_entry(iter0); + } + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter1); + } xt_free_table_info(newinfo); return ret; } @@ -1472,7 +1459,11 @@ static int translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: - COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) + break; + compat_release_entry(iter0); + } return ret; out_unlock: xt_compat_flush_offsets(NFPROTO_ARP); @@ -1499,6 +1490,7 @@ static int compat_do_replace(struct net *net, void __user *user, struct compat_arpt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct arpt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1508,6 +1500,7 @@ static int compat_do_replace(struct net *net, void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1536,7 +1529,8 @@ static int compat_do_replace(struct net *net, void __user *user, return 0; free_newinfo_untrans: - ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1570,7 +1564,7 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, compat_uint_t *size, struct xt_counters *counters, - unsigned int *i) + unsigned int i) { struct arpt_entry_target *t; struct compat_arpt_entry __user *ce; @@ -1578,14 +1572,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, compat_uint_t origsize; int ret; - ret = -EFAULT; origsize = *size; ce = (struct compat_arpt_entry __user *)*dstptr; - if (copy_to_user(ce, e, sizeof(struct arpt_entry))) - goto out; - - if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) - goto out; + if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; *dstptr += sizeof(struct compat_arpt_entry); *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); @@ -1595,18 +1587,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, t = arpt_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) - goto out; - ret = -EFAULT; + return ret; next_offset = e->next_offset - (origsize - *size); - if (put_user(target_offset, &ce->target_offset)) - goto out; - if (put_user(next_offset, &ce->next_offset)) - goto out; - - (*i)++; + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; return 0; -out: - return ret; } static int compat_copy_entries_to_user(unsigned int total_size, @@ -1620,6 +1606,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, int ret = 0; void *loc_cpu_entry; unsigned int i = 0; + struct arpt_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1629,9 
+1616,12 @@ static int compat_copy_entries_to_user(unsigned int total_size, loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - ret = ARPT_ENTRY_ITERATE(loc_cpu_entry, total_size, - compat_copy_entry_to_user, - &pos, &size, counters, &i); + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, i++); + if (ret != 0) + break; + } vfree(counters); return ret; } @@ -1763,6 +1753,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, rev.revision, 1, &ret), @@ -1784,8 +1775,7 @@ struct xt_table *arpt_register_table(struct net *net, { int ret; struct xt_table_info *newinfo; - struct xt_table_info bootstrap - = { 0, 0, 0, { 0 }, { 0 }, { } }; + struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; @@ -1799,12 +1789,7 @@ struct xt_table *arpt_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, - newinfo, loc_cpu_entry, repl->size, - repl->num_entries, - repl->hook_entry, - repl->underflow); - + ret = translate_table(newinfo, loc_cpu_entry, repl); duprintf("arpt_register_table: translate table gives %d\n", ret); if (ret != 0) goto out_free; @@ -1827,13 +1812,14 @@ void arpt_unregister_table(struct xt_table *table) struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; + struct arpt_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, - cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index b0d5b1d0..2c3c83ff 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -9,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer "); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int -target(struct sk_buff *skb, const struct xt_target_param *par) +target(struct sk_buff *skb, const struct xt_action_param *par) { const struct arpt_mangle *mangle = par->targinfo; const struct arphdr *arp; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 97337601..0d082b67 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -6,6 +6,7 @@ */ #include +#include #include MODULE_LICENSE("GPL"); @@ -15,36 +16,6 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) -static const struct -{ - struct arpt_replace repl; - struct arpt_standard entries[3]; - struct arpt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error), - .hook_entry = { - [NF_ARP_IN] = 0, - [NF_ARP_OUT] = sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), - }, - .underflow = { - [NF_ARP_IN] = 0, - [NF_ARP_OUT] = 
sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), - }, - }, - .entries = { - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_IN */ - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_OUT */ - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_FORWARD */ - }, - .term = ARPT_ERROR_INIT, -}; - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -99,9 +70,14 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { static int __net_init arptable_filter_net_init(struct net *net) { - /* Register table */ + struct arpt_replace *repl; + + repl = arpt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; net->ipv4.arptable_filter = - arpt_register_table(net, &packet_filter, &initial_table.repl); + arpt_register_table(net, &packet_filter, repl); + kfree(repl); if (IS_ERR(net->ipv4.arptable_filter)) return PTR_ERR(net->ipv4.arptable_filter); return 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 62aff317..5df30725 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -28,6 +28,7 @@ #include #include #include +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); @@ -66,6 +67,12 @@ do { \ #define inline #endif +void *ipt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(ipt, IPT); +} +EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); + /* We keep a set of rules for each CPU, so we can avoid write-locking them in the softirq when updating the counters and therefore @@ -158,7 +165,7 @@ ip_checkentry(const struct ipt_ip *ip) } static unsigned int -ipt_error(struct sk_buff *skb, const struct xt_target_param *par) +ipt_error(struct sk_buff *skb, const struct xt_action_param *par) { if (net_ratelimit()) printk("ip_tables: error: `%s'\n", @@ -167,24 +174,9 @@ ipt_error(struct sk_buff *skb, const struct xt_target_param *par) return NF_DROP; } -/* Performance critical - called for every packet */ -static inline bool -do_match(struct ipt_entry_match *m, const struct sk_buff *skb, - struct xt_match_param *par) -{ - par->match = m->u.kernel.match; - par->matchinfo = m->data; - - /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, par)) - return true; - else - return false; -} - /* Performance critical */ static inline struct ipt_entry * -get_entry(void *base, unsigned int offset) +get_entry(const void *base, unsigned int offset) { return (struct ipt_entry *)(base + offset); } @@ -199,6 +191,13 @@ static inline bool unconditional(const struct ipt_ip *ip) #undef FWINV } +/* for const-correctness */ +static inline const struct ipt_entry_target * +ipt_get_target_c(const struct ipt_entry *e) +{ + return ipt_get_target((struct ipt_entry *)e); +} + #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) static const char *const hooknames[] = { @@ -233,11 +232,11 @@ static struct nf_loginfo trace_loginfo = { /* Mildly perf critical (only if packet tracing is on) */ static inline int -get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, +get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, const char *hookname, const char **chainname, const char **comment, unsigned int *rulenum) { - struct ipt_standard_target *t = (void *)ipt_get_target(s); + const struct ipt_standard_target *t = (void *)ipt_get_target_c(s); if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { /* Head of user chain: ERROR 
target with chainname */ @@ -263,17 +262,18 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, return 0; } -static void trace_packet(struct sk_buff *skb, +static void trace_packet(const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, const char *tablename, - struct xt_table_info *private, - struct ipt_entry *e) + const struct xt_table_info *private, + const struct ipt_entry *e) { - void *table_base; + const void *table_base; const struct ipt_entry *root; const char *hookname, *chainname, *comment; + const struct ipt_entry *iter; unsigned int rulenum = 0; table_base = private->entries[smp_processor_id()]; @@ -282,10 +282,10 @@ static void trace_packet(struct sk_buff *skb, hookname = chainname = hooknames[hook]; comment = comments[NF_IP_TRACE_COMMENT_RULE]; - IPT_ENTRY_ITERATE(root, - private->size - private->hook_entry[hook], - get_chainname_rulenum, - e, hookname, &chainname, &comment, &rulenum); + xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) + if (get_chainname_rulenum(iter, e, hookname, + &chainname, &comment, &rulenum) != 0) + break; nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", @@ -307,19 +307,17 @@ ipt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table) { -#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom - static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; - void *table_base; - struct ipt_entry *e, *back; - struct xt_table_info *private; - struct xt_match_param mtpar; - struct xt_target_param tgpar; + const void *table_base; + struct ipt_entry *e, **jumpstack; + unsigned int *stackptr, origptr, cpu; + const struct xt_table_info *private; + struct xt_action_param acpar; /* Initialization */ ip = ip_hdr(skb); @@ -331,36 +329,48 @@ ipt_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. 
*/ - mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; - mtpar.thoff = ip_hdrlen(skb); - mtpar.hotdrop = &hotdrop; - mtpar.in = tgpar.in = in; - mtpar.out = tgpar.out = out; - mtpar.family = tgpar.family = NFPROTO_IPV4; - mtpar.hooknum = tgpar.hooknum = hook; + acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; + acpar.thoff = ip_hdrlen(skb); + acpar.hotdrop = &hotdrop; + acpar.in = in; + acpar.out = out; + acpar.family = NFPROTO_IPV4; + acpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); xt_info_rdlock_bh(); private = table->private; - table_base = private->entries[smp_processor_id()]; + cpu = smp_processor_id(); + table_base = private->entries[cpu]; + jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; + stackptr = &private->stackptr[cpu]; + origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); - /* For return from builtin chain */ - back = get_entry(table_base, private->underflow[hook]); + pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n", + table->name, hook, origptr, + get_entry(table_base, private->underflow[hook])); do { - struct ipt_entry_target *t; + const struct ipt_entry_target *t; + const struct xt_entry_match *ematch; IP_NF_ASSERT(e); - IP_NF_ASSERT(back); if (!ip_packet_match(ip, indev, outdev, - &e->ip, mtpar.fragoff) || - IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) { + &e->ip, acpar.fragoff)) { + no_match: e = ipt_next_entry(e); continue; } + xt_ematch_foreach(ematch, e) { + acpar.match = ematch->u.kernel.match; + acpar.matchinfo = ematch->data; + if (!acpar.match->match(skb, &acpar)) + goto no_match; + } + ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); t = ipt_get_target(e); @@ -384,41 +394,38 @@ ipt_do_table(struct sk_buff *skb, verdict = (unsigned)(-v) - 1; break; } - e = back; - back = get_entry(table_base, back->comefrom); + if (*stackptr == 0) { + e = get_entry(table_base, + private->underflow[hook]); + pr_devel("Underflow (this is normal) " + "to %p\n", e); + } else { + e = jumpstack[--*stackptr]; + pr_devel("Pulled %p out from pos %u\n", + e, *stackptr); + e = ipt_next_entry(e); + } continue; } - if (table_base + v != ipt_next_entry(e) - && !(e->ip.flags & IPT_F_GOTO)) { - /* Save old back ptr in next entry */ - struct ipt_entry *next = ipt_next_entry(e); - next->comefrom = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + if (table_base + v != ipt_next_entry(e) && + !(e->ip.flags & IPT_F_GOTO)) { + if (*stackptr >= private->stacksize) { + verdict = NF_DROP; + break; + } + jumpstack[(*stackptr)++] = e; + pr_devel("Pushed %p into pos %u\n", + e, *stackptr - 1); } e = get_entry(table_base, v); continue; } - /* Targets which reenter must return - abs. verdicts */ - tgpar.target = t->u.kernel.target; - tgpar.targinfo = t->data; + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; - -#ifdef CONFIG_NETFILTER_DEBUG - tb_comefrom = 0xeeeeeeec; -#endif - verdict = t->u.kernel.target->target(skb, &tgpar); -#ifdef CONFIG_NETFILTER_DEBUG - if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) { - printk("Target %s reentered!\n", - t->u.kernel.target->name); - verdict = NF_DROP; - } - tb_comefrom = 0x57acc001; -#endif + verdict = t->u.kernel.target->target(skb, &acpar); /* Target might have changed stuff. 
*/ ip = ip_hdr(skb); if (verdict == IPT_CONTINUE) @@ -428,7 +435,9 @@ ipt_do_table(struct sk_buff *skb, break; } while (!hotdrop); xt_info_rdunlock_bh(); - + pr_devel("Exiting %s; resetting sp from %u to %u\n", + __func__, *stackptr, origptr); + *stackptr = origptr; #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else @@ -436,14 +445,12 @@ ipt_do_table(struct sk_buff *skb, return NF_DROP; else return verdict; #endif - -#undef tb_comefrom } /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int -mark_source_chains(struct xt_table_info *newinfo, +mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -461,8 +468,8 @@ mark_source_chains(struct xt_table_info *newinfo, e->counters.pcnt = pos; for (;;) { - struct ipt_standard_target *t - = (void *)ipt_get_target(e); + const struct ipt_standard_target *t + = (void *)ipt_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { @@ -552,27 +559,23 @@ mark_source_chains(struct xt_table_info *newinfo, return 1; } -static int -cleanup_match(struct ipt_entry_match *m, unsigned int *i) +static void cleanup_match(struct ipt_entry_match *m, struct net *net) { struct xt_mtdtor_param par; - if (i && (*i)-- == 0) - return 1; - + par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV4; if (par.match->destroy != NULL) par.match->destroy(&par); module_put(par.match->me); - return 0; } static int -check_entry(struct ipt_entry *e, const char *name) +check_entry(const struct ipt_entry *e, const char *name) { - struct ipt_entry_target *t; + const struct ipt_entry_target *t; if (!ip_checkentry(&e->ip)) { duprintf("ip_tables: ip check failed %p %s.\n", e, name); @@ -583,7 +586,7 @@ check_entry(struct ipt_entry *e, const char *name) e->next_offset) return -EINVAL; - t = ipt_get_target(e); + t = ipt_get_target_c(e); if (e->target_offset + t->u.target_size > e->next_offset) return -EINVAL; @@ -591,8 +594,7 @@ check_entry(struct ipt_entry *e, const char *name) } static int -check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, - unsigned int *i) +check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; int ret; @@ -607,27 +609,24 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, par.match->name); return ret; } - ++*i; return 0; } static int -find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, - unsigned int *i) +find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) { struct xt_match *match; int ret; - match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, - m->u.user.revision), - "ipt_%s", m->u.user.name); - if (IS_ERR(match) || !match) { + match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { duprintf("find_check_match: `%s' not found\n", m->u.user.name); - return match ? 
PTR_ERR(match) : -ENOENT; + return PTR_ERR(match); } m->u.kernel.match = match; - ret = check_match(m, par, i); + ret = check_match(m, par); if (ret) goto err; @@ -637,10 +636,11 @@ err: return ret; } -static int check_target(struct ipt_entry *e, const char *name) +static int check_target(struct ipt_entry *e, struct net *net, const char *name) { struct ipt_entry_target *t = ipt_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -661,61 +661,66 @@ static int check_target(struct ipt_entry *e, const char *name) } static int -find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct ipt_entry *e, struct net *net, const char *name, + unsigned int size) { struct ipt_entry_target *t; struct xt_target *target; int ret; unsigned int j; struct xt_mtchk_param mtpar; + struct xt_entry_match *ematch; ret = check_entry(e, name); if (ret) return ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV4; - ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); - if (ret != 0) - goto cleanup_matches; + xt_ematch_foreach(ematch, e) { + ret = find_check_match(ematch, &mtpar); + if (ret != 0) + goto cleanup_matches; + ++j; + } t = ipt_get_target(e); - target = try_then_request_module(xt_find_target(AF_INET, - t->u.user.name, - t->u.user.revision), - "ipt_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("find_check_entry: `%s' not found\n", t->u.user.name); - ret = target ? PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto cleanup_matches; } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; - - (*i)++; return 0; err: module_put(t->u.kernel.target->me); cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + cleanup_match(ematch, net); + } return ret; } -static bool check_underflow(struct ipt_entry *e) +static bool check_underflow(const struct ipt_entry *e) { const struct ipt_entry_target *t; unsigned int verdict; if (!unconditional(&e->ip)) return false; - t = ipt_get_target(e); + t = ipt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct ipt_standard_target *)t)->verdict; @@ -726,12 +731,11 @@ static bool check_underflow(struct ipt_entry *e) static int check_entry_size_and_hooks(struct ipt_entry *e, struct xt_table_info *newinfo, - unsigned char *base, - unsigned char *limit, + const unsigned char *base, + const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int valid_hooks, - unsigned int *i) + unsigned int valid_hooks) { unsigned int h; @@ -768,50 +772,42 @@ check_entry_size_and_hooks(struct ipt_entry *e, /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; - - (*i)++; return 0; } -static int -cleanup_entry(struct ipt_entry *e, unsigned int *i) +static void +cleanup_entry(struct ipt_entry *e, struct net *net) { struct xt_tgdtor_param par; struct ipt_entry_target *t; - - if (i && (*i)-- == 0) - return 1; + struct xt_entry_match *ematch; /* Cleanup all matches */ - IPT_MATCH_ITERATE(e, cleanup_match, NULL); + xt_ematch_foreach(ematch, e) + cleanup_match(ematch, net); t = ipt_get_target(e); + 
par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV4; if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - return 0; } /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info *newinfo, - void *entry0, - unsigned int size, - unsigned int number, - const unsigned int *hook_entries, - const unsigned int *underflows) +translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, + const struct ipt_replace *repl) { + struct ipt_entry *iter; unsigned int i; - int ret; + int ret = 0; - newinfo->size = size; - newinfo->number = number; + newinfo->size = repl->size; + newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -822,49 +818,59 @@ translate_table(const char *name, duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ - ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry_size_and_hooks, - newinfo, - entry0, - entry0 + size, - hook_entries, underflows, valid_hooks, &i); - if (ret != 0) - return ret; + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + repl->size, repl->hook_entry, repl->underflow, + repl->valid_hooks); + if (ret != 0) + return ret; + ++i; + if (strcmp(ipt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } - if (i != number) { + if (i != repl->num_entries) { duprintf("translate_table: %u not %u entries\n", - i, number); + i, repl->num_entries); return -EINVAL; } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, repl->hook_entry[i]); return -EINVAL; } if (newinfo->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, repl->underflow[i]); return -EINVAL; } } - if (!mark_source_chains(newinfo, valid_hooks, entry0)) + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; /* Finally, each sanity check must pass */ i = 0; - ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, net, repl->name, repl->size); + if (ret != 0) + break; + ++i; + } if (ret != 0) { - IPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter, net); + } return ret; } @@ -877,33 +883,11 @@ translate_table(const char *name, return ret; } -/* Gets counters. 
*/ -static inline int -add_entry_to_counter(const struct ipt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - -static inline int -set_entry_to_counter(const struct ipt_entry *e, - struct ipt_counters total[], - unsigned int *i) -{ - SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { + struct ipt_entry *iter; unsigned int cpu; unsigned int i; unsigned int curcpu; @@ -919,32 +903,32 @@ get_counters(const struct xt_table_info *t, curcpu = smp_processor_id(); i = 0; - IPT_ENTRY_ITERATE(t->entries[curcpu], - t->size, - set_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[curcpu], t->size) { + SET_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; i = 0; xt_info_wrlock(cpu); - IPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[cpu], t->size) { + ADD_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; /* macro does multi eval of i */ + } xt_info_wrunlock(cpu); } local_bh_enable(); } -static struct xt_counters * alloc_counters(struct xt_table *table) +static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -962,11 +946,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) static int copy_entries_to_user(unsigned int total_size, - struct xt_table *table, + const struct xt_table *table, void __user *userptr) { unsigned int off, num; - struct ipt_entry *e; + const struct ipt_entry *e; struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; @@ -1018,7 +1002,7 @@ copy_entries_to_user(unsigned int total_size, } } - t = ipt_get_target(e); + t = ipt_get_target_c(e); if (copy_to_user(userptr + off + e->target_offset + offsetof(struct ipt_entry_target, u.user.name), @@ -1053,25 +1037,20 @@ static int compat_standard_to_user(void __user *dst, void *src) return copy_to_user(dst, &cv, sizeof(cv)) ? 
-EFAULT : 0; } -static inline int -compat_calc_match(struct ipt_entry_match *m, int *size) -{ - *size += xt_compat_match_offset(m->u.kernel.match); - return 0; -} - -static int compat_calc_entry(struct ipt_entry *e, +static int compat_calc_entry(const struct ipt_entry *e, const struct xt_table_info *info, - void *base, struct xt_table_info *newinfo) + const void *base, struct xt_table_info *newinfo) { - struct ipt_entry_target *t; + const struct xt_entry_match *ematch; + const struct ipt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - base; - IPT_MATCH_ITERATE(e, compat_calc_match, &off); - t = ipt_get_target(e); + xt_ematch_foreach(ematch, e) + off += xt_compat_match_offset(ematch->u.kernel.match); + t = ipt_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(AF_INET, entry_offset, off); @@ -1092,7 +1071,9 @@ static int compat_calc_entry(struct ipt_entry *e, static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { + struct ipt_entry *iter; void *loc_cpu_entry; + int ret; if (!newinfo || !info) return -EINVAL; @@ -1101,13 +1082,17 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; - return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size, - compat_calc_entry, info, loc_cpu_entry, - newinfo); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + return ret; + } + return 0; } #endif -static int get_info(struct net *net, void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, + const int *len, int compat) { char name[IPT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1167,7 +1152,8 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) } static int -get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) +get_entries(struct net *net, struct ipt_get_entries __user *uptr, + const int *len) { int ret; struct ipt_get_entries get; @@ -1215,6 +1201,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *oldinfo; struct xt_counters *counters; void *loc_cpu_old_entry; + struct ipt_entry *iter; ret = 0; counters = vmalloc(num_counters * sizeof(struct xt_counters)); @@ -1257,8 +1244,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + cleanup_entry(iter, net); + xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) @@ -1277,12 +1265,13 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, } static int -do_replace(struct net *net, void __user *user, unsigned int len) +do_replace(struct net *net, const void __user *user, unsigned int len) { int ret; struct ipt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct ipt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1290,6 +1279,7 @@ do_replace(struct net *net, void __user 
*user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1303,9 +1293,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, - tmp.hook_entry, tmp.underflow); + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -1318,27 +1306,16 @@ do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -static int -do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) +do_add_counters(struct net *net, const void __user *user, + unsigned int len, int compat) { unsigned int i, curcpu; struct xt_counters_info tmp; @@ -1351,6 +1328,7 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; + struct ipt_entry *iter; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1408,11 +1386,10 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; xt_info_wrlock(curcpu); - IPT_ENTRY_ITERATE(loc_cpu_entry, - private->size, - add_counter_to_entry, - paddc, - &i); + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } xt_info_wrunlock(curcpu); unlock_up_free: local_bh_enable(); @@ -1440,45 +1417,40 @@ struct compat_ipt_replace { static int compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, unsigned int *size, struct xt_counters *counters, - unsigned int *i) + unsigned int i) { struct ipt_entry_target *t; struct compat_ipt_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; - int ret; + const struct xt_entry_match *ematch; + int ret = 0; - ret = -EFAULT; origsize = *size; ce = (struct compat_ipt_entry __user *)*dstptr; - if (copy_to_user(ce, e, sizeof(struct ipt_entry))) - goto out; - - if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) - goto out; + if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; *dstptr += sizeof(struct compat_ipt_entry); *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size); + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_to_user(ematch, dstptr, size); + if (ret != 0) + return ret; + } target_offset = e->target_offset - (origsize - *size); - if (ret) - goto out; t = ipt_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) - goto out; - ret = -EFAULT; + return ret; next_offset = e->next_offset - (origsize - *size); - if 
(put_user(target_offset, &ce->target_offset)) - goto out; - if (put_user(next_offset, &ce->next_offset)) - goto out; - - (*i)++; + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; return 0; -out: - return ret; } static int @@ -1486,61 +1458,45 @@ compat_find_calc_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask, - int *size, unsigned int *i) + int *size) { struct xt_match *match; - match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, - m->u.user.revision), - "ipt_%s", m->u.user.name); - if (IS_ERR(match) || !match) { + match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { duprintf("compat_check_calc_match: `%s' not found\n", m->u.user.name); - return match ? PTR_ERR(match) : -ENOENT; + return PTR_ERR(match); } m->u.kernel.match = match; *size += xt_compat_match_offset(match); - - (*i)++; return 0; } -static int -compat_release_match(struct ipt_entry_match *m, unsigned int *i) -{ - if (i && (*i)-- == 0) - return 1; - - module_put(m->u.kernel.match->me); - return 0; -} - -static int -compat_release_entry(struct compat_ipt_entry *e, unsigned int *i) +static void compat_release_entry(struct compat_ipt_entry *e) { struct ipt_entry_target *t; - - if (i && (*i)-- == 0) - return 1; + struct xt_entry_match *ematch; /* Cleanup all matches */ - COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL); + xt_ematch_foreach(ematch, e) + module_put(ematch->u.kernel.match->me); t = compat_ipt_get_target(e); module_put(t->u.kernel.target->me); - return 0; } static int check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, - unsigned char *base, - unsigned char *limit, - unsigned int *hook_entries, - unsigned int *underflows, - unsigned int *i, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, const char *name) { + struct xt_entry_match *ematch; struct ipt_entry_target *t; struct xt_target *target; unsigned int entry_offset; @@ -1569,20 +1525,21 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - (void *)base; j = 0; - ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name, - &e->ip, e->comefrom, &off, &j); - if (ret != 0) - goto release_matches; + xt_ematch_foreach(ematch, e) { + ret = compat_find_calc_match(ematch, name, + &e->ip, e->comefrom, &off); + if (ret != 0) + goto release_matches; + ++j; + } t = compat_ipt_get_target(e); - target = try_then_request_module(xt_find_target(AF_INET, - t->u.user.name, - t->u.user.revision), - "ipt_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", t->u.user.name); - ret = target ? 
PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto release_matches; } t->u.kernel.target = target; @@ -1604,14 +1561,16 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, /* Clear counters and comefrom */ memset(&e->counters, 0, sizeof(e->counters)); e->comefrom = 0; - - (*i)++; return 0; out: module_put(t->u.kernel.target->me); release_matches: - IPT_MATCH_ITERATE(e, compat_release_match, &j); + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + module_put(ematch->u.kernel.match->me); + } return ret; } @@ -1625,6 +1584,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, struct ipt_entry *de; unsigned int origsize; int ret, h; + struct xt_entry_match *ematch; ret = 0; origsize = *size; @@ -1635,10 +1595,11 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user, - dstptr, size); - if (ret) - return ret; + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_from_user(ematch, dstptr, size); + if (ret != 0) + return ret; + } de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); target = t->u.kernel.target; @@ -1655,36 +1616,43 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, } static int -compat_check_entry(struct ipt_entry *e, const char *name, - unsigned int *i) +compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) { + struct xt_entry_match *ematch; struct xt_mtchk_param mtpar; unsigned int j; - int ret; + int ret = 0; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV4; - ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); + xt_ematch_foreach(ematch, e) { + ret = check_match(ematch, &mtpar); + if (ret != 0) + goto cleanup_matches; + ++j; + } + + ret = check_target(e, net, name); if (ret) goto cleanup_matches; - - ret = check_target(e, name); - if (ret) - goto cleanup_matches; - - (*i)++; return 0; cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + cleanup_match(ematch, net); + } return ret; } static int -translate_compat_table(const char *name, +translate_compat_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, @@ -1696,6 +1664,8 @@ translate_compat_table(const char *name, unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; + struct compat_ipt_entry *iter0; + struct ipt_entry *iter1; unsigned int size; int ret; @@ -1714,13 +1684,14 @@ translate_compat_table(const char *name, j = 0; xt_compat_lock(AF_INET); /* Walk through entries, checking offsets. 
*/ - ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, - check_compat_entry_size_and_hooks, - info, &size, entry0, - entry0 + total_size, - hook_entries, underflows, &j, name); - if (ret != 0) - goto out_unlock; + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, entry0 + total_size, hook_entries, underflows, + name); + if (ret != 0) + goto out_unlock; + ++j; + } ret = -EINVAL; if (j != number) { @@ -1759,9 +1730,12 @@ translate_compat_table(const char *name, entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; size = total_size; - ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, - compat_copy_entry_from_user, - &pos, &size, name, newinfo, entry1); + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, + &size, name, newinfo, entry1); + if (ret != 0) + break; + } xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); if (ret) @@ -1772,13 +1746,32 @@ translate_compat_table(const char *name, goto free_newinfo; i = 0; - ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = compat_check_entry(iter1, net, name); + if (ret != 0) + break; + ++i; + } if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. + */ + int skip = i; j -= i; - COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, - compat_release_entry, &j); - IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (j-- == 0) + break; + compat_release_entry(iter0); + } + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter1, net); + } xt_free_table_info(newinfo); return ret; } @@ -1796,7 +1789,11 @@ translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: - COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) + break; + compat_release_entry(iter0); + } return ret; out_unlock: xt_compat_flush_offsets(AF_INET); @@ -1811,6 +1808,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) struct compat_ipt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct ipt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1820,6 +1818,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1833,7 +1832,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, + ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, &newinfo, &loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); @@ -1849,7 +1848,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1898,6 +1898,7 @@ compat_copy_entries_to_user(unsigned int total_size, 
struct xt_table *table, int ret = 0; const void *loc_cpu_entry; unsigned int i = 0; + struct ipt_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1910,9 +1911,12 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size, - compat_copy_entry_to_user, - &pos, &size, counters, &i); + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, i++); + if (ret != 0) + break; + } vfree(counters); return ret; @@ -2044,6 +2048,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; if (cmd == IPT_SO_GET_REVISION_TARGET) target = 1; @@ -2071,8 +2076,7 @@ struct xt_table *ipt_register_table(struct net *net, { int ret; struct xt_table_info *newinfo; - struct xt_table_info bootstrap - = { 0, 0, 0, { 0 }, { 0 }, { } }; + struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; @@ -2086,11 +2090,7 @@ struct xt_table *ipt_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, - newinfo, loc_cpu_entry, repl->size, - repl->num_entries, - repl->hook_entry, - repl->underflow); + ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) goto out_free; @@ -2108,17 +2108,19 @@ out: return ERR_PTR(ret); } -void ipt_unregister_table(struct xt_table *table) +void ipt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; + struct ipt_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); @@ -2136,7 +2138,7 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) +icmp_match(const struct sk_buff *skb, const struct xt_action_param *par) { const struct icmphdr *ic; struct icmphdr _icmph; @@ -2163,7 +2165,7 @@ icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) !!(icmpinfo->invflags&IPT_ICMP_INV)); } -static bool icmp_checkentry(const struct xt_mtchk_param *par) +static int icmp_checkentry(const struct xt_mtchk_param *par) { const struct ipt_icmp *icmpinfo = par->matchinfo; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2e4f98b8..28b424e7 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -281,7 +281,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) ***********************************************************************/ static unsigned int -clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) +clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; struct nf_conn *ct; @@ -319,12 +319,12 @@ clusterip_tg(struct sk_buff *skb, 
const struct xt_target_param *par) ct->mark = hash; break; case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: /* FIXME: we don't handle expectations at the * moment. they can arrive on a different node than * the master connection (e.g. FTP passive mode) */ case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: + case IP_CT_ESTABLISHED_REPLY: break; default: break; @@ -666,8 +666,11 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, struct clusterip_config *c = pde->data; unsigned long nodenum; - if (copy_from_user(buffer, input, PROC_WRITELEN)) + if (size > PROC_WRITELEN) + return -EIO; + if (copy_from_user(buffer, input, size)) return -EFAULT; + buffer[size] = 0; if (*buffer == '+') { nodenum = simple_strtoul(buffer+1, NULL, 10); diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index f7e2fa09..8a8687e0 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -50,7 +50,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) struct tcphdr _tcph, *tcph; __be16 oldval; - /* Not enought header? */ + /* Not enough header? */ tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (!tcph) return false; @@ -77,7 +77,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) } static unsigned int -ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) +ecn_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_ECN_info *einfo = par->targinfo; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index acc44c69..867b7c94 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -425,7 +425,7 @@ ipt_log_packet(u_int8_t pf, } static unsigned int -log_tg(struct sk_buff *skb, const struct xt_target_param *par) +log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_log_info *loginfo = par->targinfo; struct nf_loginfo li; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index dada0863..573d156c 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -44,7 +44,7 @@ static bool masquerade_tg_check(const struct xt_tgchk_param *par) } static unsigned int -masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) +masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; struct nf_conn_nat *nat; @@ -60,7 +60,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED - || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + || ctinfo == IP_CT_RELATED_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. 
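
The ip_tables.c hunks earlier in this patch drop the IPT_ENTRY_ITERATE()/COMPAT_IPT_ENTRY_ITERATE() callback macros in favour of open-coded xt_entry_foreach() loops, which step through the flat rule blob one variable-sized entry at a time using each entry's next_offset. The following stand-alone sketch is not part of the patch; the struct and function names are invented purely to illustrate that offset-walking pattern:

#include <stdio.h>
#include <stddef.h>
#include <string.h>

/* Hypothetical variable-sized record, standing in for struct ipt_entry. */
struct entry {
	unsigned short next_offset;   /* total size of this entry */
	unsigned short payload_len;
	char payload[];
};

/* Walk a flat blob of variable-sized entries the way xt_entry_foreach()
 * does: start at the blob, stop at blob + size, advance by next_offset. */
static void walk_entries(void *blob, unsigned int size)
{
	struct entry *e;

	for (e = blob;
	     (char *)e < (char *)blob + size;
	     e = (struct entry *)((char *)e + e->next_offset))
		printf("entry at offset %td: \"%.*s\"\n",
		       (char *)e - (char *)blob,
		       (int)e->payload_len, e->payload);
}

int main(void)
{
	unsigned short storage[64];   /* aligned storage for the blob */
	char *blob = (char *)storage;
	unsigned int used = 0;
	const char *words[] = { "first", "second", "third" };
	int i;

	for (i = 0; i < 3; i++) {
		struct entry *e = (struct entry *)(blob + used);
		size_t len = strlen(words[i]);

		e->payload_len = (unsigned short)len;
		memcpy(e->payload, words[i], len);
		/* pad the record so the next entry stays aligned */
		e->next_offset = (unsigned short)((sizeof(*e) + len + 7) & ~7u);
		used += e->next_offset;
	}
	walk_entries(blob, used);
	return 0;
}

Replacing the callback macros with a plain loop is what lets the surrounding hunks break out early, count successfully checked entries, and unwind partially initialised rules without threading extra state through iterator arguments.
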
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 7c29582d..35065b60 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -38,7 +38,7 @@ static bool netmap_tg_check(const struct xt_tgchk_param *par) } static unsigned int -netmap_tg(struct sk_buff *skb, const struct xt_target_param *par) +netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 698e5e78..12a75ef2 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -42,7 +42,7 @@ static bool redirect_tg_check(const struct xt_tgchk_param *par) } static unsigned int -redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) +redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index c93ae44b..9f5e5526 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -135,13 +135,10 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) } static unsigned int -reject_tg(struct sk_buff *skb, const struct xt_target_param *par) +reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_reject_info *reject = par->targinfo; - /* WARNING: This code causes reentry within iptables. - This means that the iptables jump stack is now crap. We - must return an absolute verdict. --RR */ switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: send_unreach(skb, ICMP_NET_UNREACH); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d32cc4bb..36892480 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -281,7 +281,7 @@ alloc_failure: } static unsigned int -ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) +ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) { ipt_ulog_packet(par->hooknum, skb, par->in, par->out, par->targinfo, NULL); diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 3b216be3..ded9cd64 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -30,7 +30,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev, } static bool -addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) +addrtype_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par) { struct net *net = dev_net(par->in ? par->in : par->out); const struct ipt_addrtype_info *info = par->matchinfo; @@ -48,7 +48,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) } static bool -addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +addrtype_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par) { struct net *net = dev_net(par->in ? 
par->in : par->out); const struct ipt_addrtype_info_v1 *info = par->matchinfo; @@ -70,7 +70,7 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) { struct ipt_addrtype_info_v1 *info = par->matchinfo; diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 0104c0b3..4eccad15 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -5,7 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -18,25 +18,19 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yon Uriarte "); MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match"); -#ifdef DEBUG_CONNTRACK -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline bool spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { bool r; - duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', - min,spi,max); + pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + invert ? '!' : ' ', min, spi, max); r=(spi >= min && spi <= max) ^ invert; - duprintf(" result %s\n",r? "PASS" : "FAILED"); + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } -static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool ah_mt(const struct sk_buff *skb, const struct xt_action_param *par) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; @@ -51,7 +45,7 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. 
*/ - duprintf("Dropping evil AH tinygram.\n"); + pr_debug("Dropping evil AH tinygram.\n"); *par->hotdrop = true; return 0; } @@ -61,13 +55,13 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) !!(ahinfo->invflags & IPT_AH_INV_SPI)); } -static bool ah_mt_check(const struct xt_mtchk_param *par) +static int ah_mt_check(const struct xt_mtchk_param *par) { const struct ipt_ah *ahinfo = par->matchinfo; /* Must specify no unknown invflags */ if (ahinfo->invflags & ~IPT_AH_INV_MASK) { - duprintf("ipt_ah: unknown flags %X\n", ahinfo->invflags); + pr_debug("unknown flags %X\n", ahinfo->invflags); return false; } return true; diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 6289b641..52d04d64 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -67,7 +67,8 @@ static inline bool match_tcp(const struct sk_buff *skb, return true; } -static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool ecn_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct ipt_ecn_info *info = par->matchinfo; @@ -85,7 +86,7 @@ static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ecn_mt_check(const struct xt_mtchk_param *par) +static int ecn_mt_check(const struct xt_mtchk_param *par) { const struct ipt_ecn_info *info = par->matchinfo; const struct ipt_ip *ip = par->entryinfo; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index df566cbd..46332921 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -23,36 +23,6 @@ MODULE_DESCRIPTION("iptables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) -static struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -128,9 +98,18 @@ module_param(forward, bool, 0000); static int __net_init iptable_filter_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + /* Entry 1 is the FORWARD hook */ + ((struct ipt_standard *)repl->entries)[1].target.verdict = + -forward - 1; + net->ipv4.iptable_filter = - ipt_register_table(net, &packet_filter, &initial_table.repl); + ipt_register_table(net, &packet_filter, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_filter)) return PTR_ERR(net->ipv4.iptable_filter); return 0; @@ -138,7 +117,7 @@ static int __net_init iptable_filter_net_init(struct net *net) static void __net_exit iptable_filter_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_filter); + ipt_unregister_table(net, 
net->ipv4.iptable_filter); } static struct pernet_operations iptable_filter_net_ops = { @@ -155,9 +134,6 @@ static int __init iptable_filter_init(void) return -EINVAL; } - /* Entry 1 is the FORWARD hook */ - initial_table.entries[1].target.verdict = -forward - 1; - ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) return ret; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 036047f9..c3fed41f 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -27,43 +27,6 @@ MODULE_DESCRIPTION("iptables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */ -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[5]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "mangle", - .valid_hooks = MANGLE_VALID_HOOKS, - .num_entries = 6, - .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, @@ -198,9 +161,14 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { static int __net_init iptable_mangle_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_mangler); + if (repl == NULL) + return -ENOMEM; net->ipv4.iptable_mangle = - ipt_register_table(net, &packet_mangler, &initial_table.repl); + ipt_register_table(net, &packet_mangler, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_mangle)) return PTR_ERR(net->ipv4.iptable_mangle); return 0; @@ -208,7 +176,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) static void __net_exit iptable_mangle_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_mangle); + ipt_unregister_table(net, net->ipv4.iptable_mangle); } static struct pernet_operations iptable_mangle_net_ops = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 993edc23..8fde88cb 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -9,33 +9,6 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[2]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "raw", - .valid_hooks = RAW_VALID_HOOKS, - .num_entries = 3, - .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) - 
}, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -90,9 +63,14 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { static int __net_init iptable_raw_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_raw); + if (repl == NULL) + return -ENOMEM; net->ipv4.iptable_raw = - ipt_register_table(net, &packet_raw, &initial_table.repl); + ipt_register_table(net, &packet_raw, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_raw)) return PTR_ERR(net->ipv4.iptable_raw); return 0; @@ -100,7 +78,7 @@ static int __net_init iptable_raw_net_init(struct net *net) static void __net_exit iptable_raw_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_raw); + ipt_unregister_table(net, net->ipv4.iptable_raw); } static struct pernet_operations iptable_raw_net_ops = { diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 99eb76c6..5888738e 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -27,36 +27,6 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "security", - .valid_hooks = SECURITY_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, @@ -127,9 +97,14 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { static int __net_init iptable_security_net_init(struct net *net) { - net->ipv4.iptable_security = - ipt_register_table(net, &security_table, &initial_table.repl); + struct ipt_replace *repl; + repl = ipt_alloc_initial_table(&security_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.iptable_security = + ipt_register_table(net, &security_table, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_security)) return PTR_ERR(net->ipv4.iptable_security); @@ -138,7 +113,7 @@ static int __net_init iptable_security_net_init(struct net *net) static void __net_exit iptable_security_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_security); + ipt_unregister_table(net, net->ipv4.iptable_security); } static struct pernet_operations iptable_security_net_ops = { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 1032a152..85bdd89e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 
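
The table-setup hunks above (iptable_filter, iptable_mangle, iptable_raw, iptable_security) all follow the same shape: allocate an initial-table template with ipt_alloc_initial_table(), optionally patch it (the filter table adjusts the FORWARD verdict), register it, then kfree() it, instead of carrying a hand-written static initial_table blob per table. As a rough, self-contained sketch of what such a template amounts to (the structures below are simplified stand-ins, not the kernel's real ipt_replace layout):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified stand-ins for ipt_replace / ipt_standard / ipt_error;
 * the real structures are larger and carry targets and counters. */
struct repl_hdr {
	unsigned int num_entries;
	unsigned int size;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
};
struct std_rule { int verdict; char pad[28]; };
struct err_rule { char marker[32]; };

/* Build "header + one accept rule per enabled hook + ERROR terminator",
 * the layout the removed static initial_table definitions wrote out by
 * hand for every table. */
static struct repl_hdr *alloc_initial_table(unsigned int valid_hooks)
{
	unsigned int nhooks = 0, h, off = 0, i = 0;
	struct repl_hdr *r;
	struct std_rule *rules;

	for (h = 0; h < 5; h++)
		if (valid_hooks & (1u << h))
			nhooks++;

	r = calloc(1, sizeof(*r) +
		      nhooks * sizeof(struct std_rule) + sizeof(struct err_rule));
	if (r == NULL)
		return NULL;

	r->num_entries = nhooks + 1;
	r->size = nhooks * sizeof(struct std_rule) + sizeof(struct err_rule);

	rules = (struct std_rule *)(r + 1);
	for (h = 0; h < 5; h++) {
		if (!(valid_hooks & (1u << h)))
			continue;
		r->hook_entry[h] = off;
		r->underflow[h] = off;
		rules[i++].verdict = -1;	/* "accept" placeholder */
		off += sizeof(struct std_rule);
	}
	memcpy((char *)rules + off, "ERROR", 6);	/* terminator */
	return r;
}

int main(void)
{
	/* LOCAL_IN | FORWARD | LOCAL_OUT, as for the filter table */
	struct repl_hdr *r = alloc_initial_table((1u << 1) | (1u << 2) | (1u << 3));

	if (r != NULL) {
		printf("%u entries, %u bytes of rules\n", r->num_entries, r->size);
		free(r);	/* the template is only needed until registration */
	}
	return 0;
}

Because every per-net instance can be built from the same heap-allocated template and the template is discarded right after registration, the per-table static initializers that the hunks delete become redundant.
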
@@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -100,7 +101,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); @@ -274,7 +275,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(sock_net(sk), &tuple); + h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index d71ba767..e962e953 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; @@ -114,13 +115,14 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct net *net, struct sk_buff *skb, +icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; NF_CT_ASSERT(skb->nfct == NULL); @@ -146,7 +148,7 @@ icmp_error_message(struct net *net, struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(net, &innertuple); + h = nf_conntrack_find_get(net, zone, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; @@ -163,7 +165,8 @@ icmp_error_message(struct net *net, struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int -icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, +icmp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; @@ -208,7 +211,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, && icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(net, skb, ctinfo, hooknum); + return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 331ead3e..cd9d5da1 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -16,6 +16,7 @@ #include #include +#include #include /* Returns new sk_buff, or NULL */ @@ -38,15 +39,20 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, struct sk_buff *skb) { + u16 zone = NF_CT_DEFAULT_ZONE; + + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); + #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) - return IP_DEFRAG_CONNTRACK_BRIDGE_IN; + return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; #endif if (hooknum == NF_INET_PRE_ROUTING) - return IP_DEFRAG_CONNTRACK_IN; + return IP_DEFRAG_CONNTRACK_IN + zone; 
else - return IP_DEFRAG_CONNTRACK_OUT; + return IP_DEFRAG_CONNTRACK_OUT + zone; } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, @@ -59,7 +65,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if (skb->nfct) + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; #endif #endif diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 26066a23..da21dcb9 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -30,6 +30,7 @@ #include #include #include +#include static DEFINE_SPINLOCK(nf_nat_lock); @@ -69,13 +70,14 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { unsigned int hash; /* Original src, to ensure we map it consistently if poss. */ hash = jhash_3words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->src.u.all, + (__force u32)tuple->src.u.all ^ zone, tuple->dst.protonum, 0); return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } @@ -139,12 +141,12 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(struct net *net, +find_appropriate_src(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(net, tuple); + unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -152,7 +154,7 @@ find_appropriate_src(struct net *net, rcu_read_lock(); hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { ct = nat->ct; - if (same_src(ct, tuple)) { + if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -175,7 +177,7 @@ find_appropriate_src(struct net *net, the ip with the lowest src-ip/dst-ip/proto usage. */ static void -find_best_ips_proto(struct nf_conntrack_tuple *tuple, +find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, const struct nf_conn *ct, enum nf_nat_manip_type maniptype) @@ -209,7 +211,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, range->flags & IP_NAT_RANGE_PERSISTENT ? - 0 : (__force u32)tuple->dst.u3.ip, 0); + 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); } @@ -229,6 +231,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, { struct net *net = nf_ct_net(ct); const struct nf_nat_protocol *proto; + u16 zone = nf_ct_zone(ct); /* 1) If this srcip/proto/src-proto-part is currently mapped, and that same mapping gives a unique tuple within the given @@ -239,7 +242,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. 
*/ if (maniptype == IP_NAT_MANIP_SRC && !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (find_appropriate_src(net, orig_tuple, tuple, range)) { + if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) return; @@ -249,7 +252,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, /* 2) Select the least-used IP/proto combination in the given range. */ *tuple = *orig_tuple; - find_best_ips_proto(tuple, range, ct, maniptype); + find_best_ips_proto(zone, tuple, range, ct, maniptype); /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ @@ -327,7 +330,8 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, nf_ct_zone(ct), + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); @@ -444,7 +448,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, /* Must be RELATED */ NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || - skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + skb->nfctinfo == IP_CT_RELATED_REPLY); /* Redirects on non-null nats must be dropped, else they'll start talking to each other without our translation, and be @@ -737,7 +741,7 @@ static int __init nf_nat_init(void) spin_unlock_bh(&nf_nat_lock); /* Initialize fake conntrack so that NAT will skip it */ - nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; + nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 9eb17105..4c060038 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -74,7 +75,7 @@ static void pptp_nat_expected(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); - other_exp = nf_ct_expect_find_get(net, &t); + other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 6c4f11f5..2d5073ac 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 9e81e0df..e06fd0c6 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -28,36 +28,6 @@ (1 << NF_INET_POST_ROUTING) | \ (1 << NF_INET_LOCAL_OUT)) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} nat_initial_table __net_initdata = { - .repl = { - .name = "nat", - .valid_hooks = NAT_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* 
PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, @@ -67,7 +37,7 @@ static const struct xt_table nat_table = { /* Source NAT */ static unsigned int -ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) +ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -79,14 +49,14 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); NF_CT_ASSERT(par->out != NULL); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); } static unsigned int -ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) +ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -186,8 +156,13 @@ static struct xt_target ipt_dnat_reg __read_mostly = { static int __net_init nf_nat_rule_net_init(struct net *net) { - net->ipv4.nat_table = ipt_register_table(net, &nat_table, - &nat_initial_table.repl); + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&nat_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl); + kfree(repl); if (IS_ERR(net->ipv4.nat_table)) return PTR_ERR(net->ipv4.nat_table); return 0; @@ -195,7 +170,7 @@ static int __net_init nf_nat_rule_net_init(struct net *net) static void __net_exit nf_nat_rule_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.nat_table); + ipt_unregister_table(net, net->ipv4.nat_table); } static struct pernet_operations nf_nat_rule_net_ops = { diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 5f41d017..dfea18bc 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -97,7 +97,7 @@ nf_nat_fn(unsigned int hooknum, return NF_ACCEPT; /* Don't try to NAT if this packet is not conntracked */ - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return NF_ACCEPT; nat = nfct_nat(ct); @@ -114,7 +114,7 @@ nf_nat_fn(unsigned int hooknum, switch (ctinfo) { case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(ct, ctinfo, hooknum, skb)) @@ -149,7 +149,7 @@ nf_nat_fn(unsigned int hooknum, default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); + ctinfo == IP_CT_ESTABLISHED_REPLY); } return nf_nat_packet(ct, ctinfo, hooknum, skb); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5b1050a5..4cb5624d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -107,6 +107,7 @@ #ifdef CONFIG_SYSCTL #include #endif +#include #define RT_FL_TOS(oldflp) \ ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) @@ -287,12 +288,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) if (!rt_hash_table[st->bucket].chain) continue; rcu_read_lock_bh(); - r = rcu_dereference(rt_hash_table[st->bucket].chain); + r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); while (r) { if (dev_net(r->u.dst.dev) == seq_file_net(seq) && r->rt_genid == st->genid) return 
r; - r = rcu_dereference(r->u.dst.rt_next); + r = rcu_dereference_bh(r->u.dst.rt_next); } rcu_read_unlock_bh(); } @@ -314,7 +315,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, rcu_read_lock_bh(); r = rt_hash_table[st->bucket].chain; } - return rcu_dereference(r); + return rcu_dereference_bh(r); } static struct rtable *rt_cache_get_next(struct seq_file *seq, @@ -2685,8 +2686,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); rcu_read_lock_bh(); - for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; + rth = rcu_dereference_bh(rth->u.dst.rt_next)) { if (rth->fl.fl4_dst == flp->fl4_dst && rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && @@ -2712,6 +2713,11 @@ slow_output: EXPORT_SYMBOL_GPL(__ip_route_output_key); +static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) +{ + return NULL; +} + static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } @@ -2720,7 +2726,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, - .check = ipv4_dst_check, + .check = ipv4_blackhole_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .entries = ATOMIC_INIT(0), }; @@ -3004,8 +3010,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!rt_hash_table[h].chain) continue; rcu_read_lock_bh(); - for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference(rt->u.dst.rt_next), idx++) { + for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; + rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) continue; if (rt_is_expired(rt)) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1813bc7..96e3b5eb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -265,6 +265,10 @@ #include #include +#ifdef CONFIG_UID_STAT +#include +#endif + #include #include #include @@ -386,8 +390,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) */ mask = 0; - if (sk->sk_err) - mask = POLLERR; /* * POLLHUP is certainly not done right. But poll() doesn't @@ -451,11 +453,17 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) mask |= POLLOUT | POLLWRNORM; } - } + } else + mask |= POLLOUT | POLLWRNORM; if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; } + /* This barrier is coupled with smp_wmb() in tcp_reset() */ + smp_rmb(); + if (sk->sk_err) + mask |= POLLERR; + return mask; } @@ -934,7 +942,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out_err; while (--iovlen >= 0) { - int seglen = iov->iov_len; + size_t seglen = iov->iov_len; unsigned char __user *from = iov->iov_base; iov++; @@ -1098,6 +1106,11 @@ out: tcp_push(sk, flags, mss_now, tp->nonagle); TCP_CHECK_TIMER(sk); release_sock(sk); + +#ifdef CONFIG_UID_STAT + if (copied > 0) + update_tcp_snd(current_uid(), copied); +#endif return copied; do_fault: @@ -1334,14 +1347,19 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_eat_skb(sk, skb, 0); if (!desc->count) break; + tp->copied_seq = seq; } tp->copied_seq = seq; tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. 
*/ - if (copied > 0) + if (copied > 0) { tcp_cleanup_rbuf(sk, copied); +#ifdef CONFIG_UID_STAT + update_tcp_rcv(current_uid(), copied); +#endif + } return copied; } @@ -1737,6 +1755,10 @@ skip_copy: TCP_CHECK_TIMER(sk); release_sock(sk); +#ifdef CONFIG_UID_STAT + if (copied > 0) + update_tcp_rcv(current_uid(), copied); +#endif return copied; out: @@ -1746,6 +1768,10 @@ out: recv_urg: err = tcp_recv_urg(sk, msg, len, flags); +#ifdef CONFIG_UID_STAT + if (err > 0) + update_tcp_rcv(current_uid(), err); +#endif goto out; } @@ -1975,11 +2001,8 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - int orphan_count = percpu_counter_read_positive( - sk->sk_prot->orphan_count); - sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, orphan_count)) { + if (tcp_too_many_orphans(sk, 0)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); @@ -2115,7 +2138,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet * know which interface is going to be used */ - if (val < 8 || val > MAX_TCP_WINDOW) { + if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) { err = -EINVAL; break; } @@ -2880,7 +2903,7 @@ void __init tcp_init(void) { struct sk_buff *skb = NULL; unsigned long nr_pages, limit; - int order, i, max_share; + int i, max_share, cnt; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -2929,31 +2952,23 @@ void __init tcp_init(void) INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); } - /* Try to be a bit smarter and adjust defaults depending - * on available memory. - */ - for (order = 0; ((1 << order) << PAGE_SHIFT) < - (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); - order++) - ; - if (order >= 4) { - tcp_death_row.sysctl_max_tw_buckets = 180000; - sysctl_tcp_max_orphans = 4096 << (order - 4); - sysctl_max_syn_backlog = 1024; - } else if (order < 3) { - tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); - sysctl_tcp_max_orphans >>= (3 - order); - sysctl_max_syn_backlog = 128; - } + + cnt = tcp_hashinfo.ehash_size; + + tcp_death_row.sysctl_max_tw_buckets = cnt / 2; + sysctl_tcp_max_orphans = cnt / 2; + sysctl_max_syn_backlog = max(128, cnt / 256); /* Set the pressure threshold to be a fraction of global memory that * is up to 1/2 at 256 MB, decreasing toward zero with the amount of - * memory, with a floor of 128 pages. + * memory, with a floor of 128 pages, and a ceiling that prevents an + * integer overflow. 
*/ nr_pages = totalram_pages - totalhigh_pages; limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); limit = max(limit, 128UL); + limit = min(limit, INT_MAX * 4UL / 3 / 2); sysctl_tcp_mem[0] = limit / 4 * 3; sysctl_tcp_mem[1] = limit; sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2433bcde..ce1ce82e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3969,6 +3969,8 @@ static void tcp_reset(struct sock *sk) default: sk->sk_err = ECONNRESET; } + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_error_report(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 315805d1..f7382594 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5608cbd8..af83bdf5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -231,8 +231,6 @@ void tcp_select_initial_window(int __space, __u32 mss, if (*rcv_wnd > init_cwnd * mss) *rcv_wnd = init_cwnd * mss; } - /* Lock the initial TCP window size to 64K*/ - *rcv_wnd = 64240; /* Set the clamp no higher than max representable value */ (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); @@ -2039,6 +2037,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) int mib_idx; int fwd_rexmitting = 0; + if (!tp->packets_out) + return; + if (!tp->lost_out) tp->retransmit_high = tp->snd_una; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index cdb2ca76..57d55019 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -65,18 +65,18 @@ static void tcp_write_err(struct sock *sk) static int tcp_out_of_resources(struct sock *sk, int do_reset) { struct tcp_sock *tp = tcp_sk(sk); - int orphans = percpu_counter_read_positive(&tcp_orphan_count); + int shift = 0; /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) - orphans <<= 1; + shift++; /* If some dubious ICMP arrived, penalize even more. */ if (sk->sk_err_soft) - orphans <<= 1; + shift++; - if (tcp_too_many_orphans(sk, orphans)) { + if (tcp_too_many_orphans(sk, shift)) { if (net_ratelimit()) printk(KERN_INFO "Out of socket memory\n"); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c322f442..0ac88332 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1011,6 +1011,9 @@ csum_copy_err: if (noblock) return -EAGAIN; + + /* starting over for a new packet */ + msg->msg_flags &= ~MSG_TRUNC; goto try_again; } @@ -1832,12 +1835,14 @@ void __init udp_init(void) udp_table_init(&udp_table); /* Set the pressure threshold up by the same strategy of TCP. It is a * fraction of global memory that is up to 1/2 at 256 MB, decreasing - * toward zero with the amount of memory, with a floor of 128 pages. + * toward zero with the amount of memory, with a floor of 128 pages, + * and a ceiling that prevents an integer overflow. 
*/ nr_pages = totalram_pages - totalhigh_pages; limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); limit = max(limit, 128UL); + limit = min(limit, INT_MAX * 4UL / 3 / 2); sysctl_udp_mem[0] = limit / 4 * 3; sysctl_udp_mem[1] = limit; sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 74fb2eb8..e3a9a655 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -71,7 +71,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/ xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && - xdst->u.rt.fl.fl4_tos == fl->fl4_tos && + !((xdst->u.rt.fl.fl4_tos ^ fl->fl4_tos) & IPTOS_RT_MASK) && xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) { dst_clone(dst); break; @@ -83,7 +83,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) static int xfrm4_get_tos(struct flowi *fl) { - return fl->fl4_tos; + return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ } static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d1f77cce..8ac3d091 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -407,9 +407,6 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) dev->type == ARPHRD_TUNNEL6 || dev->type == ARPHRD_SIT || dev->type == ARPHRD_NONE) { - printk(KERN_INFO - "%s: Disabled Privacy Extensions\n", - dev->name); ndev->cnf.use_tempaddr = -1; } else { in6_dev_hold(ndev); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2f7c3bf9..ecd2cb52 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -121,8 +121,11 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) int try_loading_module = 0; int err; +//SU660 hobbes.song socket permission denied workaround +#if !(defined(STAR_COUNTRY_KR) && defined(STAR_OPERATOR_SKT)) if (!current_has_network()) return -EACCES; +#endif if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 1bcc3431..093e9b23 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -20,6 +20,7 @@ #include #include #include +#include #include void __inet6_hash(struct sock *sk) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cd48801a..eca3ef77 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -643,7 +643,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (skb_has_frags(skb)) { int first_len = skb_pagelen(skb); - int truesizes = 0; + struct sk_buff *frag2; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || @@ -655,18 +655,18 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) - goto slow_path; + goto slow_path_clean; /* Partially cloned skb? 
*/ if (skb_shared(frag)) - goto slow_path; + goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; - truesizes += frag->truesize; } + skb->truesize -= frag->truesize; } err = 0; @@ -697,7 +697,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); - skb->truesize -= truesizes; skb->len = first_len; ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); @@ -760,6 +759,15 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) IPSTATS_MIB_FRAGFAILS); dst_release(&rt->u.dst); return err; + +slow_path_clean: + skb_walk_frags(skb, frag2) { + if (frag2 == frag) + break; + frag2->sk = NULL; + frag2->destructor = NULL; + skb->truesize += frag2->truesize; + } } slow_path: diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c595bbe1..9a95c825 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -56,6 +56,7 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETDEV("ip6tnl0"); #define IPV6_TLV_TEL_DST_SIZE 8 diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 29d643bc..e5f6edc2 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -5,10 +5,15 @@ menu "IPv6: Netfilter Configuration" depends on INET && IPV6 && NETFILTER +config NF_DEFRAG_IPV6 + tristate + default n + config NF_CONNTRACK_IPV6 tristate "IPv6 connection tracking support" depends on INET && IPV6 && NF_CONNTRACK default m if NETFILTER_ADVANCED=n + select NF_DEFRAG_IPV6 ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1de56fdf..378a1714 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -29,6 +29,7 @@ #include #include #include +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); @@ -67,6 +68,12 @@ do { \ #define inline #endif +void *ip6t_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(ip6t, IP6T); +} +EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table); + /* We keep a set of rules for each CPU, so we can avoid write-locking them in the softirq when updating the counters and therefore @@ -190,7 +197,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6) } static unsigned int -ip6t_error(struct sk_buff *skb, const struct xt_target_param *par) +ip6t_error(struct sk_buff *skb, const struct xt_action_param *par) { if (net_ratelimit()) printk("ip6_tables: error: `%s'\n", @@ -199,23 +206,8 @@ ip6t_error(struct sk_buff *skb, const struct xt_target_param *par) return NF_DROP; } -/* Performance critical - called for every packet */ -static inline bool -do_match(struct ip6t_entry_match *m, const struct sk_buff *skb, - struct xt_match_param *par) -{ - par->match = m->u.kernel.match; - par->matchinfo = m->data; - - /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, par)) - return true; - else - return false; -} - static inline struct ip6t_entry * -get_entry(void *base, unsigned int offset) +get_entry(const void *base, unsigned int offset) { return (struct ip6t_entry *)(base + offset); } @@ -229,6 +221,12 @@ static inline bool unconditional(const struct ip6t_ip6 *ipv6) return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; } 
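
The ip6_tables.c hunk just below introduces ip6t_get_target_c(), a const-qualified wrapper that casts const away internally so the existing ip6t_get_target() helper can serve the read-only paths this patch constifies (trace_packet(), get_chainname_rulenum(), check_entry()). A minimal stand-alone illustration of that wrapper pattern, with invented names:

#include <stdio.h>
#include <stddef.h>

/* Invented miniature versions of ipt_entry / ipt_entry_target. */
struct target { int verdict; };
struct entry  { unsigned int target_offset; struct target t; };

/* Non-const accessor, analogous to ip6t_get_target(). */
static struct target *get_target(struct entry *e)
{
	return (struct target *)((char *)e + e->target_offset);
}

/* Const-qualified wrapper, analogous to ip6t_get_target_c(): the cast
 * away from const is confined to this one helper so that read-only
 * callers can stay const-clean. */
static const struct target *get_target_c(const struct entry *e)
{
	return get_target((struct entry *)e);
}

int main(void)
{
	struct entry e = { .target_offset = offsetof(struct entry, t) };

	get_target(&e)->verdict = -1;
	printf("verdict via const accessor: %d\n", get_target_c(&e)->verdict);
	return 0;
}

Keeping the cast inside one small helper means the const-qualified callers added throughout the rest of the file never need to cast themselves.
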
+static inline const struct ip6t_entry_target * +ip6t_get_target_c(const struct ip6t_entry *e) +{ + return ip6t_get_target((struct ip6t_entry *)e); +} + #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* This cries for unification! */ @@ -264,11 +262,11 @@ static struct nf_loginfo trace_loginfo = { /* Mildly perf critical (only if packet tracing is on) */ static inline int -get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, +get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, const char *hookname, const char **chainname, const char **comment, unsigned int *rulenum) { - struct ip6t_standard_target *t = (void *)ip6t_get_target(s); + const struct ip6t_standard_target *t = (void *)ip6t_get_target_c(s); if (strcmp(t->target.u.kernel.target->name, IP6T_ERROR_TARGET) == 0) { /* Head of user chain: ERROR target with chainname */ @@ -294,17 +292,18 @@ get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, return 0; } -static void trace_packet(struct sk_buff *skb, +static void trace_packet(const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, const char *tablename, - struct xt_table_info *private, - struct ip6t_entry *e) + const struct xt_table_info *private, + const struct ip6t_entry *e) { - void *table_base; + const void *table_base; const struct ip6t_entry *root; const char *hookname, *chainname, *comment; + const struct ip6t_entry *iter; unsigned int rulenum = 0; table_base = private->entries[smp_processor_id()]; @@ -313,10 +312,10 @@ static void trace_packet(struct sk_buff *skb, hookname = chainname = hooknames[hook]; comment = comments[NF_IP6_TRACE_COMMENT_RULE]; - IP6T_ENTRY_ITERATE(root, - private->size - private->hook_entry[hook], - get_chainname_rulenum, - e, hookname, &chainname, &comment, &rulenum); + xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) + if (get_chainname_rulenum(iter, e, hookname, + &chainname, &comment, &rulenum) != 0) + break; nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", @@ -338,18 +337,16 @@ ip6t_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table) { -#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom - static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; - void *table_base; - struct ip6t_entry *e, *back; - struct xt_table_info *private; - struct xt_match_param mtpar; - struct xt_target_param tgpar; + const void *table_base; + struct ip6t_entry *e, **jumpstack; + unsigned int *stackptr, origptr, cpu; + const struct xt_table_info *private; + struct xt_action_param acpar; /* Initialization */ indev = in ? in->name : nulldevname; @@ -360,40 +357,48 @@ ip6t_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. 
*/ - mtpar.hotdrop = &hotdrop; - mtpar.in = tgpar.in = in; - mtpar.out = tgpar.out = out; - mtpar.family = tgpar.family = NFPROTO_IPV6; - mtpar.hooknum = tgpar.hooknum = hook; + acpar.hotdrop = &hotdrop; + acpar.in = in; + acpar.out = out; + acpar.family = NFPROTO_IPV6; + acpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); xt_info_rdlock_bh(); private = table->private; - table_base = private->entries[smp_processor_id()]; + cpu = smp_processor_id(); + table_base = private->entries[cpu]; + jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; + stackptr = &private->stackptr[cpu]; + origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); - /* For return from builtin chain */ - back = get_entry(table_base, private->underflow[hook]); - do { - struct ip6t_entry_target *t; + const struct ip6t_entry_target *t; + const struct xt_entry_match *ematch; IP_NF_ASSERT(e); - IP_NF_ASSERT(back); if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, - &mtpar.thoff, &mtpar.fragoff, &hotdrop) || - IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) { + &acpar.thoff, &acpar.fragoff, &hotdrop)) { + no_match: e = ip6t_next_entry(e); continue; } + xt_ematch_foreach(ematch, e) { + acpar.match = ematch->u.kernel.match; + acpar.matchinfo = ematch->data; + if (!acpar.match->match(skb, &acpar)) + goto no_match; + } + ADD_COUNTER(e->counters, ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr), 1); - t = ip6t_get_target(e); + t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ @@ -414,41 +419,30 @@ ip6t_do_table(struct sk_buff *skb, verdict = (unsigned)(-v) - 1; break; } - e = back; - back = get_entry(table_base, back->comefrom); + if (*stackptr == 0) + e = get_entry(table_base, + private->underflow[hook]); + else + e = ip6t_next_entry(jumpstack[--*stackptr]); continue; } - if (table_base + v != ip6t_next_entry(e) - && !(e->ipv6.flags & IP6T_F_GOTO)) { - /* Save old back ptr in next entry */ - struct ip6t_entry *next = ip6t_next_entry(e); - next->comefrom = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + if (table_base + v != ip6t_next_entry(e) && + !(e->ipv6.flags & IP6T_F_GOTO)) { + if (*stackptr >= private->stacksize) { + verdict = NF_DROP; + break; + } + jumpstack[(*stackptr)++] = e; } e = get_entry(table_base, v); continue; } - /* Targets which reenter must return - abs. verdicts */ - tgpar.target = t->u.kernel.target; - tgpar.targinfo = t->data; + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; -#ifdef CONFIG_NETFILTER_DEBUG - tb_comefrom = 0xeeeeeeec; -#endif - verdict = t->u.kernel.target->target(skb, &tgpar); - -#ifdef CONFIG_NETFILTER_DEBUG - if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) { - printk("Target %s reentered!\n", - t->u.kernel.target->name); - verdict = NF_DROP; - } - tb_comefrom = 0x57acc001; -#endif + verdict = t->u.kernel.target->target(skb, &acpar); if (verdict == IP6T_CONTINUE) e = ip6t_next_entry(e); else @@ -456,10 +450,8 @@ ip6t_do_table(struct sk_buff *skb, break; } while (!hotdrop); -#ifdef CONFIG_NETFILTER_DEBUG - tb_comefrom = NETFILTER_LINK_POISON; -#endif xt_info_rdunlock_bh(); + *stackptr = origptr; #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -468,14 +460,12 @@ ip6t_do_table(struct sk_buff *skb, return NF_DROP; else return verdict; #endif - -#undef tb_comefrom } /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. 
*/ static int -mark_source_chains(struct xt_table_info *newinfo, +mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -493,8 +483,8 @@ mark_source_chains(struct xt_table_info *newinfo, e->counters.pcnt = pos; for (;;) { - struct ip6t_standard_target *t - = (void *)ip6t_get_target(e); + const struct ip6t_standard_target *t + = (void *)ip6t_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { @@ -584,27 +574,23 @@ mark_source_chains(struct xt_table_info *newinfo, return 1; } -static int -cleanup_match(struct ip6t_entry_match *m, unsigned int *i) +static void cleanup_match(struct ip6t_entry_match *m, struct net *net) { struct xt_mtdtor_param par; - if (i && (*i)-- == 0) - return 1; - + par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV6; if (par.match->destroy != NULL) par.match->destroy(&par); module_put(par.match->me); - return 0; } static int -check_entry(struct ip6t_entry *e, const char *name) +check_entry(const struct ip6t_entry *e, const char *name) { - struct ip6t_entry_target *t; + const struct ip6t_entry_target *t; if (!ip6_checkentry(&e->ipv6)) { duprintf("ip_tables: ip check failed %p %s.\n", e, name); @@ -615,15 +601,14 @@ check_entry(struct ip6t_entry *e, const char *name) e->next_offset) return -EINVAL; - t = ip6t_get_target(e); + t = ip6t_get_target_c(e); if (e->target_offset + t->u.target_size > e->next_offset) return -EINVAL; return 0; } -static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par, - unsigned int *i) +static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; int ret; @@ -638,27 +623,24 @@ static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par, par.match->name); return ret; } - ++*i; return 0; } static int -find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par, - unsigned int *i) +find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par) { struct xt_match *match; int ret; - match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, - m->u.user.revision), - "ip6t_%s", m->u.user.name); - if (IS_ERR(match) || !match) { + match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { duprintf("find_check_match: `%s' not found\n", m->u.user.name); - return match ? 
PTR_ERR(match) : -ENOENT; + return PTR_ERR(match); } m->u.kernel.match = match; - ret = check_match(m, par, i); + ret = check_match(m, par); if (ret) goto err; @@ -668,10 +650,11 @@ err: return ret; } -static int check_target(struct ip6t_entry *e, const char *name) +static int check_target(struct ip6t_entry *e, struct net *net, const char *name) { struct ip6t_entry_target *t = ip6t_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -693,61 +676,66 @@ static int check_target(struct ip6t_entry *e, const char *name) } static int -find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, + unsigned int size) { struct ip6t_entry_target *t; struct xt_target *target; int ret; unsigned int j; struct xt_mtchk_param mtpar; + struct xt_entry_match *ematch; ret = check_entry(e, name); if (ret) return ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV6; - ret = IP6T_MATCH_ITERATE(e, find_check_match, &mtpar, &j); - if (ret != 0) - goto cleanup_matches; + xt_ematch_foreach(ematch, e) { + ret = find_check_match(ematch, &mtpar); + if (ret != 0) + goto cleanup_matches; + ++j; + } t = ip6t_get_target(e); - target = try_then_request_module(xt_find_target(AF_INET6, - t->u.user.name, - t->u.user.revision), - "ip6t_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("find_check_entry: `%s' not found\n", t->u.user.name); - ret = target ? PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto cleanup_matches; } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; - - (*i)++; return 0; err: module_put(t->u.kernel.target->me); cleanup_matches: - IP6T_MATCH_ITERATE(e, cleanup_match, &j); + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + cleanup_match(ematch, net); + } return ret; } -static bool check_underflow(struct ip6t_entry *e) +static bool check_underflow(const struct ip6t_entry *e) { const struct ip6t_entry_target *t; unsigned int verdict; if (!unconditional(&e->ipv6)) return false; - t = ip6t_get_target(e); + t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct ip6t_standard_target *)t)->verdict; @@ -758,12 +746,11 @@ static bool check_underflow(struct ip6t_entry *e) static int check_entry_size_and_hooks(struct ip6t_entry *e, struct xt_table_info *newinfo, - unsigned char *base, - unsigned char *limit, + const unsigned char *base, + const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int valid_hooks, - unsigned int *i) + unsigned int valid_hooks) { unsigned int h; @@ -800,50 +787,41 @@ check_entry_size_and_hooks(struct ip6t_entry *e, /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; - - (*i)++; return 0; } -static int -cleanup_entry(struct ip6t_entry *e, unsigned int *i) +static void cleanup_entry(struct ip6t_entry *e, struct net *net) { struct xt_tgdtor_param par; struct ip6t_entry_target *t; - - if (i && (*i)-- == 0) - return 1; + struct xt_entry_match *ematch; /* Cleanup all matches */ - IP6T_MATCH_ITERATE(e, cleanup_match, NULL); + xt_ematch_foreach(ematch, e) + cleanup_match(ematch, 
net); t = ip6t_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV6; if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - return 0; } /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info *newinfo, - void *entry0, - unsigned int size, - unsigned int number, - const unsigned int *hook_entries, - const unsigned int *underflows) +translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, + const struct ip6t_replace *repl) { + struct ip6t_entry *iter; unsigned int i; - int ret; + int ret = 0; - newinfo->size = size; - newinfo->number = number; + newinfo->size = repl->size; + newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -854,49 +832,59 @@ translate_table(const char *name, duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ - ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, - check_entry_size_and_hooks, - newinfo, - entry0, - entry0 + size, - hook_entries, underflows, valid_hooks, &i); - if (ret != 0) - return ret; + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + repl->size, repl->hook_entry, repl->underflow, + repl->valid_hooks); + if (ret != 0) + return ret; + ++i; + if (strcmp(ip6t_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } - if (i != number) { + if (i != repl->num_entries) { duprintf("translate_table: %u not %u entries\n", - i, number); + i, repl->num_entries); return -EINVAL; } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, repl->hook_entry[i]); return -EINVAL; } if (newinfo->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, repl->underflow[i]); return -EINVAL; } } - if (!mark_source_chains(newinfo, valid_hooks, entry0)) + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; /* Finally, each sanity check must pass */ i = 0; - ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, net, repl->name, repl->size); + if (ret != 0) + break; + ++i; + } if (ret != 0) { - IP6T_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter, net); + } return ret; } @@ -909,33 +897,11 @@ translate_table(const char *name, return ret; } -/* Gets counters. 
*/ -static inline int -add_entry_to_counter(const struct ip6t_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - -static inline int -set_entry_to_counter(const struct ip6t_entry *e, - struct ip6t_counters total[], - unsigned int *i) -{ - SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { + struct ip6t_entry *iter; unsigned int cpu; unsigned int i; unsigned int curcpu; @@ -951,32 +917,32 @@ get_counters(const struct xt_table_info *t, curcpu = smp_processor_id(); i = 0; - IP6T_ENTRY_ITERATE(t->entries[curcpu], - t->size, - set_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[curcpu], t->size) { + SET_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; i = 0; xt_info_wrlock(cpu); - IP6T_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[cpu], t->size) { + ADD_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } xt_info_wrunlock(cpu); } local_bh_enable(); } -static struct xt_counters *alloc_counters(struct xt_table *table) +static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -994,11 +960,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) static int copy_entries_to_user(unsigned int total_size, - struct xt_table *table, + const struct xt_table *table, void __user *userptr) { unsigned int off, num; - struct ip6t_entry *e; + const struct ip6t_entry *e; struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; @@ -1050,7 +1016,7 @@ copy_entries_to_user(unsigned int total_size, } } - t = ip6t_get_target(e); + t = ip6t_get_target_c(e); if (copy_to_user(userptr + off + e->target_offset + offsetof(struct ip6t_entry_target, u.user.name), @@ -1085,25 +1051,20 @@ static int compat_standard_to_user(void __user *dst, void *src) return copy_to_user(dst, &cv, sizeof(cv)) ? 
-EFAULT : 0; } -static inline int -compat_calc_match(struct ip6t_entry_match *m, int *size) -{ - *size += xt_compat_match_offset(m->u.kernel.match); - return 0; -} - -static int compat_calc_entry(struct ip6t_entry *e, +static int compat_calc_entry(const struct ip6t_entry *e, const struct xt_table_info *info, - void *base, struct xt_table_info *newinfo) + const void *base, struct xt_table_info *newinfo) { - struct ip6t_entry_target *t; + const struct xt_entry_match *ematch; + const struct ip6t_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - base; - IP6T_MATCH_ITERATE(e, compat_calc_match, &off); - t = ip6t_get_target(e); + xt_ematch_foreach(ematch, e) + off += xt_compat_match_offset(ematch->u.kernel.match); + t = ip6t_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(AF_INET6, entry_offset, off); @@ -1124,7 +1085,9 @@ static int compat_calc_entry(struct ip6t_entry *e, static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { + struct ip6t_entry *iter; void *loc_cpu_entry; + int ret; if (!newinfo || !info) return -EINVAL; @@ -1133,13 +1096,17 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; - return IP6T_ENTRY_ITERATE(loc_cpu_entry, info->size, - compat_calc_entry, info, loc_cpu_entry, - newinfo); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + return ret; + } + return 0; } #endif -static int get_info(struct net *net, void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, + const int *len, int compat) { char name[IP6T_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1199,7 +1166,8 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) } static int -get_entries(struct net *net, struct ip6t_get_entries __user *uptr, int *len) +get_entries(struct net *net, struct ip6t_get_entries __user *uptr, + const int *len) { int ret; struct ip6t_get_entries get; @@ -1247,6 +1215,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *oldinfo; struct xt_counters *counters; const void *loc_cpu_old_entry; + struct ip6t_entry *iter; ret = 0; counters = vmalloc_node(num_counters * sizeof(struct xt_counters), @@ -1290,8 +1259,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + cleanup_entry(iter, net); + xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) @@ -1310,12 +1280,13 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, } static int -do_replace(struct net *net, void __user *user, unsigned int len) +do_replace(struct net *net, const void __user *user, unsigned int len) { int ret; struct ip6t_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct ip6t_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1323,6 +1294,7 @@ 
do_replace(struct net *net, void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1336,9 +1308,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, - tmp.hook_entry, tmp.underflow); + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -1351,27 +1321,15 @@ do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ static int -add_counter_to_entry(struct ip6t_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -static int -do_add_counters(struct net *net, void __user *user, unsigned int len, +do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { unsigned int i, curcpu; @@ -1385,6 +1343,7 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, const struct xt_table_info *private; int ret = 0; const void *loc_cpu_entry; + struct ip6t_entry *iter; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1443,11 +1402,10 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, curcpu = smp_processor_id(); xt_info_wrlock(curcpu); loc_cpu_entry = private->entries[curcpu]; - IP6T_ENTRY_ITERATE(loc_cpu_entry, - private->size, - add_counter_to_entry, - paddc, - &i); + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } xt_info_wrunlock(curcpu); unlock_up_free: @@ -1476,45 +1434,40 @@ struct compat_ip6t_replace { static int compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, unsigned int *size, struct xt_counters *counters, - unsigned int *i) + unsigned int i) { struct ip6t_entry_target *t; struct compat_ip6t_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; - int ret; + const struct xt_entry_match *ematch; + int ret = 0; - ret = -EFAULT; origsize = *size; ce = (struct compat_ip6t_entry __user *)*dstptr; - if (copy_to_user(ce, e, sizeof(struct ip6t_entry))) - goto out; - - if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) - goto out; + if (copy_to_user(ce, e, sizeof(struct ip6t_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; *dstptr += sizeof(struct compat_ip6t_entry); *size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); - ret = IP6T_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size); + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_to_user(ematch, dstptr, size); + if (ret != 0) + return ret; + } target_offset = e->target_offset - (origsize - *size); - if (ret) - goto out; t = ip6t_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) - goto out; - ret = -EFAULT; + return ret; next_offset = e->next_offset - (origsize - *size); - if (put_user(target_offset, 
&ce->target_offset)) - goto out; - if (put_user(next_offset, &ce->next_offset)) - goto out; - - (*i)++; + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; return 0; -out: - return ret; } static int @@ -1522,61 +1475,45 @@ compat_find_calc_match(struct ip6t_entry_match *m, const char *name, const struct ip6t_ip6 *ipv6, unsigned int hookmask, - int *size, unsigned int *i) + int *size) { struct xt_match *match; - match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, - m->u.user.revision), - "ip6t_%s", m->u.user.name); - if (IS_ERR(match) || !match) { + match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { duprintf("compat_check_calc_match: `%s' not found\n", m->u.user.name); - return match ? PTR_ERR(match) : -ENOENT; + return PTR_ERR(match); } m->u.kernel.match = match; *size += xt_compat_match_offset(match); - - (*i)++; return 0; } -static int -compat_release_match(struct ip6t_entry_match *m, unsigned int *i) -{ - if (i && (*i)-- == 0) - return 1; - - module_put(m->u.kernel.match->me); - return 0; -} - -static int -compat_release_entry(struct compat_ip6t_entry *e, unsigned int *i) +static void compat_release_entry(struct compat_ip6t_entry *e) { struct ip6t_entry_target *t; - - if (i && (*i)-- == 0) - return 1; + struct xt_entry_match *ematch; /* Cleanup all matches */ - COMPAT_IP6T_MATCH_ITERATE(e, compat_release_match, NULL); + xt_ematch_foreach(ematch, e) + module_put(ematch->u.kernel.match->me); t = compat_ip6t_get_target(e); module_put(t->u.kernel.target->me); - return 0; } static int check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, - unsigned char *base, - unsigned char *limit, - unsigned int *hook_entries, - unsigned int *underflows, - unsigned int *i, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, const char *name) { + struct xt_entry_match *ematch; struct ip6t_entry_target *t; struct xt_target *target; unsigned int entry_offset; @@ -1605,20 +1542,21 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - (void *)base; j = 0; - ret = COMPAT_IP6T_MATCH_ITERATE(e, compat_find_calc_match, name, - &e->ipv6, e->comefrom, &off, &j); - if (ret != 0) - goto release_matches; + xt_ematch_foreach(ematch, e) { + ret = compat_find_calc_match(ematch, name, + &e->ipv6, e->comefrom, &off); + if (ret != 0) + goto release_matches; + ++j; + } t = compat_ip6t_get_target(e); - target = try_then_request_module(xt_find_target(AF_INET6, - t->u.user.name, - t->u.user.revision), - "ip6t_%s", t->u.user.name); - if (IS_ERR(target) || !target) { + target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", t->u.user.name); - ret = target ? 
PTR_ERR(target) : -ENOENT; + ret = PTR_ERR(target); goto release_matches; } t->u.kernel.target = target; @@ -1640,14 +1578,16 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, /* Clear counters and comefrom */ memset(&e->counters, 0, sizeof(e->counters)); e->comefrom = 0; - - (*i)++; return 0; out: module_put(t->u.kernel.target->me); release_matches: - IP6T_MATCH_ITERATE(e, compat_release_match, &j); + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + module_put(ematch->u.kernel.match->me); + } return ret; } @@ -1661,6 +1601,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, struct ip6t_entry *de; unsigned int origsize; int ret, h; + struct xt_entry_match *ematch; ret = 0; origsize = *size; @@ -1671,10 +1612,11 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); - ret = COMPAT_IP6T_MATCH_ITERATE(e, xt_compat_match_from_user, - dstptr, size); - if (ret) - return ret; + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_from_user(ematch, dstptr, size); + if (ret != 0) + return ret; + } de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); target = t->u.kernel.target; @@ -1690,36 +1632,44 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, return ret; } -static int compat_check_entry(struct ip6t_entry *e, const char *name, - unsigned int *i) +static int compat_check_entry(struct ip6t_entry *e, struct net *net, + const char *name) { unsigned int j; - int ret; + int ret = 0; struct xt_mtchk_param mtpar; + struct xt_entry_match *ematch; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV6; - ret = IP6T_MATCH_ITERATE(e, check_match, &mtpar, &j); + xt_ematch_foreach(ematch, e) { + ret = check_match(ematch, &mtpar); + if (ret != 0) + goto cleanup_matches; + ++j; + } + + ret = check_target(e, net, name); if (ret) goto cleanup_matches; - - ret = check_target(e, name); - if (ret) - goto cleanup_matches; - - (*i)++; return 0; cleanup_matches: - IP6T_MATCH_ITERATE(e, cleanup_match, &j); + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + cleanup_match(ematch, net); + } return ret; } static int -translate_compat_table(const char *name, +translate_compat_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, @@ -1731,8 +1681,10 @@ translate_compat_table(const char *name, unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; + struct compat_ip6t_entry *iter0; + struct ip6t_entry *iter1; unsigned int size; - int ret; + int ret = 0; info = *pinfo; entry0 = *pentry0; @@ -1749,13 +1701,14 @@ translate_compat_table(const char *name, j = 0; xt_compat_lock(AF_INET6); /* Walk through entries, checking offsets. 
*/ - ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, - check_compat_entry_size_and_hooks, - info, &size, entry0, - entry0 + total_size, - hook_entries, underflows, &j, name); - if (ret != 0) - goto out_unlock; + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, entry0 + total_size, hook_entries, underflows, + name); + if (ret != 0) + goto out_unlock; + ++j; + } ret = -EINVAL; if (j != number) { @@ -1794,9 +1747,12 @@ translate_compat_table(const char *name, entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; size = total_size; - ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, - compat_copy_entry_from_user, - &pos, &size, name, newinfo, entry1); + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, + &size, name, newinfo, entry1); + if (ret != 0) + break; + } xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); if (ret) @@ -1807,13 +1763,32 @@ translate_compat_table(const char *name, goto free_newinfo; i = 0; - ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = compat_check_entry(iter1, net, name); + if (ret != 0) + break; + ++i; + } if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. + */ + int skip = i; j -= i; - COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, - compat_release_entry, &j); - IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (j-- == 0) + break; + compat_release_entry(iter0); + } + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter1, net); + } xt_free_table_info(newinfo); return ret; } @@ -1831,7 +1806,11 @@ translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: - COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) + break; + compat_release_entry(iter0); + } return ret; out_unlock: xt_compat_flush_offsets(AF_INET6); @@ -1846,6 +1825,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) struct compat_ip6t_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct ip6t_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1855,6 +1835,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1868,7 +1849,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, + ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, &newinfo, &loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); @@ -1884,7 +1865,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1933,6 +1915,7 @@ compat_copy_entries_to_user(unsigned int 
total_size, struct xt_table *table, int ret = 0; const void *loc_cpu_entry; unsigned int i = 0; + struct ip6t_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1945,9 +1928,12 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - ret = IP6T_ENTRY_ITERATE(loc_cpu_entry, total_size, - compat_copy_entry_to_user, - &pos, &size, counters, &i); + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, i++); + if (ret != 0) + break; + } vfree(counters); return ret; @@ -2079,6 +2065,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; if (cmd == IP6T_SO_GET_REVISION_TARGET) target = 1; @@ -2106,8 +2093,7 @@ struct xt_table *ip6t_register_table(struct net *net, { int ret; struct xt_table_info *newinfo; - struct xt_table_info bootstrap - = { 0, 0, 0, { 0 }, { 0 }, { } }; + struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; @@ -2121,11 +2107,7 @@ struct xt_table *ip6t_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, - newinfo, loc_cpu_entry, repl->size, - repl->num_entries, - repl->hook_entry, - repl->underflow); + ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) goto out_free; @@ -2142,17 +2124,19 @@ out: return ERR_PTR(ret); } -void ip6t_unregister_table(struct xt_table *table) +void ip6t_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; + struct ip6t_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); @@ -2169,7 +2153,7 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par) +icmp6_match(const struct sk_buff *skb, const struct xt_action_param *par) { const struct icmp6hdr *ic; struct icmp6hdr _icmph; @@ -2197,7 +2181,7 @@ icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par) } /* Called when user tries to insert an entry of this type. 
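The conversions above from IP6T_MATCH_ITERATE()'s callback-plus-counter idiom to xt_ematch_foreach()/xt_entry_foreach() keep an explicit count of how far initialisation got, and the comment added in translate_compat_table() spells out why: entries whose ->check already ran need the full destroy, while the rest only need their module reference dropped. A self-contained sketch of that unwind pattern, using hypothetical check_one()/destroy_one()/release_one() helpers in place of the xtables hooks:

#include <stdio.h>

#define NENTRIES 5

/* Hypothetical stand-ins for ->checkentry, ->destroy and module_put();
 * entry 3 is made to fail its check. */
static int  check_one(int i)   { return i == 3 ? -1 : 0; }
static void destroy_one(int i) { printf("destroy %d\n", i); }
static void release_one(int i) { printf("release %d\n", i); }

static int check_entries(void)
{
    int i, ret = 0;
    int j = NENTRIES;          /* all lookups succeeded, refs are held */

    for (i = 0; i < j; i++) {
        ret = check_one(i);
        if (ret != 0)
            break;
    }
    if (ret == 0)
        return 0;

    /* Unwind: the first i entries had their check run and need the full
     * destroy; the remaining j - i only need the reference released. */
    for (int k = 0; k < i; k++)
        destroy_one(k);
    for (int k = i; k < j; k++)
        release_one(k);
    return ret;
}

int main(void)
{
    return check_entries() ? 1 : 0;
}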
*/ -static bool icmp6_checkentry(const struct xt_mtchk_param *par) +static int icmp6_checkentry(const struct xt_mtchk_param *par) { const struct ip6t_icmp *icmpinfo = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 7018cac4..90c37aad 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -11,7 +11,6 @@ */ #include -#include #include #include #include @@ -437,7 +436,7 @@ ip6t_log_packet(u_int8_t pf, } static unsigned int -log_tg6(struct sk_buff *skb, const struct xt_target_param *par) +log_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_log_info *loginfo = par->targinfo; struct nf_loginfo li; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 5a7f00cd..844c6b0d 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -43,6 +43,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) int tcphoff, needs_ack; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); struct ipv6hdr *ip6h; +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; struct dst_entry *dst = NULL; u8 proto; struct flowi fl; @@ -95,9 +97,11 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) fl.fl_ip_dport = otcph.source; security_skb_classify_flow(oldskb, &fl); dst = ip6_route_output(net, NULL, &fl); - if (dst == NULL) + if (dst == NULL || dst->error) { + dst_release(dst); return; - if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0)) + } + if (xfrm_lookup(net, &dst, &fl, NULL, 0)) return; hh_len = (dst->dev->hard_header_len + 15)&~15; @@ -119,7 +123,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); - ip6h->version = 6; + *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20)); ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); ip6h->nexthdr = IPPROTO_TCP; ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); @@ -173,15 +177,12 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, } static unsigned int -reject_tg6(struct sk_buff *skb, const struct xt_target_param *par) +reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_reject_info *reject = par->targinfo; struct net *net = dev_net((par->in != NULL) ? par->in : par->out); pr_debug("%s: medium point\n", __func__); - /* WARNING: This code causes reentry within ip6tables. - This means that the ip6tables jump stack is now crap. We - must return an absolute verdict. 
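The send_reset() hunk above stops setting only ip6h->version and instead writes the whole first 32-bit word of the IPv6 header, so the traffic class (and an implicitly zero flow label) are initialised in one store: version in the top 4 bits, traffic class in the next 8, flow label in the low 20. A small sketch of that packing, assuming only the layout of the first IPv6 header word:

#include <assert.h>
#include <stdint.h>

/* Pack version/traffic class/flow label into the first word of an IPv6
 * header, in host byte order; htonl() would be applied before storing
 * it into the packet, as the patch does. */
static uint32_t ipv6_first_word(uint8_t tclass, uint32_t flowlabel)
{
    return 0x60000000u | ((uint32_t)tclass << 20) | (flowlabel & 0xFFFFFu);
}

int main(void)
{
    uint32_t w = ipv6_first_word(0 /* DEFAULT_TOS_VALUE */, 0);

    assert((w >> 28) == 6);             /* version */
    assert(((w >> 20) & 0xFF) == 0);    /* traffic class */
    assert((w & 0xFFFFF) == 0);         /* flow label */
    return 0;
}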
--RR */ switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 3a82f247..e08795c6 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -36,7 +36,8 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +static bool ah_mt6(const struct sk_buff *skb, + const struct xt_action_param *par) { struct ip_auth_hdr _ah; const struct ip_auth_hdr *ah; @@ -90,7 +91,7 @@ static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par) !(ahinfo->hdrres && ah->reserved); } -static bool ah_mt6_check(const struct xt_mtchk_param *par) +static int ah_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_ah *ahinfo = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index ca287f6d..2fd2be17 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -20,7 +20,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andras Kis-Szabo "); static bool -eui64_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +eui64_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { unsigned char eui64[8]; diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 673aa0a5..f1154a1a 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -35,7 +35,7 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) } static bool -frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +frag_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { struct frag_hdr _frag; const struct frag_hdr *fh; @@ -107,7 +107,7 @@ frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par) && (ntohs(fh->frag_off) & IP6_MF)); } -static bool frag_mt6_check(const struct xt_mtchk_param *par) +static int frag_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_frag *fraginfo = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index cbe8dec9..4d832717 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -42,7 +42,7 @@ MODULE_ALIAS("ip6t_dst"); */ static bool -hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +hbh_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { struct ipv6_opt_hdr _optsh; const struct ipv6_opt_hdr *oh; @@ -160,7 +160,7 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) return false; } -static bool hbh_mt6_check(const struct xt_mtchk_param *par) +static int hbh_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_opts *optsinfo = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 91490ad9..9d799916 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("Xtables: IPv6 header types match"); MODULE_AUTHOR("Andras Kis-Szabo "); static bool -ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +ipv6header_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_ipv6header_info *info = par->matchinfo; unsigned int temp; @@ -118,7 +118,7 @@ ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par) } } -static bool 
ipv6header_mt6_check(const struct xt_mtchk_param *par) +static int ipv6header_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_ipv6header_info *info = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index aafe4e66..9365a2a1 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -11,6 +11,7 @@ * Based on net/netfilter/xt_tcpudp.c * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -24,12 +25,6 @@ MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match"); MODULE_LICENSE("GPL"); -#ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - /* Returns 1 if the type is matched by the range, 0 otherwise */ static inline bool type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) @@ -37,7 +32,8 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) return (type >= min && type <= max) ^ invert; } -static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +static bool mh_mt6(const struct sk_buff *skb, + const struct xt_action_param *par) { struct ip6_mh _mh; const struct ip6_mh *mh; @@ -51,13 +47,13 @@ static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) if (mh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ - duprintf("Dropping evil MH tinygram.\n"); + pr_debug("Dropping evil MH tinygram.\n"); *par->hotdrop = true; return false; } if (mh->ip6mh_proto != IPPROTO_NONE) { - duprintf("Dropping invalid MH Payload Proto: %u\n", + pr_debug("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); *par->hotdrop = true; return false; @@ -67,7 +63,7 @@ static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) !!(mhinfo->invflags & IP6T_MH_INV_TYPE)); } -static bool mh_mt6_check(const struct xt_mtchk_param *par) +static int mh_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_mh *mhinfo = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 356b8d6f..34b2e14d 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -36,7 +36,8 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) return r; } -static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +static bool rt_mt6(const struct sk_buff *skb, + const struct xt_action_param *par) { struct ipv6_rt_hdr _route; const struct ipv6_rt_hdr *rh; @@ -186,7 +187,7 @@ static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par) return false; } -static bool rt_mt6_check(const struct xt_mtchk_param *par) +static int rt_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_rt *rtinfo = par->matchinfo; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 6f4383ad..64ae7995 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -21,36 +21,6 @@ MODULE_DESCRIPTION("ip6tables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) -static struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[3]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = 
sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -121,9 +91,18 @@ module_param(forward, bool, 0000); static int __net_init ip6table_filter_net_init(struct net *net) { - /* Register table */ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + /* Entry 1 is the FORWARD hook */ + ((struct ip6t_standard *)repl->entries)[1].target.verdict = + -forward - 1; + net->ipv6.ip6table_filter = - ip6t_register_table(net, &packet_filter, &initial_table.repl); + ip6t_register_table(net, &packet_filter, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_filter)) return PTR_ERR(net->ipv6.ip6table_filter); return 0; @@ -131,7 +110,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) static void __net_exit ip6table_filter_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_filter); + ip6t_unregister_table(net, net->ipv6.ip6table_filter); } static struct pernet_operations ip6table_filter_net_ops = { @@ -148,9 +127,6 @@ static int __init ip6table_filter_init(void) return -EINVAL; } - /* Entry 1 is the FORWARD hook */ - initial_table.entries[1].target.verdict = -forward - 1; - ret = register_pernet_subsys(&ip6table_filter_net_ops); if (ret < 0) return ret; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 0ad91433..79cb7b0b 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -21,42 +21,6 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -static const struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[5]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "mangle", - .valid_hooks = MANGLE_VALID_HOOKS, - .num_entries = 6, - .size = sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, @@ -172,9 +136,14 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { static int __net_init ip6table_mangle_net_init(struct net *net) { - /* Register table 
*/ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&packet_mangler); + if (repl == NULL) + return -ENOMEM; net->ipv6.ip6table_mangle = - ip6t_register_table(net, &packet_mangler, &initial_table.repl); + ip6t_register_table(net, &packet_mangler, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_mangle)) return PTR_ERR(net->ipv6.ip6table_mangle); return 0; @@ -182,7 +151,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net) static void __net_exit ip6table_mangle_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_mangle); + ip6t_unregister_table(net, net->ipv6.ip6table_mangle); } static struct pernet_operations ip6table_mangle_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index ed1a1180..98909515 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -8,33 +8,6 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static const struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[2]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "raw", - .valid_hooks = RAW_VALID_HOOKS, - .num_entries = 3, - .size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -84,9 +57,14 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { static int __net_init ip6table_raw_net_init(struct net *net) { - /* Register table */ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&packet_raw); + if (repl == NULL) + return -ENOMEM; net->ipv6.ip6table_raw = - ip6t_register_table(net, &packet_raw, &initial_table.repl); + ip6t_register_table(net, &packet_raw, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_raw)) return PTR_ERR(net->ipv6.ip6table_raw); return 0; @@ -94,7 +72,7 @@ static int __net_init ip6table_raw_net_init(struct net *net) static void __net_exit ip6table_raw_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_raw); + ip6t_unregister_table(net, net->ipv6.ip6table_raw); } static struct pernet_operations ip6table_raw_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 41b444c6..b9b4ac1d 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -26,36 +26,6 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static const struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[3]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "security", - .valid_hooks = SECURITY_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2, - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 
2, - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, @@ -123,9 +93,14 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { static int __net_init ip6table_security_net_init(struct net *net) { - net->ipv6.ip6table_security = - ip6t_register_table(net, &security_table, &initial_table.repl); + struct ip6t_replace *repl; + repl = ip6t_alloc_initial_table(&security_table); + if (repl == NULL) + return -ENOMEM; + net->ipv6.ip6table_security = + ip6t_register_table(net, &security_table, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_security)) return PTR_ERR(net->ipv6.ip6table_security); @@ -134,7 +109,7 @@ static int __net_init ip6table_security_net_init(struct net *net) static void __net_exit ip6table_security_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_security); + ip6t_unregister_table(net, net->ipv6.ip6table_security); } static struct pernet_operations ip6table_security_net_ops = { diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 0956ebab..3d8b7ddb 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -159,7 +160,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); @@ -191,15 +192,20 @@ out: static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { + u16 zone = NF_CT_DEFAULT_ZONE; + + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); + #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) - return IP6_DEFRAG_CONNTRACK_BRIDGE_IN; + return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; #endif if (hooknum == NF_INET_PRE_ROUTING) - return IP6_DEFRAG_CONNTRACK_IN; + return IP6_DEFRAG_CONNTRACK_IN + zone; else - return IP6_DEFRAG_CONNTRACK_OUT; + return IP6_DEFRAG_CONNTRACK_OUT + zone; } @@ -212,7 +218,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *reasm; /* Previously seen (loopback)? */ - if (skb->nfct) + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 642dcb12..6014f7b9 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -128,7 +129,7 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, } static int -icmpv6_error_message(struct net *net, +icmpv6_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int icmp6off, enum ip_conntrack_info *ctinfo, @@ -137,6 +138,7 @@ icmpv6_error_message(struct net *net, struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_l4proto *inproto; + u16 zone = tmpl ? 
nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; NF_CT_ASSERT(skb->nfct == NULL); @@ -163,7 +165,7 @@ icmpv6_error_message(struct net *net, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(net, &intuple); + h = nf_conntrack_find_get(net, zone, &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; @@ -179,7 +181,8 @@ icmpv6_error_message(struct net *net, } static int -icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, +icmpv6_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; @@ -205,7 +208,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); return NF_ACCEPT; @@ -215,7 +218,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(net, skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 4b6a539c..e5590a9c 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -63,6 +63,7 @@ struct nf_ct_frag6_queue struct inet_frag_queue q; __be32 id; /* fragment id */ + u32 user; struct in6_addr saddr; struct in6_addr daddr; @@ -473,7 +474,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ fp = skb_shinfo(head)->frag_list; - if (NFCT_FRAG6_CB(fp)->orig == NULL) + if (fp && NFCT_FRAG6_CB(fp)->orig == NULL) /* at above code, head skb is divided into two skbs. */ fp = fp->next; @@ -599,12 +600,6 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - if (!(fhdr->frag_off & htons(0xFFF9))) { - pr_debug("Invalid fragment offset\n"); - /* It is not a fragmented frame */ - goto ret_orig; - } - if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) nf_ct_frag6_evictor(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d6fe7646..e307517c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1561,14 +1561,13 @@ out: * i.e. Path MTU discovery */ -void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, - struct net_device *dev, u32 pmtu) +static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, + struct net *net, u32 pmtu, int ifindex) { struct rt6_info *rt, *nrt; - struct net *net = dev_net(dev); int allfrag = 0; - rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); + rt = rt6_lookup(net, daddr, saddr, ifindex, 0); if (rt == NULL) return; @@ -1636,6 +1635,27 @@ out: dst_release(&rt->u.dst); } +void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, + struct net_device *dev, u32 pmtu) +{ + struct net *net = dev_net(dev); + + /* + * RFC 1981 states that a node "MUST reduce the size of the packets it + * is sending along the path" that caused the Packet Too Big message. 
+ * Since it's not possible in the general case to determine which + * interface was used to send the original packet, we update the MTU + * on the interface that will be used to send future packets. We also + * update the MTU on the interface that received the Packet Too Big in + * case the original packet was forced out that interface with + * SO_BINDTODEVICE or similar. This is the next best thing to the + * correct behaviour, which would be to update the MTU on all + * interfaces. + */ + rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); + rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); +} + /* * Misc support functions */ diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index dbd19a78..de2ffefc 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1101,4 +1101,4 @@ static int __init sit_init(void) module_init(sit_init); module_exit(sit_cleanup); MODULE_LICENSE("GPL"); -MODULE_ALIAS("sit0"); +MODULE_ALIAS_NETDEV("sit0"); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 21d100b6..faae6dfd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -60,6 +60,7 @@ #include #include #include +#include #include diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index cf538ed5..ca520d4d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -304,8 +304,11 @@ csum_copy_err: } release_sock(sk); - if (flags & MSG_DONTWAIT) + if (noblock) return -EAGAIN; + + /* starting over for a new packet */ + msg->msg_flags &= ~MSG_TRUNC; goto try_again; } diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 81a95c00..473f8793 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -344,13 +344,19 @@ static struct xfrm6_tunnel xfrm46_tunnel_handler = { static int __init xfrm6_tunnel_init(void) { - if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) + int rv; + + rv = xfrm_register_type(&xfrm6_tunnel_type, AF_INET6); + if (rv < 0) goto err; - if (xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6)) + rv = xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6); + if (rv < 0) goto unreg; - if (xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET)) + rv = xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET); + if (rv < 0) goto dereg6; - if (xfrm6_tunnel_spi_init() < 0) + rv = xfrm6_tunnel_spi_init(); + if (rv < 0) goto dereg46; return 0; @@ -361,7 +367,7 @@ dereg6: unreg: xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); err: - return -EAGAIN; + return rv; } static void __exit xfrm6_tunnel_fini(void) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 634d14af..cda47005 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -83,6 +83,19 @@ config NF_CONNTRACK_SECMARK If unsure, say 'N'. +config NF_CONNTRACK_ZONES + bool 'Connection tracking zones' + depends on NETFILTER_ADVANCED + depends on NETFILTER_XT_TARGET_CT + help + This option enables support for connection tracking zones. + Normally, each connection needs to have a unique system wide + identity. Connection tracking zones allow to have multiple + connections using the same identity, as long as they are + contained in different zones. + + If unsure, say `N'. + config NF_CONNTRACK_EVENTS bool "Connection tracking events" depends on NETFILTER_ADVANCED @@ -301,6 +314,35 @@ config NETFILTER_XTABLES if NETFILTER_XTABLES +comment "Xtables combined modules" + +config NETFILTER_XT_MARK + tristate 'nfmark target and match support' + default m if NETFILTER_ADVANCED=n + ---help--- + This option adds the "MARK" target and "mark" match. 
+ + Netfilter mark matching allows you to match packets based on the + "nfmark" value in the packet. + The target allows you to create rules in the "mangle" table which alter + the netfilter mark (nfmark) field associated with the packet. + + Prior to routing, the nfmark can influence the routing method (see + "Use netfilter MARK value as routing key") and can also be used by + other subsystems to change their behavior. + +config NETFILTER_XT_CONNMARK + tristate 'ctmark target and match support' + depends on NF_CONNTRACK + depends on NETFILTER_ADVANCED + select NF_CONNTRACK_MARK + ---help--- + This option adds the "CONNMARK" target and "connmark" match. + + Netfilter allows you to store a mark value per connection (a.k.a. + ctmark), similarly to the packet mark (nfmark). Using this + target and match, you can set and match on this mark. + # alphabetically ordered list of targets config NETFILTER_XT_TARGET_CLASSIFY @@ -319,15 +361,11 @@ config NETFILTER_XT_TARGET_CONNMARK tristate '"CONNMARK" target support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED - select NF_CONNTRACK_MARK - help - This option adds a `CONNMARK' target, which allows one to manipulate - the connection mark value. Similar to the MARK target, but - affects the connection mark value rather than the packet mark value. - - If you want to compile it as a module, say M here and read - . The module will be called - ipt_CONNMARK. If unsure, say `N'. + select NETFILTER_XT_CONNMARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module). config NETFILTER_XT_TARGET_CONNSECMARK tristate '"CONNSECMARK" target support' @@ -341,6 +379,18 @@ config NETFILTER_XT_TARGET_CONNSECMARK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_CT + tristate '"CT" target support' + depends on NF_CONNTRACK + depends on IP_NF_RAW || IP6_NF_RAW + depends on NETFILTER_ADVANCED + help + This options adds a `CT' target, which allows to specify initial + connection tracking parameters like events to be delivered and + the helper to be used. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_DSCP tristate '"DSCP" and "TOS" target support' depends on IP_NF_MANGLE || IP6_NF_MANGLE @@ -398,16 +448,12 @@ config NETFILTER_XT_TARGET_LED config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' - default m if NETFILTER_ADVANCED=n - help - This option adds a `MARK' target, which allows you to create rules - in the `mangle' table which alter the netfilter mark (nfmark) field - associated with the packet prior to routing. This can change - the routing method (see `Use netfilter MARK value as routing - key') and can also be used by other subsystems to change their - behavior. - - To compile it as a module, choose M here. If unsure, say N. + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). config NETFILTER_XT_TARGET_NFLOG tristate '"NFLOG" target support' @@ -454,6 +500,13 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. 
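The NF_CONNTRACK_ZONES option introduced above lets connections with identical tuples coexist as long as they live in different zones, which is why the conntrack lookup in the ICMPv6 error handler gains a zone argument. A toy lookup keyed on (tuple, zone), with a hypothetical flat table standing in for the kernel's hash:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Simplified stand-ins for the conntrack tuple and entry. */
struct tuple { uint32_t src, dst; uint16_t sport, dport; };
struct conn  { struct tuple t; uint16_t zone; bool in_use; };

#define MAX_CONNS 16
static struct conn table[MAX_CONNS];

/* Look a connection up by tuple *and* zone: two flows with the same
 * addresses and ports stay distinct when their zones differ. */
static struct conn *find_get(const struct tuple *t, uint16_t zone)
{
    for (int i = 0; i < MAX_CONNS; i++)
        if (table[i].in_use && table[i].zone == zone &&
            memcmp(&table[i].t, t, sizeof(*t)) == 0)
            return &table[i];
    return NULL;
}

int main(void)
{
    struct tuple t = { 1, 2, 1024, 80 };

    table[0] = (struct conn){ .t = t, .zone = 0, .in_use = true };
    table[1] = (struct conn){ .t = t, .zone = 7, .in_use = true };

    /* Same tuple, different zones: each lookup finds its own entry. */
    return (find_get(&t, 0) == &table[0] &&
            find_get(&t, 7) == &table[1]) ? 0 : 1;
}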
+config NETFILTER_XT_TARGET_TEE + tristate '"TEE" - packet cloning to alternate destination' + depends on NETFILTER_ADVANCED + ---help--- + This option adds a "TEE" target with which a packet can be cloned and + the clone rerouted to another nexthop. + config NETFILTER_XT_TARGET_TPROXY tristate '"TPROXY" target support (EXPERIMENTAL)' depends on EXPERIMENTAL @@ -461,6 +514,7 @@ depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED select NF_DEFRAG_IPV4 + select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES help This option adds a `TPROXY' target, which is somewhat similar to REDIRECT. It can only be used in the mangle table and is useful @@ -577,14 +631,11 @@ config NETFILTER_XT_MATCH_CONNMARK tristate '"connmark" connection mark match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED - select NF_CONNTRACK_MARK - help - This option adds a `connmark' match, which allows you to match the - connection mark value previously set for the session by `CONNMARK'. - - If you want to compile it as a module, say M here and read - . The module will be called - ipt_connmark. If unsure, say `N'. + select NETFILTER_XT_CONNMARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module). config NETFILTER_XT_MATCH_CONNTRACK tristate '"conntrack" connection tracking match support' @@ -708,13 +759,12 @@ config NETFILTER_XT_MATCH_MAC config NETFILTER_XT_MATCH_MARK tristate '"mark" match support' - default m if NETFILTER_ADVANCED=n - help - Netfilter mark matching allows you to match packets based on the - `nfmark' value in the packet. This can be set by the MARK target - (see below). - - To compile it as a module, choose M here. If unsure, say N. + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MARK + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). config NETFILTER_XT_MATCH_MULTIPORT tristate '"multiport" Multiple port match support' @@ -734,6 +784,8 @@ config NETFILTER_XT_MATCH_OWNER based on who created the socket: the user or group. It is also possible to check whether a socket actually exists. + + Conflicts with '"quota, tag, uid" match' + config NETFILTER_XT_MATCH_POLICY tristate 'IPsec "policy" match support' depends on XFRM @@ -767,6 +819,22 @@ config NETFILTER_XT_MATCH_PKTTYPE To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_QTAGUID + bool '"quota, tag, owner" match and stats support' + depends on NETFILTER_XT_MATCH_SOCKET + depends on NETFILTER_XT_MATCH_OWNER=n + help + This option replaces the `owner' match. In addition to matching + on uid, it keeps stats based on a tag assigned to a socket. + The full tag is composed of a UID and an accounting tag. + The tags are assignable to sockets from user space (e.g. a download + manager can assign the socket to another UID for accounting). + Stats and control are done via /proc/net/xt_qtaguid/. + It replaces owner as it takes the same arguments, but should + really be recognized by the iptables tool. + + If unsure, say `N'. + config NETFILTER_XT_MATCH_QUOTA tristate '"quota" match support' depends on NETFILTER_ADVANCED @@ -777,6 +845,30 @@ config NETFILTER_XT_MATCH_QUOTA If you want to compile it as a module, say M here and read . If unsure, say `N'. 
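The NETFILTER_XT_MATCH_QTAGUID help above describes a "full tag" built from a UID plus an application-chosen accounting tag. As a rough illustration of how such a tag can be packed and unpacked, here is a small standalone sketch; the 64-bit layout (accounting tag in the high half, uid in the low half) and the helper names are assumptions made for the example, not necessarily what xt_qtaguid actually uses.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t tag_t;

/* Assumed layout for illustration: acct tag in bits 63..32, uid in 31..0. */
static tag_t make_tag(uint32_t acct_tag, uint32_t uid)
{
	return ((tag_t)acct_tag << 32) | uid;
}

static uint32_t tag_uid(tag_t tag)  { return (uint32_t)tag; }
static uint32_t tag_acct(tag_t tag) { return (uint32_t)(tag >> 32); }

int main(void)
{
	/* e.g. a download manager accounting traffic on behalf of uid 10057 */
	tag_t tag = make_tag(0xdeadbeef, 10057);

	printf("uid=%u acct_tag=0x%x\n",
	       (unsigned)tag_uid(tag), (unsigned)tag_acct(tag));
	return 0;
}

The point is only that one 64-bit key can carry both identities, so per-socket stats can be grouped either by uid alone or by uid plus the application's own accounting tag.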
+config NETFILTER_XT_MATCH_QUOTA2 + tristate '"quota2" match support' + depends on NETFILTER_ADVANCED + help + This option adds a `quota2' match, which allows you to match on a + byte counter correctly and not per CPU. + It allows naming the quotas. + This is based on http://xtables-addons.git.sourceforge.net + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + +config NETFILTER_XT_MATCH_QUOTA2_LOG + bool '"quota2" Netfilter LOG support' + depends on NETFILTER_XT_MATCH_QUOTA2 + depends on IP_NF_TARGET_ULOG=n # not yes, not module, just no + default n + help + This option allows `quota2' to log ONCE when a quota limit + is passed. It logs via NETLINK using the NETLINK_NFLOG family. + It logs similarly to how ipt_ULOG would, but without the packet data. + + If unsure, say `N'. + config NETFILTER_XT_MATCH_RATEEST tristate '"rateest" match support' depends on NETFILTER_ADVANCED @@ -839,6 +931,7 @@ config NETFILTER_XT_MATCH_SOCKET depends on NETFILTER_ADVANCED depends on !NF_CONNTRACK || NF_CONNTRACK select NF_DEFRAG_IPV4 + select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES help This option adds a `socket' match, which can be used to match packets for which a TCP or UDP socket lookup finds a valid socket. diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 49f62ee4..8c1d5234 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -40,14 +40,17 @@ obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o +# combos +obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o +obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o + # targets obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o -obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o -obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o @@ -56,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o # matches @@ -63,7 +67,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o -obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o @@ -75,14 +78,15 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o -obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o 
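The quota2 help text above stresses that the quota is a single, named byte counter that is decremented exactly, rather than being split across per-CPU counters. A minimal userspace sketch of that behaviour, with a pthread mutex standing in for the kernel's locking, might look as follows; the structure and function names are illustrative only and are not taken from xt_quota2.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct named_quota {
	const char *name;
	uint64_t remaining;		/* bytes left before the quota trips */
	pthread_mutex_t lock;
};

/* Returns true while the packet still fits in the quota (i.e. "match"). */
static bool quota_match(struct named_quota *q, uint64_t pkt_len)
{
	bool ok;

	pthread_mutex_lock(&q->lock);
	if (q->remaining >= pkt_len) {
		q->remaining -= pkt_len;
		ok = true;
	} else {
		q->remaining = 0;	/* quota exhausted from now on */
		ok = false;
	}
	pthread_mutex_unlock(&q->lock);
	return ok;
}

int main(void)
{
	static struct named_quota q = {
		"wifi-monthly", 3000, PTHREAD_MUTEX_INITIALIZER
	};

	for (int i = 0; i < 4; i++)
		printf("packet %d (1000 bytes): %s\n", i,
		       quota_match(&q, 1000) ? "match" : "over quota");
	return 0;
}

Because every packet is charged against the one shared counter under a lock, the quota trips at exactly the configured byte count, which is the property the help text contrasts with per-CPU accounting.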
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o +obj-$(CONFIG_NETFILTER_XT_MATCH_QTAGUID) += xt_qtaguid_print.o xt_qtaguid.o obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o +obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA2) += xt_quota2.o obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 27c30cf9..95682e56 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -146,6 +146,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); + spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { list_add(&cp->c_list, &ip_vs_conn_tab[hash]); @@ -158,6 +159,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) ret = 0; } + spin_unlock(&cp->lock); ct_write_unlock(hash); return ret; @@ -177,6 +179,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); + spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { list_del(&cp->c_list); @@ -186,6 +189,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) } else ret = 0; + spin_unlock(&cp->lock); ct_write_unlock(hash); return ret; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1374179b..5543471b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -61,14 +62,14 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -struct nf_conn nf_conntrack_untracked __read_mostly; +struct nf_conn nf_conntrack_untracked; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, - unsigned int size, unsigned int rnd) + u16 zone, unsigned int size, unsigned int rnd) { unsigned int n; u_int32_t h; @@ -79,16 +80,16 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, */ n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); h = jhash2((u32 *)tuple, n, - rnd ^ (((__force __u16)tuple->dst.u.all << 16) | - tuple->dst.protonum)); + zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) | + tuple->dst.protonum)); return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct net *net, +static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, net->ct.htable_size, + return __hash_conntrack(tuple, zone, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -292,11 +293,12 @@ static void death_by_timeout(unsigned long ul_conntrack) * - Caller must lock nf_conntrack_lock before calling this function */ struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) +__nf_conntrack_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(net, tuple); + 
unsigned int hash = hash_conntrack(net, zone, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. @@ -304,7 +306,8 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) local_bh_disable(); begin: hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { - if (nf_ct_tuple_equal(tuple, &h->tuple)) { + if (nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { NF_CT_STAT_INC(net, found); local_bh_enable(); return h; @@ -326,21 +329,23 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_conntrack_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; rcu_read_lock(); begin: - h = __nf_conntrack_find(net, tuple); + h = __nf_conntrack_find(net, zone, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); if (unlikely(nf_ct_is_dying(ct) || !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { - if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) { + if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) || + nf_ct_zone(ct) != zone)) { nf_ct_put(ct); goto begin; } @@ -368,9 +373,11 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; + u16 zone; - hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + zone = nf_ct_zone(ct); + hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -387,6 +394,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; + u16 zone; ct = nf_ct_get(skb, &ctinfo); net = nf_ct_net(ct); @@ -398,8 +406,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + zone = nf_ct_zone(ct); + hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -418,11 +427,13 @@ __nf_conntrack_confirm(struct sk_buff *skb) not in the hash. If there is, we lost race. 
*/ hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &h->tuple)) + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &h->tuple)) + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; /* Remove from unconfirmed list */ @@ -469,15 +480,19 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(net, tuple); + struct nf_conn *ct; + u16 zone = nf_ct_zone(ignored_conntrack); + unsigned int hash = hash_conntrack(net, zone, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. */ rcu_read_lock_bh(); hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { - if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && - nf_ct_tuple_equal(tuple, &h->tuple)) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (ct != ignored_conntrack && + nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(ct) == zone) { NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; @@ -535,7 +550,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) return dropped; } -struct nf_conn *nf_conntrack_alloc(struct net *net, +struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) @@ -553,7 +568,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(net, orig); + unsigned int hash = hash_conntrack(net, zone, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -590,13 +605,28 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, #ifdef CONFIG_NET_NS ct->ct_net = net; #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (zone) { + struct nf_conntrack_zone *nf_ct_zone; + nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC); + if (!nf_ct_zone) + goto out_free; + nf_ct_zone->id = zone; + } +#endif /* * changes to lookup keys must be done before setting refcnt to 1 */ smp_wmb(); atomic_set(&ct->ct_general.use, 1); return ct; + +#ifdef CONFIG_NF_CONNTRACK_ZONES +out_free: + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + return ERR_PTR(-ENOMEM); +#endif } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); @@ -614,7 +644,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * -init_conntrack(struct net *net, +init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, @@ -624,14 +654,16 @@ init_conntrack(struct net *net, struct nf_conn *ct; struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; + struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp; + u16 zone = tmpl ? 
nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { pr_debug("Can't invert tuple.\n"); return NULL; } - ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC); if (IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; @@ -644,10 +676,14 @@ init_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); + + ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; + nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, + ecache ? ecache->expmask : 0, + GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); - exp = nf_ct_find_expectation(net, tuple); + exp = nf_ct_find_expectation(net, zone, tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", ct, exp); @@ -669,7 +705,7 @@ init_conntrack(struct net *net, nf_conntrack_get(&ct->master->ct_general); NF_CT_STAT_INC(net, expect_new); } else { - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); NF_CT_STAT_INC(net, new); } @@ -690,7 +726,7 @@ init_conntrack(struct net *net, /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ static inline struct nf_conn * -resolve_normal_ct(struct net *net, +resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, @@ -703,6 +739,7 @@ resolve_normal_ct(struct net *net, struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, &tuple, l3proto, @@ -712,9 +749,10 @@ resolve_normal_ct(struct net *net, } /* look for tuple match */ - h = nf_conntrack_find_get(net, &tuple); + h = nf_conntrack_find_get(net, zone, &tuple); if (!h) { - h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); + h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, + skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -724,7 +762,7 @@ resolve_normal_ct(struct net *net, /* It exists; we have (non-exclusive) reference. */ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { - *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + *ctinfo = IP_CT_ESTABLISHED_REPLY; /* Please set reply bit if this packet OK */ *set_reply = 1; } else { @@ -751,7 +789,7 @@ unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { - struct nf_conn *ct; + struct nf_conn *ct, *tmpl = NULL; enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -760,10 +798,14 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, int set_reply = 0; int ret; - /* Previously seen (loopback or untracked)? Ignore. */ if (skb->nfct) { - NF_CT_STAT_INC_ATOMIC(net, ignore); - return NF_ACCEPT; + /* Previously seen (loopback or untracked)? Ignore. 
*/ + tmpl = (struct nf_conn *)skb->nfct; + if (!nf_ct_is_template(tmpl)) { + NF_CT_STAT_INC_ATOMIC(net, ignore); + return NF_ACCEPT; + } + skb->nfct = NULL; } /* rcu_read_lock()ed by nf_hook_slow */ @@ -774,7 +816,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, pr_debug("not prepared to track yet or error occured\n"); NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } l4proto = __nf_ct_l4proto_find(pf, protonum); @@ -783,26 +826,30 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (l4proto->error != NULL) { - ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum); + ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo, + pf, hooknum); if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } } - ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, + ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(net, invalid); - return NF_ACCEPT; + ret = NF_ACCEPT; + goto out; } if (IS_ERR(ct)) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(net, drop); - return NF_DROP; + ret = NF_DROP; + goto out; } NF_CT_ASSERT(skb->nfct); @@ -817,11 +864,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) NF_CT_STAT_INC_ATOMIC(net, drop); - return -ret; + ret = -ret; + goto out; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_REPLY, ct); +out: + if (tmpl) + nf_ct_put(tmpl); return ret; } @@ -860,7 +911,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, return; rcu_read_lock(); - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); @@ -934,6 +985,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); +#ifdef CONFIG_NF_CONNTRACK_ZONES +static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { + .len = sizeof(struct nf_conntrack_zone), + .align = __alignof__(struct nf_conntrack_zone), + .id = NF_CT_EXT_ZONE, +}; +#endif + #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include @@ -990,7 +1049,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* This ICMP is in reverse direction to the packet which caused it */ ct = nf_ct_get(skb, &ctinfo); if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) - ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; + ctinfo = IP_CT_RELATED_REPLY; else ctinfo = IP_CT_RELATED; @@ -1115,6 +1174,9 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); +#ifdef CONFIG_NF_CONNTRACK_ZONES + nf_ct_extend_unregister(&nf_ct_zone_extend); +#endif } static void nf_conntrack_cleanup_net(struct net *net) @@ -1173,7 +1235,8 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls) if (!hash) { *vmalloced = 1; printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); - hash = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); + hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL); } if (hash && nulls) @@ -1190,6 +1253,7 @@ int 
nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) unsigned int hashsize, old_size; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; if (current->nsproxy->net_ns != &init_net) return -EOPNOTSUPP; @@ -1216,8 +1280,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); + ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, + bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), + hashsize, nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } @@ -1239,6 +1305,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); +void nf_ct_untracked_status_or(unsigned long bits) +{ + nf_conntrack_untracked.status |= bits; +} +EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); + static int nf_conntrack_init_init_net(void) { int max_factor = 8; @@ -1275,16 +1347,24 @@ static int nf_conntrack_init_init_net(void) if (ret < 0) goto err_helper; +#ifdef CONFIG_NF_CONNTRACK_ZONES + ret = nf_ct_extend_register(&nf_ct_zone_extend); + if (ret < 0) + goto err_extend; +#endif /* Set up fake conntrack: to never be deleted, not in any hashes */ #ifdef CONFIG_NET_NS nf_conntrack_untracked.ct_net = &init_net; #endif atomic_set(&nf_conntrack_untracked.ct_general.use, 1); /* - and look it like as a confirmed connection */ - set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); - + nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); return 0; +#ifdef CONFIG_NF_CONNTRACK_ZONES +err_extend: + nf_conntrack_helper_fini(); +#endif err_helper: nf_conntrack_proto_fini(); err_proto: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e73eb04f..ccb0e4df 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -27,6 +27,7 @@ #include #include #include +#include unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); @@ -84,7 +85,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; struct hlist_node *n; @@ -95,7 +97,8 @@ __nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { - if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) + if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + nf_ct_zone(i->master) == zone) return i; } return NULL; @@ -104,12 +107,13 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. 
*/ struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; rcu_read_lock(); - i = __nf_ct_expect_find(net, tuple); + i = __nf_ct_expect_find(net, zone, tuple); if (i && !atomic_inc_not_zero(&i->use)) i = NULL; rcu_read_unlock(); @@ -121,7 +125,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; struct hlist_node *n; @@ -133,7 +138,8 @@ nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && - nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { + nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + nf_ct_zone(i->master) == zone) { exp = i; break; } @@ -202,9 +208,10 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, static inline int expect_matches(const struct nf_conntrack_expect *a, const struct nf_conntrack_expect *b) { - return a->master == b->master && a->class == b->class - && nf_ct_tuple_equal(&a->tuple, &b->tuple) - && nf_ct_tuple_mask_equal(&a->mask, &b->mask); + return a->master == b->master && a->class == b->class && + nf_ct_tuple_equal(&a->tuple, &b->tuple) && + nf_ct_tuple_mask_equal(&a->mask, &b->mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } /* Generally a bad idea to call this: could have matched already. */ diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 7dfd469c..a83aff46 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -243,8 +243,8 @@ static int try_epsv_response(const char *data, size_t dlen, /* Three delimiters. */ if (dlen <= 3) return 0; delim = data[0]; - if (isdigit(delim) || delim < 33 || delim > 126 - || data[1] != delim || data[2] != delim) + if (isdigit(delim) || delim < 33 || delim > 126 || + data[1] != delim || data[2] != delim) return 0; return get_port(data, 3, dlen, delim, &cmd->u.tcp.port); @@ -366,8 +366,8 @@ static int help(struct sk_buff *skb, typeof(nf_nat_ftp_hook) nf_nat_ftp; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED - && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED_REPLY) { pr_debug("ftp: Conntrackinfo = %u\n", ctinfo); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 66369490..2a38ac89 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -29,6 +29,7 @@ #include #include #include +#include #include /* Parameters */ @@ -570,10 +571,9 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. 
*/ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_h245: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); @@ -1116,10 +1116,9 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_q931: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); @@ -1216,7 +1215,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (exp && exp->master == ct) return exp; return NULL; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4b1a56bd..4509fa67 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -65,7 +65,7 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) } struct nf_conntrack_helper * -__nf_conntrack_helper_find_byname(const char *name) +__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; struct hlist_node *n; @@ -73,13 +73,34 @@ __nf_conntrack_helper_find_byname(const char *name) for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) { - if (!strcmp(h->name, name)) + if (!strcmp(h->name, name) && + h->tuple.src.l3num == l3num && + h->tuple.dst.protonum == protonum) return h; } } return NULL; } -EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname); +EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); + +struct nf_conntrack_helper * +nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) +{ + struct nf_conntrack_helper *h; + + h = __nf_conntrack_helper_find(name, l3num, protonum); +#ifdef CONFIG_MODULES + if (h == NULL) { + if (request_module("nfct-helper-%s", name) == 0) + h = __nf_conntrack_helper_find(name, l3num, protonum); + } +#endif + if (h != NULL && !try_module_get(h->me)) + h = NULL; + + return h; +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { @@ -94,13 +115,22 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); -int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags) +int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags) { + struct nf_conntrack_helper *helper = NULL; + struct nf_conn_help *help; int ret = 0; - struct nf_conntrack_helper *helper; - struct nf_conn_help *help = nfct_help(ct); - helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + if (tmpl != NULL) { + help = nfct_help(tmpl); + if (help != NULL) + helper = help->helper; + } + + help = nfct_help(ct); + if (helper == NULL) + helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); if (helper == NULL) { if (help) rcu_assign_pointer(help->helper, NULL); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 8bd98c84..6d8afa62 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -124,8 +124,7 @@ static int help(struct 
sk_buff *skb, unsigned int protoff, return NF_ACCEPT; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d5217182..ac72b6db 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -456,6 +457,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) static int ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { + struct net *net; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; @@ -466,7 +468,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) int err; /* ignore our fake conntrack entry */ - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return 0; if (events & (1 << IPCT_DESTROY)) { @@ -482,7 +484,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) } else return 0; - if (!item->report && !nfnetlink_has_listeners(group)) + net = nf_ct_net(ct); + if (!item->report && !nfnetlink_has_listeners(net, group)) return 0; skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); @@ -559,7 +562,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); + err = nfnetlink_send(skb, net, item->pid, group, item->report, + GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -571,7 +575,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, group, -ENOBUFS); + nfnetlink_set_err(net, 0, group, -ENOBUFS); return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -586,6 +590,7 @@ static int ctnetlink_done(struct netlink_callback *cb) static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; @@ -596,7 +601,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) { restart: - hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], + hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[cb->args[0]], hnnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; @@ -768,6 +773,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; @@ -781,7 +787,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush_report(&init_net, + nf_conntrack_flush_report(net, NETLINK_CB(skb).pid, nlmsg_report(nlh)); return 0; @@ -790,7 +796,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&init_net, &tuple); + h = nf_conntrack_find_get(net, 0, &tuple); if (!h) return -ENOENT; @@ -828,6 +834,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff 
*skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; @@ -850,7 +857,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&init_net, &tuple); + h = nf_conntrack_find_get(net, 0, &tuple); if (!h) return -ENOENT; @@ -994,7 +1001,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) return 0; } - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper == NULL) { #ifdef CONFIG_MODULES spin_unlock_bh(&nf_conntrack_lock); @@ -1005,7 +1013,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) } spin_lock_bh(&nf_conntrack_lock); - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper) return -EAGAIN; #endif @@ -1175,7 +1184,8 @@ ctnetlink_change_conntrack(struct nf_conn *ct, } static struct nf_conn * -ctnetlink_create_conntrack(const struct nlattr * const cda[], +ctnetlink_create_conntrack(struct net *net, + const struct nlattr * const cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, u8 u3) @@ -1184,7 +1194,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], int err = -EINVAL; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, 0, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) return ERR_PTR(-ENOMEM); @@ -1203,7 +1213,8 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], if (err < 0) goto err2; - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); #ifdef CONFIG_MODULES @@ -1213,7 +1224,9 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], } rcu_read_lock(); - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, + nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper) { err = -EAGAIN; goto err2; @@ -1236,7 +1249,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], } } else { /* try an implicit helper assignation */ - err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); if (err < 0) goto err2; } @@ -1268,7 +1281,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) @@ -1285,7 +1298,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], if (err < 0) goto err2; - master_h = nf_conntrack_find_get(&init_net, &master); + master_h = nf_conntrack_find_get(net, 0, &master); if (master_h == NULL) { err = -ENOENT; goto err2; @@ -1313,6 +1326,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -1333,9 +1347,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if 
(cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(&init_net, &otuple); + h = __nf_conntrack_find(net, 0, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(&init_net, &rtuple); + h = __nf_conntrack_find(net, 0, &rtuple); if (h == NULL) { err = -ENOENT; @@ -1343,7 +1357,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conn *ct; enum ip_conntrack_events events; - ct = ctnetlink_create_conntrack(cda, &otuple, + ct = ctnetlink_create_conntrack(net, cda, &otuple, &rtuple, u3); if (IS_ERR(ct)) { err = PTR_ERR(ct); @@ -1357,7 +1371,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; - nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | + (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | @@ -1382,7 +1397,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err == 0) { nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | + (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | @@ -1525,9 +1541,10 @@ nla_put_failure: static int ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) { + struct nf_conntrack_expect *exp = item->exp; + struct net *net = nf_ct_exp_net(exp); struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nf_conntrack_expect *exp = item->exp; struct sk_buff *skb; unsigned int type; int flags = 0; @@ -1539,7 +1556,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) return 0; if (!item->report && - !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + !nfnetlink_has_listeners(net, NFNLGRP_CONNTRACK_EXP_NEW)) return 0; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); @@ -1562,7 +1579,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, + nfnetlink_send(skb, net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report, GFP_ATOMIC); return 0; @@ -1572,7 +1589,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, 0, -ENOBUFS); + nfnetlink_set_err(net, 0, 0, -ENOBUFS); return 0; } #endif @@ -1586,7 +1603,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb) static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = &init_net; + struct net *net = sock_net(skb->sk); struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct hlist_node *n; @@ -1639,6 +1656,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct sk_buff *skb2; @@ -1660,7 +1678,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(&init_net, &tuple); + exp = nf_ct_expect_find_get(net, 0, &tuple); if (!exp) return -ENOENT; @@ -1700,9 +1718,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; - struct nf_conntrack_helper *h; struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct 
hlist_node *n, *next; u_int8_t u3 = nfmsg->nfgen_family; @@ -1716,7 +1734,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(&init_net, &tuple); + exp = nf_ct_expect_find_get(net, 0, &tuple); if (!exp) return -ENOENT; @@ -1739,18 +1757,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* delete all expectations for this helper */ spin_lock_bh(&nf_conntrack_lock); - h = __nf_conntrack_helper_find_byname(name); - if (!h) { - spin_unlock_bh(&nf_conntrack_lock); - return -EOPNOTSUPP; - } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &init_net.ct.expect_hash[i], + &net->ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); - if (m_help->helper == h - && del_timer(&exp->timeout)) { + if (!strcmp(m_help->helper->name, name) && + del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } @@ -1762,7 +1775,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &init_net.ct.expect_hash[i], + &net->ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); @@ -1783,7 +1796,8 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, } static int -ctnetlink_create_expect(const struct nlattr * const cda[], u_int8_t u3, +ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[], + u_int8_t u3, u32 pid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; @@ -1805,7 +1819,7 @@ ctnetlink_create_expect(const struct nlattr * const cda[], u_int8_t u3, return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(&init_net, &master_tuple); + h = nf_conntrack_find_get(net, 0, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); @@ -1845,6 +1859,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -1861,13 +1876,13 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(&init_net, &tuple); + exp = __nf_ct_expect_find(net, 0, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - err = ctnetlink_create_expect(cda, + err = ctnetlink_create_expect(net, cda, u3, NETLINK_CB(skb).pid, nlmsg_report(nlh)); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 3807ac7f..2fd45651 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -123,7 +124,7 @@ static void pptp_expectfn(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); - exp_other = nf_ct_expect_find_get(net, &inv_t); + exp_other = nf_ct_expect_find_get(net, nf_ct_zone(ct), &inv_t); if (exp_other) { /* delete other expectation. 
*/ pr_debug("found\n"); @@ -136,17 +137,18 @@ static void pptp_expectfn(struct nf_conn *ct, rcu_read_unlock(); } -static int destroy_sibling_or_exp(struct net *net, +static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, const struct nf_conntrack_tuple *t) { const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; struct nf_conn *sibling; + u16 zone = nf_ct_zone(ct); pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); - h = nf_conntrack_find_get(net, t); + h = nf_conntrack_find_get(net, zone, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); pr_debug("setting timeout of conntrack %p to 0\n", sibling); @@ -157,7 +159,7 @@ static int destroy_sibling_or_exp(struct net *net, nf_ct_put(sibling); return 1; } else { - exp = nf_ct_expect_find_get(net, t); + exp = nf_ct_expect_find_get(net, zone, t); if (exp) { pr_debug("unexpect_related of expect %p\n", exp); nf_ct_unexpect_related(exp); @@ -182,7 +184,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; - if (!destroy_sibling_or_exp(net, &t)) + if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ @@ -190,7 +192,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; - if (!destroy_sibling_or_exp(net, &t)) + if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout reply pac->pns ct/exp\n"); } @@ -517,8 +519,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, u_int16_t msg; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; nexthdr_off = protoff; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 1b816a2e..ab546d2e 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -561,8 +561,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } -static int dccp_error(struct net *net, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, +static int dccp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index a54a0af0..2ed4b60b 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -241,7 +241,7 @@ static int gre_packet(struct nf_conn *ct, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. 
*/ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c10e6f36..26f0456a 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -377,7 +377,7 @@ static int sctp_packet(struct nf_conn *ct, new_state == SCTP_CONNTRACK_ESTABLISHED) { pr_debug("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ba2b7693..c32f99ed 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -760,7 +760,7 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = }; /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ -static int tcp_error(struct net *net, +static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, @@ -1014,7 +1014,7 @@ static int tcp_packet(struct nf_conn *ct, after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 70809d11..0e1450ee 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -77,7 +77,7 @@ static int udp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout); @@ -91,8 +91,8 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, +static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 0badedc5..560e171a 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct, nf_ct_udplite_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout); @@ -89,7 +89,7 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udplite_error(struct net *net, +static int udplite_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 
dcfecbb8..e7922842 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -77,7 +77,7 @@ static int help(struct sk_buff *skb, ct_sane_info = &nfct_help(ct)->help.ct_sane_info; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 4b572163..3b5efc90 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -22,6 +22,7 @@ #include #include #include +#include #include MODULE_LICENSE("GPL"); @@ -777,7 +778,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, rcu_read_lock(); do { - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (!exp || exp->master == ct || nfct_help(exp->master)->helper != nfct_help(ct)->helper || diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 1a84bf69..352940b6 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -26,6 +26,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); @@ -171,6 +172,11 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) + goto release; +#endif + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) goto release; diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index d65d3481..e54392ee 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -83,6 +83,8 @@ EXPORT_SYMBOL(nf_log_unregister); int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) { + if (pf >= ARRAY_SIZE(nf_loggers)) + return -EINVAL; mutex_lock(&nf_log_mutex); if (__find_logger(pf, logger->name) == NULL) { mutex_unlock(&nf_log_mutex); @@ -96,6 +98,8 @@ EXPORT_SYMBOL(nf_log_bind_pf); void nf_log_unbind_pf(u_int8_t pf) { + if (pf >= ARRAY_SIZE(nf_loggers)) + return; mutex_lock(&nf_log_mutex); rcu_assign_pointer(nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c index 5490fc37..474d621c 100644 --- a/net/netfilter/nf_tproxy_core.c +++ b/net/netfilter/nf_tproxy_core.c @@ -18,41 +18,6 @@ #include #include -struct sock * -nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, - const __be32 saddr, const __be32 daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in, bool listening_only) -{ - struct sock *sk; - - /* look up socket */ - switch (protocol) { - case IPPROTO_TCP: - if (listening_only) - sk = __inet_lookup_listener(net, &tcp_hashinfo, - daddr, ntohs(dport), - in->ifindex); - else - sk = __inet_lookup(net, &tcp_hashinfo, - saddr, sport, daddr, dport, - in->ifindex); - break; - case IPPROTO_UDP: - sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - break; - default: - WARN_ON(1); - sk = NULL; - } - - pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n", - protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk); - - return sk; -} -EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4); static void nf_tproxy_destructor(struct sk_buff *skb) @@ -63,22 +28,23 @@ nf_tproxy_destructor(struct sk_buff *skb) skb->destructor = NULL; if (sk) - nf_tproxy_put_sock(sk); + 
sock_put(sk); } /* consumes sk */ -int +void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { - if (inet_sk(sk)->transparent) { - skb_orphan(skb); - skb->sk = sk; - skb->destructor = nf_tproxy_destructor; - return 1; - } else - nf_tproxy_put_sock(sk); + /* assigning tw sockets complicates things; most + * skb->sk->X checks would have to test sk->sk_state first */ + if (sk->sk_state == TCP_TIME_WAIT) { + inet_twsk_put(inet_twsk(sk)); + return; + } - return 0; + skb_orphan(skb); + skb->sk = sk; + skb->destructor = nf_tproxy_destructor; } EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index eedc0c1a..8eb0cc23 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -40,7 +40,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -static struct sock *nfnl = NULL; static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; static DEFINE_MUTEX(nfnl_mutex); @@ -101,34 +100,35 @@ nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss) return &ss->cb[cb_id]; } -int nfnetlink_has_listeners(unsigned int group) +int nfnetlink_has_listeners(struct net *net, unsigned int group) { - return netlink_has_listeners(nfnl, group); + return netlink_has_listeners(net->nfnl, group); } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, u32 pid, +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags) { - return nlmsg_notify(nfnl, skb, pid, group, echo, flags); + return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -void nfnetlink_set_err(u32 pid, u32 group, int error) +void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) { - netlink_set_err(nfnl, pid, group, error); + netlink_set_err(net->nfnl, pid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) { - return netlink_unicast(nfnl, skb, pid, flags); + return netlink_unicast(net->nfnl, skb, pid, flags); } EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. 
*/ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); const struct nfnl_callback *nc; const struct nfnetlink_subsystem *ss; int type, err; @@ -170,7 +170,7 @@ replay: if (err < 0) return err; - err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda); + err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda); if (err == -EAGAIN) goto replay; return err; @@ -184,26 +184,45 @@ static void nfnetlink_rcv(struct sk_buff *skb) nfnl_unlock(); } -static void __exit nfnetlink_exit(void) +static int __net_init nfnetlink_net_init(struct net *net) { - printk("Removing netfilter NETLINK layer.\n"); - netlink_kernel_release(nfnl); - return; + struct sock *nfnl; + + nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, NFNLGRP_MAX, + nfnetlink_rcv, NULL, THIS_MODULE); + if (!nfnl) + return -ENOMEM; + net->nfnl_stash = nfnl; + rcu_assign_pointer(net->nfnl, nfnl); + return 0; } +static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) +{ + struct net *net; + + list_for_each_entry(net, net_exit_list, exit_list) + rcu_assign_pointer(net->nfnl, NULL); + synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) + netlink_kernel_release(net->nfnl_stash); +} + +static struct pernet_operations nfnetlink_net_ops = { + .init = nfnetlink_net_init, + .exit_batch = nfnetlink_net_exit_batch, +}; + static int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - - nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, - nfnetlink_rcv, NULL, THIS_MODULE); - if (!nfnl) { - printk(KERN_ERR "cannot initialize nfnetlink!\n"); - return -ENOMEM; - } - - return 0; + return register_pernet_subsys(&nfnetlink_net_ops); } +static void __exit nfnetlink_exit(void) +{ + printk("Removing netfilter NETLINK layer.\n"); + unregister_pernet_subsys(&nfnetlink_net_ops); +} module_init(nfnetlink_init); module_exit(nfnetlink_exit); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f900dc31..949ce348 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -323,7 +323,8 @@ __nfulnl_send(struct nfulnl_instance *inst) NLMSG_DONE, sizeof(struct nfgenmsg)); - status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); + status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid, + MSG_DONTWAIT); inst->qlen = 0; inst->skb = NULL; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7a9dec9f..a6a0c782 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -420,7 +420,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index f01955cc..7d7e4339 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -12,7 +12,7 @@ * published by the Free Software Foundation. * */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -26,7 +26,9 @@ #include #include - +#include +#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); @@ -52,12 +54,6 @@ struct xt_af { static struct xt_af *xt; -#ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) 
printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - static const char *const xt_prefix[NFPROTO_NUMPROTO] = { [NFPROTO_UNSPEC] = "x", [NFPROTO_IPV4] = "ip", @@ -66,6 +62,9 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = { [NFPROTO_IPV6] = "ip6", }; +/* Allow this many total (re)entries. */ +static const unsigned int xt_jumpstack_multiplier = 2; + /* Registration hooks for targets. */ int xt_register_target(struct xt_target *target) @@ -218,6 +217,17 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) } EXPORT_SYMBOL(xt_find_match); +struct xt_match * +xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision) +{ + struct xt_match *match; + + match = try_then_request_module(xt_find_match(nfproto, name, revision), + "%st_%s", xt_prefix[nfproto], name); + return (match != NULL) ? match : ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL_GPL(xt_request_find_match); + /* Find target, grabs ref. Returns ERR_PTR() on error. */ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { @@ -254,9 +264,7 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) target = try_then_request_module(xt_find_target(af, name, revision), "%st_%s", xt_prefix[af], name); - if (IS_ERR(target) || !target) - return NULL; - return target; + return (target != NULL) ? target : ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(xt_request_find_target); @@ -655,6 +663,26 @@ void xt_free_table_info(struct xt_table_info *info) else vfree(info->entries[cpu]); } + + if (info->jumpstack != NULL) { + if (sizeof(void *) * info->stacksize > PAGE_SIZE) { + for_each_possible_cpu(cpu) + vfree(info->jumpstack[cpu]); + } else { + for_each_possible_cpu(cpu) + kfree(info->jumpstack[cpu]); + } + } + + if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE) + vfree(info->jumpstack); + else + kfree(info->jumpstack); + if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE) + vfree(info->stackptr); + else + kfree(info->stackptr); + kfree(info); } EXPORT_SYMBOL(xt_free_table_info); @@ -699,6 +727,49 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock); DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); +static int xt_jumpstack_alloc(struct xt_table_info *i) +{ + unsigned int size; + int cpu; + + size = sizeof(unsigned int) * nr_cpu_ids; + if (size > PAGE_SIZE) + i->stackptr = vmalloc(size); + else + i->stackptr = kmalloc(size, GFP_KERNEL); + if (i->stackptr == NULL) + return -ENOMEM; + memset(i->stackptr, 0, size); + + size = sizeof(void **) * nr_cpu_ids; + if (size > PAGE_SIZE) + i->jumpstack = vmalloc(size); + else + i->jumpstack = kmalloc(size, GFP_KERNEL); + if (i->jumpstack == NULL) + return -ENOMEM; + memset(i->jumpstack, 0, size); + + i->stacksize *= xt_jumpstack_multiplier; + size = sizeof(void *) * i->stacksize; + for_each_possible_cpu(cpu) { + if (size > PAGE_SIZE) + i->jumpstack[cpu] = vmalloc_node(size, + cpu_to_node(cpu)); + else + i->jumpstack[cpu] = kmalloc_node(size, + GFP_KERNEL, cpu_to_node(cpu)); + if (i->jumpstack[cpu] == NULL) + /* + * Freeing will be done later on by the callers. The + * chain is: xt_replace_table -> __do_replace -> + * do_replace -> xt_free_table_info. + */ + return -ENOMEM; + } + + return 0; +} struct xt_table_info * xt_replace_table(struct xt_table *table, @@ -707,6 +778,7 @@ xt_replace_table(struct xt_table *table, int *error) { struct xt_table_info *private; + int ret; /* Do the substitution. 
*/ local_bh_disable(); @@ -714,13 +786,19 @@ xt_replace_table(struct xt_table *table, /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { - duprintf("num_counters != table->private->number (%u/%u)\n", + pr_debug("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); local_bh_enable(); *error = -EAGAIN; return NULL; } + ret = xt_jumpstack_alloc(newinfo); + if (ret < 0) { + *error = ret; + return NULL; + } + table->private = newinfo; newinfo->initial_entries = private->initial_entries; @@ -745,6 +823,10 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table_info *private; struct xt_table *t, *table; + ret = xt_jumpstack_alloc(newinfo); + if (ret < 0) + return ERR_PTR(ret); + /* Don't add one object to multiple lists. */ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); if (!table) { @@ -771,7 +853,7 @@ struct xt_table *xt_register_table(struct net *net, goto unlock; private = table->private; - duprintf("table->private->number = %u\n", private->number); + pr_debug("table->private->number = %u\n", private->number); /* save number of initial entries */ private->initial_entries = private->number; diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 011bc80d..c2c0e4ab 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -27,7 +27,7 @@ MODULE_ALIAS("ipt_CLASSIFY"); MODULE_ALIAS("ip6t_CLASSIFY"); static unsigned int -classify_tg(struct sk_buff *skb, const struct xt_target_param *par) +classify_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_classify_target_info *clinfo = par->targinfo; diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c deleted file mode 100644 index 59345706..00000000 --- a/net/netfilter/xt_CONNMARK.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * xt_CONNMARK - Netfilter module to modify the connection mark values - * - * Copyright (C) 2002,2004 MARA Systems AB - * by Henrik Nordstrom - * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * Jan Engelhardt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include -#include -#include -#include - -MODULE_AUTHOR("Henrik Nordstrom "); -MODULE_DESCRIPTION("Xtables: connection mark modification"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_CONNMARK"); -MODULE_ALIAS("ip6t_CONNMARK"); - -#include -#include -#include - -static unsigned int -connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_connmark_tginfo1 *info = par->targinfo; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - u_int32_t newmark; - - ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) - return XT_CONTINUE; - - switch (info->mode) { - case XT_CONNMARK_SET: - newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; - if (ct->mark != newmark) { - ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, ct); - } - break; - case XT_CONNMARK_SAVE: - newmark = (ct->mark & ~info->ctmask) ^ - (skb->mark & info->nfmask); - if (ct->mark != newmark) { - ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, ct); - } - break; - case XT_CONNMARK_RESTORE: - newmark = (skb->mark & ~info->nfmask) ^ - (ct->mark & info->ctmask); - skb->mark = newmark; - break; - } - - return XT_CONTINUE; -} - -static bool connmark_tg_check(const struct xt_tgchk_param *par) -{ - if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "cannot load conntrack support for " - "proto=%u\n", par->family); - return false; - } - return true; -} - -static void connmark_tg_destroy(const struct xt_tgdtor_param *par) -{ - nf_ct_l3proto_module_put(par->family); -} - -static struct xt_target connmark_tg_reg __read_mostly = { - .name = "CONNMARK", - .revision = 1, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_tg_check, - .target = connmark_tg, - .targetsize = sizeof(struct xt_connmark_tginfo1), - .destroy = connmark_tg_destroy, - .me = THIS_MODULE, -}; - -static int __init connmark_tg_init(void) -{ - return xt_register_target(&connmark_tg_reg); -} - -static void __exit connmark_tg_exit(void) -{ - xt_unregister_target(&connmark_tg_reg); -} - -module_init(connmark_tg_init); -module_exit(connmark_tg_exit); diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index b54c3756..e28dade5 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -65,7 +65,7 @@ static void secmark_restore(struct sk_buff *skb) } static unsigned int -connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par) +connsecmark_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connsecmark_target_info *info = par->targinfo; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c new file mode 100644 index 00000000..82afff5c --- /dev/null +++ b/net/netfilter/xt_CT.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2010 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int xt_ct_target(struct sk_buff *skb, + const struct xt_action_param *par) +{ + const struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + + /* Previously seen (loopback)? Ignore. 
*/ + if (skb->nfct != NULL) + return XT_CONTINUE; + + atomic_inc(&ct->ct_general.use); + skb->nfct = &ct->ct_general; + skb->nfctinfo = IP_CT_NEW; + + return XT_CONTINUE; +} + +static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) +{ + if (par->family == AF_INET) { + const struct ipt_entry *e = par->entryinfo; + + if (e->ip.invflags & IPT_INV_PROTO) + return 0; + return e->ip.proto; + } else if (par->family == AF_INET6) { + const struct ip6t_entry *e = par->entryinfo; + + if (e->ipv6.invflags & IP6T_INV_PROTO) + return 0; + return e->ipv6.proto; + } else + return 0; +} + +static bool xt_ct_tg_check(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conntrack_tuple t; + struct nf_conn_help *help; + struct nf_conn *ct; + u8 proto; + + if (info->flags & ~XT_CT_NOTRACK) + return false; + + if (info->flags & XT_CT_NOTRACK) { + ct = nf_ct_untracked_get(); + atomic_inc(&ct->ct_general.use); + goto out; + } + +#ifndef CONFIG_NF_CONNTRACK_ZONES + if (info->zone) + goto err1; +#endif + + if (nf_ct_l3proto_try_module_get(par->family) < 0) + goto err1; + + memset(&t, 0, sizeof(t)); + ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); + if (IS_ERR(ct)) + goto err2; + + if ((info->ct_events || info->exp_events) && + !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, + GFP_KERNEL)) + goto err3; + + if (info->helper[0]) { + proto = xt_ct_find_proto(par); + if (!proto) + goto err3; + + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (help->helper == NULL) + goto err3; + } + + __set_bit(IPS_TEMPLATE_BIT, &ct->status); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); +out: + info->ct = ct; + return true; + +err3: + nf_conntrack_free(ct); +err2: + nf_ct_l3proto_module_put(par->family); +err1: + return false; +} + +static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + struct nf_conn_help *help; + + if (!nf_ct_is_untracked(ct)) { + help = nfct_help(ct); + if (help) + module_put(help->helper->me); + + nf_ct_l3proto_module_put(par->family); + } + nf_ct_put(info->ct); +} + +static struct xt_target xt_ct_tg __read_mostly = { + .name = "CT", + .family = NFPROTO_UNSPEC, + .targetsize = XT_ALIGN(sizeof(struct xt_ct_target_info)), + .checkentry = xt_ct_tg_check, + .destroy = xt_ct_tg_destroy, + .target = xt_ct_target, + .table = "raw", + .me = THIS_MODULE, +}; + +static int __init xt_ct_tg_init(void) +{ + return xt_register_target(&xt_ct_tg); +} + +static void __exit xt_ct_tg_exit(void) +{ + xt_unregister_target(&xt_ct_tg); +} + +module_init(xt_ct_tg_init); +module_exit(xt_ct_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: connection tracking target"); +MODULE_ALIAS("ipt_CT"); +MODULE_ALIAS("ip6t_CT"); diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 74ce8926..3f413aa4 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -28,7 +28,7 @@ MODULE_ALIAS("ipt_TOS"); MODULE_ALIAS("ip6t_TOS"); static unsigned int -dscp_tg(struct sk_buff *skb, const struct xt_target_param *par) +dscp_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; @@ -45,7 +45,7 @@ dscp_tg(struct sk_buff *skb, const struct xt_target_param *par) } static unsigned int -dscp_tg6(struct 
sk_buff *skb, const struct xt_target_param *par) +dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; @@ -72,7 +72,7 @@ static bool dscp_tg_check(const struct xt_tgchk_param *par) } static unsigned int -tos_tg(struct sk_buff *skb, const struct xt_target_param *par) +tos_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tos_target_info *info = par->targinfo; struct iphdr *iph = ip_hdr(skb); @@ -92,14 +92,14 @@ tos_tg(struct sk_buff *skb, const struct xt_target_param *par) } static unsigned int -tos_tg6(struct sk_buff *skb, const struct xt_target_param *par) +tos_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tos_target_info *info = par->targinfo; struct ipv6hdr *iph = ipv6_hdr(skb); u_int8_t orig, nv; orig = ipv6_get_dsfield(iph); - nv = (orig & info->tos_mask) ^ info->tos_value; + nv = (orig & ~info->tos_mask) ^ info->tos_value; if (orig != nv) { if (!skb_make_writable(skb, sizeof(struct iphdr))) diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c index 10e789e2..d29d65ce 100644 --- a/net/netfilter/xt_HL.c +++ b/net/netfilter/xt_HL.c @@ -26,7 +26,7 @@ MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target"); MODULE_LICENSE("GPL"); static unsigned int -ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) +ttl_tg(struct sk_buff *skb, const struct xt_action_param *par) { struct iphdr *iph; const struct ipt_TTL_info *info = par->targinfo; @@ -66,7 +66,7 @@ ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) } static unsigned int -hl_tg6(struct sk_buff *skb, const struct xt_target_param *par) +hl_tg6(struct sk_buff *skb, const struct xt_action_param *par) { struct ipv6hdr *ip6h; const struct ip6t_HL_info *info = par->targinfo; diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 8ff7843b..ad21030b 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -42,7 +42,7 @@ struct xt_led_info_internal { }; static unsigned int -led_tg(struct sk_buff *skb, const struct xt_target_param *par) +led_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_led_info *ledinfo = par->targinfo; struct xt_led_info_internal *ledinternal = ledinfo->internal_data; diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c deleted file mode 100644 index 225f8d11..00000000 --- a/net/netfilter/xt_MARK.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * xt_MARK - Netfilter module to modify the NFMARK field of an skb - * - * (C) 1999-2001 Marc Boucher - * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * Jan Engelhardt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include - -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Marc Boucher "); -MODULE_DESCRIPTION("Xtables: packet mark modification"); -MODULE_ALIAS("ipt_MARK"); -MODULE_ALIAS("ip6t_MARK"); - -static unsigned int -mark_tg(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_mark_tginfo2 *info = par->targinfo; - - skb->mark = (skb->mark & ~info->mask) ^ info->mark; - return XT_CONTINUE; -} - -static struct xt_target mark_tg_reg __read_mostly = { - .name = "MARK", - .revision = 2, - .family = NFPROTO_UNSPEC, - .target = mark_tg, - .targetsize = sizeof(struct xt_mark_tginfo2), - .me = THIS_MODULE, -}; - -static int __init mark_tg_init(void) -{ - return xt_register_target(&mark_tg_reg); -} - -static void __exit mark_tg_exit(void) -{ - xt_unregister_target(&mark_tg_reg); -} - -module_init(mark_tg_init); -module_exit(mark_tg_exit); diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index a57c5cf0..0cfc98f1 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_NFLOG"); MODULE_ALIAS("ip6t_NFLOG"); static unsigned int -nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) +nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_nflog_info *info = par->targinfo; struct nf_loginfo li; diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index f28f6a5f..f30568f8 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -30,7 +30,7 @@ MODULE_ALIAS("arpt_NFQUEUE"); static u32 jhash_initval __read_mostly; static unsigned int -nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) +nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info *tinfo = par->targinfo; diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index e7a0a54f..9d782181 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -13,7 +13,7 @@ MODULE_ALIAS("ipt_NOTRACK"); MODULE_ALIAS("ip6t_NOTRACK"); static unsigned int -notrack_tg(struct sk_buff *skb, const struct xt_target_param *par) +notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) { /* Previously seen (loopback)? Ignore. */ if (skb->nfct != NULL) @@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_target_param *par) If there is a real ct entry correspondig to this packet, it'll hang aroun till timing out. We don't deal with it for performance reasons. 
JK */ - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index d80b8192..104df8b1 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -71,7 +71,7 @@ void xt_rateest_put(struct xt_rateest *est) EXPORT_SYMBOL_GPL(xt_rateest_put); static unsigned int -xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par) +xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_rateest_target_info *info = par->targinfo; struct gnet_stats_basic_packed *stats = &info->est->bstats; diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 7a6f9e6f..0d287705 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -29,7 +29,7 @@ MODULE_ALIAS("ip6t_SECMARK"); static u8 mode; static unsigned int -secmark_tg(struct sk_buff *skb, const struct xt_target_param *par) +secmark_tg(struct sk_buff *skb, const struct xt_action_param *par) { u32 secmark = 0; const struct xt_secmark_target_info *info = par->targinfo; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index eda64c1c..e757ce9e 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -174,7 +174,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, } static unsigned int -tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par) +tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) { struct iphdr *iph = ip_hdr(skb); __be16 newlen; @@ -197,7 +197,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par) #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) static unsigned int -tcpmss_tg6(struct sk_buff *skb, const struct xt_target_param *par) +tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); u8 nexthdr; @@ -241,6 +241,7 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) { const struct xt_tcpmss_info *info = par->targinfo; const struct ipt_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; if (info->mss == XT_TCPMSS_CLAMP_PMTU && (par->hook_mask & ~((1 << NF_INET_FORWARD) | @@ -250,8 +251,9 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return false; } - if (IPT_MATCH_ITERATE(e, find_syn_match)) - return true; + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return true; printk("xt_TCPMSS: Only works on TCP SYN packets\n"); return false; } @@ -261,6 +263,7 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par) { const struct xt_tcpmss_info *info = par->targinfo; const struct ip6t_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; if (info->mss == XT_TCPMSS_CLAMP_PMTU && (par->hook_mask & ~((1 << NF_INET_FORWARD) | @@ -270,8 +273,9 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return false; } - if (IP6T_MATCH_ITERATE(e, find_syn_match)) - return true; + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return true; printk("xt_TCPMSS: Only works on TCP SYN packets\n"); return false; } diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 9dd8c8ef..97813398 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -75,7 +75,7 @@ tcpoptstrip_mangle_packet(struct sk_buff 
*skb, } static unsigned int -tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par) +tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par) { return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb), sizeof(struct iphdr) + sizeof(struct tcphdr)); @@ -83,7 +83,7 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par) #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE) static unsigned int -tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_target_param *par) +tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); int tcphoff; diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c new file mode 100644 index 00000000..7a118267 --- /dev/null +++ b/net/netfilter/xt_TEE.c @@ -0,0 +1,309 @@ +/* + * "TEE" target extension for Xtables + * Copyright © Sebastian Claßen, 2007 + * Jan Engelhardt, 2007-2010 + * + * based on ipt_ROUTE.c from Cédric de Launois + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 or later, as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +# define WITH_CONNTRACK 1 +# include +#endif +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +# define WITH_IPV6 1 +#endif + +struct xt_tee_priv { + struct notifier_block notifier; + struct xt_tee_tginfo *tginfo; + int oif; +}; + +static const union nf_inet_addr tee_zero_address; +static DEFINE_PER_CPU(bool, tee_active); + +static struct net *pick_net(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_NS + const struct dst_entry *dst; + + if (skb->dev != NULL) + return dev_net(skb->dev); + dst = skb_dst(skb); + if (dst != NULL && dst->dev != NULL) + return dev_net(dst->dev); +#endif + return &init_net; +} + +static bool +tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) +{ + const struct iphdr *iph = ip_hdr(skb); + struct net *net = pick_net(skb); + struct rtable *rt; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + if (info->priv) { + if (info->priv->oif == -1) + return false; + fl.oif = info->priv->oif; + } + fl.nl_u.ip4_u.daddr = info->gw.ip; + fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); + fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE; + if (ip_route_output_key(net, &rt, &fl) != 0) + return false; + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); + skb->dev = rt->u.dst.dev; + skb->protocol = htons(ETH_P_IP); + return true; +} + +static unsigned int +tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_tee_tginfo *info = par->targinfo; + struct iphdr *iph; + + if (percpu_read(tee_active)) + return XT_CONTINUE; + /* + * Copy the skb, and route the copy. Will later return %XT_CONTINUE for + * the original skb, which should continue on its way as if nothing has + * happened. The copy should be independently delivered to the TEE + * --gateway. + */ + skb = pskb_copy(skb, GFP_ATOMIC); + if (skb == NULL) + return XT_CONTINUE; + +#ifdef WITH_CONNTRACK + /* Avoid counting cloned packets towards the original connection. 
*/ + nf_conntrack_put(skb->nfct); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); +#endif + /* + * If we are in PREROUTING/INPUT, the checksum must be recalculated + * since the length could have changed as a result of defragmentation. + * + * We also decrease the TTL to mitigate potential TEE loops + * between two hosts. + * + * Set %IP_DF so that the original source is notified of a potentially + * decreased MTU on the clone route. IPv6 does this too. + */ + iph = ip_hdr(skb); + iph->frag_off |= htons(IP_DF); + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_IN) + --iph->ttl; + ip_send_check(iph); + + if (tee_tg_route4(skb, info)) { + percpu_write(tee_active, true); + ip_local_out(skb); + percpu_write(tee_active, false); + } else { + kfree_skb(skb); + } + return XT_CONTINUE; +} + +#ifdef WITH_IPV6 +static bool +tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = pick_net(skb); + struct dst_entry *dst; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + if (info->priv) { + if (info->priv->oif == -1) + return false; + fl.oif = info->priv->oif; + } + fl.nl_u.ip6_u.daddr = info->gw.in6; + fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | + (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; + dst = ip6_route_output(net, NULL, &fl); + if (dst == NULL) + return false; + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); + return true; +} + +static unsigned int +tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_tee_tginfo *info = par->targinfo; + + if (percpu_read(tee_active)) + return XT_CONTINUE; + skb = pskb_copy(skb, GFP_ATOMIC); + if (skb == NULL) + return XT_CONTINUE; + +#ifdef WITH_CONNTRACK + nf_conntrack_put(skb->nfct); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); +#endif + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_IN) { + struct ipv6hdr *iph = ipv6_hdr(skb); + --iph->hop_limit; + } + if (tee_tg_route6(skb, info)) { + percpu_write(tee_active, true); + ip6_local_out(skb); + percpu_write(tee_active, false); + } else { + kfree_skb(skb); + } + return XT_CONTINUE; +} +#endif /* WITH_IPV6 */ + +static int tee_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct xt_tee_priv *priv; + + priv = container_of(this, struct xt_tee_priv, notifier); + switch (event) { + case NETDEV_REGISTER: + if (!strcmp(dev->name, priv->tginfo->oif)) + priv->oif = dev->ifindex; + break; + case NETDEV_UNREGISTER: + if (dev->ifindex == priv->oif) + priv->oif = -1; + break; + case NETDEV_CHANGENAME: + if (!strcmp(dev->name, priv->tginfo->oif)) + priv->oif = dev->ifindex; + else if (dev->ifindex == priv->oif) + priv->oif = -1; + break; + } + + return NOTIFY_DONE; +} + +static int tee_tg_check(const struct xt_tgchk_param *par) +{ + struct xt_tee_tginfo *info = par->targinfo; + struct xt_tee_priv *priv; + + /* 0.0.0.0 and :: not allowed */ + if (memcmp(&info->gw, &tee_zero_address, + sizeof(tee_zero_address)) == 0) + return -EINVAL; + + if (info->oif[0]) { + if (info->oif[sizeof(info->oif)-1] != '\0') + return -EINVAL; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + + priv->tginfo = info; + priv->oif = -1; + priv->notifier.notifier_call = tee_netdev_event; + 
info->priv = priv; + + register_netdevice_notifier(&priv->notifier); + } else + info->priv = NULL; + + return 0; +} + +static void tee_tg_destroy(const struct xt_tgdtor_param *par) +{ + struct xt_tee_tginfo *info = par->targinfo; + + if (info->priv) { + unregister_netdevice_notifier(&info->priv->notifier); + kfree(info->priv); + } +} + +static struct xt_target tee_tg_reg[] __read_mostly = { + { + .name = "TEE", + .revision = 1, + .family = NFPROTO_IPV4, + .target = tee_tg4, + .targetsize = sizeof(struct xt_tee_tginfo), + .checkentry = tee_tg_check, + .destroy = tee_tg_destroy, + .me = THIS_MODULE, + }, +#ifdef WITH_IPV6 + { + .name = "TEE", + .revision = 1, + .family = NFPROTO_IPV6, + .target = tee_tg6, + .targetsize = sizeof(struct xt_tee_tginfo), + .checkentry = tee_tg_check, + .destroy = tee_tg_destroy, + .me = THIS_MODULE, + }, +#endif +}; + +static int __init tee_tg_init(void) +{ + return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); +} + +static void __exit tee_tg_exit(void) +{ + xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); +} + +module_init(tee_tg_init); +module_exit(tee_tg_exit); +MODULE_AUTHOR("Sebastian Claßen "); +MODULE_AUTHOR("Jan Engelhardt "); +MODULE_DESCRIPTION("Xtables: Reroute packet copy"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_TEE"); +MODULE_ALIAS("ip6t_TEE"); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 1340c2fa..4fa12857 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -1,7 +1,7 @@ /* * Transparent proxy support for Linux/iptables * - * Copyright (c) 2006-2007 BalaBit IT Ltd. + * Copyright (c) 2006-2010 BalaBit IT Ltd. * Author: Balazs Scheidler, Krisztian Kovacs * * This program is free software; you can redistribute it and/or modify @@ -16,19 +16,112 @@ #include #include #include - +#include #include #include -#include #include -#include -static unsigned int -tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par) +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#define XT_TPROXY_HAVE_IPV6 1 +#include +#include +#include +#include +#endif + +#include +#include + +static bool tproxy_sk_is_transparent(struct sock *sk) +{ + if (sk->sk_state != TCP_TIME_WAIT) { + if (inet_sk(sk)->transparent) + return true; + sock_put(sk); + } else { + if (inet_twsk(sk)->tw_transparent) + return true; + inet_twsk_put(inet_twsk(sk)); + } + return false; +} + +static inline __be32 +tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) +{ + struct in_device *indev; + __be32 laddr; + + if (user_laddr) + return user_laddr; + + laddr = 0; + rcu_read_lock(); + indev = __in_dev_get_rcu(skb->dev); + for_primary_ifa(indev) { + laddr = ifa->ifa_local; + break; + } endfor_ifa(indev); + rcu_read_unlock(); + + return laddr ? laddr : daddr; +} + +/** + * tproxy_handle_time_wait4() - handle IPv4 TCP TIME_WAIT reopen redirections + * @skb: The skb being processed. + * @laddr: IPv4 address to redirect to or zero. + * @lport: TCP port to redirect to or zero. + * @sk: The TIME_WAIT TCP socket found by the lookup. + * + * We have to handle SYN packets arriving to TIME_WAIT sockets + * differently: instead of reopening the connection we should rather + * redirect the new connection to the proxy if there's a listener + * socket present. + * + * tproxy_handle_time_wait4() consumes the socket reference passed in. + * + * Returns the listener socket if there's one, the TIME_WAIT socket if + * no such listener is found, or NULL if the TCP header is incomplete. 
+ */ +static struct sock * +tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, + struct sock *sk) +{ + const struct iphdr *iph = ip_hdr(skb); + struct tcphdr _hdr, *hp; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); + if (hp == NULL) { + inet_twsk_put(inet_twsk(sk)); + return NULL; + } + + if (hp->syn && !hp->rst && !hp->ack && !hp->fin) { + /* SYN to a TIME_WAIT socket, we'd rather redirect it + * to a listener socket if there's one */ + struct sock *sk2; + + sk2 = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + iph->saddr, laddr ? laddr : iph->daddr, + hp->source, lport ? lport : hp->dest, + skb->dev, NFT_LOOKUP_LISTENER); + if (sk2) { + inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_put(inet_twsk(sk)); + sk = sk2; + } + } + + return sk; +} + +static unsigned int +tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, + u_int32_t mark_mask, u_int32_t mark_value) { const struct iphdr *iph = ip_hdr(skb); - const struct xt_tproxy_target_info *tgi = par->targinfo; struct udphdr _hdr, *hp; struct sock *sk; @@ -36,30 +129,236 @@ tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par) if (hp == NULL) return NF_DROP; + /* check if there's an ongoing connection on the packet + * addresses, this happens if the redirect already happened + * and the current packet belongs to an already established + * connection */ sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, - iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, - hp->source, tgi->lport ? tgi->lport : hp->dest, - par->in, true); + iph->saddr, iph->daddr, + hp->source, hp->dest, + skb->dev, NFT_LOOKUP_ESTABLISHED); + + laddr = tproxy_laddr4(skb, laddr, iph->daddr); + if (!lport) + lport = hp->dest; + + /* UDP has no TCP_TIME_WAIT state, so we never enter here */ + if (sk && sk->sk_state == TCP_TIME_WAIT) + /* reopening a TIME_WAIT connection needs special handling */ + sk = tproxy_handle_time_wait4(skb, laddr, lport, sk); + else if (!sk) + /* no, there's no established connection, check if + * there's a listener on the redirected addr/port */ + sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + iph->saddr, laddr, + hp->source, lport, + skb->dev, NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ - if (sk && nf_tproxy_assign_sock(skb, sk)) { + if (sk && tproxy_sk_is_transparent(sk)) { + /* This should be in a separate target, but we don't do multiple + targets on the same rule yet */ + skb->mark = (skb->mark & ~mark_mask) ^ mark_value; + + /* + pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n", + tproto, &iph->saddr, ntohs(hp->dest), + &tgi->laddr.in6, ntohs(tgi->lport), skb->mark); + */ + + nf_tproxy_assign_sock(skb, sk); + return NF_ACCEPT; + } + + pr_debug("no socket, dropping: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n", + iph->protocol, &iph->saddr, ntohs(hp->source), + &iph->daddr, ntohs(hp->dest), skb->mark); + return NF_DROP; +} + +static unsigned int +tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_tproxy_target_info *tgi = par->targinfo; + + return tproxy_tg4(skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); +} + +static unsigned int +tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; + + return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); +} + +#ifdef XT_TPROXY_HAVE_IPV6 + +static inline const struct in6_addr * 
+tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, + const struct in6_addr *daddr) +{ + struct inet6_dev *indev; + struct inet6_ifaddr *ifa; + struct in6_addr *laddr; + + if (!ipv6_addr_any(user_laddr)) + return user_laddr; + laddr = NULL; + + rcu_read_lock(); + indev = __in6_dev_get(skb->dev); + if (indev) + list_for_each_entry(ifa, &indev->addr_list, if_list) { + if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED)) + continue; + + laddr = &ifa->addr; + break; + } + rcu_read_unlock(); + + return laddr ? laddr : daddr; +} + +/** + * tproxy_handle_time_wait6() - handle IPv6 TCP TIME_WAIT reopen redirections + * @skb: The skb being processed. + * @tproto: Transport protocol. + * @thoff: Transport protocol header offset. + * @par: Iptables target parameters. + * @sk: The TIME_WAIT TCP socket found by the lookup. + * + * We have to handle SYN packets arriving to TIME_WAIT sockets + * differently: instead of reopening the connection we should rather + * redirect the new connection to the proxy if there's a listener + * socket present. + * + * tproxy_handle_time_wait6() consumes the socket reference passed in. + * + * Returns the listener socket if there's one, the TIME_WAIT socket if + * no such listener is found, or NULL if the TCP header is incomplete. + */ +static struct sock * +tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, + const struct xt_action_param *par, + struct sock *sk) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct tcphdr _hdr, *hp; + const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; + + hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); + if (hp == NULL) { + inet_twsk_put(inet_twsk(sk)); + return NULL; + } + + if (hp->syn && !hp->rst && !hp->ack && !hp->fin) { + /* SYN to a TIME_WAIT socket, we'd rather redirect it + * to a listener socket if there's one */ + struct sock *sk2; + + sk2 = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + &iph->saddr, + tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), + hp->source, + tgi->lport ? tgi->lport : hp->dest, + skb->dev, NFT_LOOKUP_LISTENER); + if (sk2) { + inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_put(inet_twsk(sk)); + sk = sk2; + } + } + + return sk; +} + +static unsigned int +tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; + struct udphdr _hdr, *hp; + struct sock *sk; + const struct in6_addr *laddr; + __be16 lport; + int thoff; + int tproto; + + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL); + if (tproto < 0) { + pr_debug("unable to find transport header in IPv6 packet, dropping\n"); + return NF_DROP; + } + + hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); + if (hp == NULL) { + pr_debug("unable to grab transport header contents in IPv6 packet, dropping\n"); + return NF_DROP; + } + + /* check if there's an ongoing connection on the packet + * addresses, this happens if the redirect already happened + * and the current packet belongs to an already established + * connection */ + sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + &iph->saddr, &iph->daddr, + hp->source, hp->dest, + par->in, NFT_LOOKUP_ESTABLISHED); + + laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); + lport = tgi->lport ? 
tgi->lport : hp->dest; + + /* UDP has no TCP_TIME_WAIT state, so we never enter here */ + if (sk && sk->sk_state == TCP_TIME_WAIT) + /* reopening a TIME_WAIT connection needs special handling */ + sk = tproxy_handle_time_wait6(skb, tproto, thoff, par, sk); + else if (!sk) + /* no there's no established connection, check if + * there's a listener on the redirected addr/port */ + sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + &iph->saddr, laddr, + hp->source, lport, + par->in, NFT_LOOKUP_LISTENER); + + /* NOTE: assign_sock consumes our sk reference */ + if (sk && tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; - pr_debug("redirecting: proto %u %08x:%u -> %08x:%u, mark: %x\n", - iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), - ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); + pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n", + tproto, &iph->saddr, ntohs(hp->source), + laddr, ntohs(lport), skb->mark); + + nf_tproxy_assign_sock(skb, sk); return NF_ACCEPT; } - pr_debug("no socket, dropping: proto %u %08x:%u -> %08x:%u, mark: %x\n", - iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), - ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); + pr_debug("no socket, dropping: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n", + tproto, &iph->saddr, ntohs(hp->source), + &iph->daddr, ntohs(hp->dest), skb->mark); + return NF_DROP; } -static bool tproxy_tg_check(const struct xt_tgchk_param *par) +static int tproxy_tg6_check(const struct xt_tgchk_param *par) +{ + const struct ip6t_ip6 *i = par->entryinfo; + + if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) + && !(i->flags & IP6T_INV_PROTO)) + return 0; + + pr_info("Can be used only in combination with " + "either -p tcp or -p udp\n"); + return -EINVAL; +} +#endif + +static int tproxy_tg4_check(const struct xt_tgchk_param *par) { const struct ipt_ip *i = par->entryinfo; @@ -72,31 +371,64 @@ static bool tproxy_tg_check(const struct xt_tgchk_param *par) return false; } -static struct xt_target tproxy_tg_reg __read_mostly = { - .name = "TPROXY", - .family = AF_INET, - .table = "mangle", - .target = tproxy_tg, - .targetsize = sizeof(struct xt_tproxy_target_info), - .checkentry = tproxy_tg_check, - .hooks = 1 << NF_INET_PRE_ROUTING, - .me = THIS_MODULE, +static struct xt_target tproxy_tg_reg[] __read_mostly = { + { + .name = "TPROXY", + .family = NFPROTO_IPV4, + .table = "mangle", + .target = tproxy_tg4_v0, + .revision = 0, + .targetsize = sizeof(struct xt_tproxy_target_info), + .checkentry = tproxy_tg4_check, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, + }, + { + .name = "TPROXY", + .family = NFPROTO_IPV4, + .table = "mangle", + .target = tproxy_tg4_v1, + .revision = 1, + .targetsize = sizeof(struct xt_tproxy_target_info_v1), + .checkentry = tproxy_tg4_check, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, + }, +#ifdef XT_TPROXY_HAVE_IPV6 + { + .name = "TPROXY", + .family = NFPROTO_IPV6, + .table = "mangle", + .target = tproxy_tg6_v1, + .revision = 1, + .targetsize = sizeof(struct xt_tproxy_target_info_v1), + .checkentry = tproxy_tg6_check, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, + }, +#endif + }; static int __init tproxy_tg_init(void) { nf_defrag_ipv4_enable(); - return xt_register_target(&tproxy_tg_reg); +#ifdef XT_TPROXY_HAVE_IPV6 + nf_defrag_ipv6_enable(); +#endif + + return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); } 
static void __exit tproxy_tg_exit(void) { - xt_unregister_target(&tproxy_tg_reg); + xt_unregister_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); } module_init(tproxy_tg_init); module_exit(tproxy_tg_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Krisztian Kovacs"); +MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs"); MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module."); MODULE_ALIAS("ipt_TPROXY"); +MODULE_ALIAS("ip6t_TPROXY"); diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index fbb04b86..df48967a 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -11,7 +11,7 @@ MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); static unsigned int -trace_tg(struct sk_buff *skb, const struct xt_target_param *par) +trace_tg(struct sk_buff *skb, const struct xt_action_param *par) { skb->nf_trace = 1; return XT_CONTINUE; diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 225ee3ec..0c9800b0 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -85,7 +85,7 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) } static bool -xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par) +xt_cluster_mt(const struct sk_buff *skb, const struct xt_action_param *par) { struct sk_buff *pskb = (struct sk_buff *)skb; const struct xt_cluster_match_info *info = par->matchinfo; @@ -119,7 +119,7 @@ xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (ct == NULL) return false; - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return false; if (ct->master) @@ -131,7 +131,7 @@ xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par) !!(info->flags & XT_CLUSTER_F_INV); } -static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) +static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_cluster_match_info *info = par->matchinfo; diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index e8217983..1dbb3e13 100644 --- a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c @@ -16,7 +16,7 @@ MODULE_ALIAS("ipt_comment"); MODULE_ALIAS("ip6t_comment"); static bool -comment_mt(const struct sk_buff *skb, const struct xt_match_param *par) +comment_mt(const struct sk_buff *skb, const struct xt_action_param *par) { /* We always match */ return true; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 955e6598..0744067b 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -17,7 +17,7 @@ MODULE_ALIAS("ipt_connbytes"); MODULE_ALIAS("ip6t_connbytes"); static bool -connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par) +connbytes_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connbytes_info *sinfo = par->matchinfo; const struct nf_conn *ct; @@ -92,7 +92,7 @@ connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par) return what >= sinfo->count.from; } -static bool connbytes_mt_check(const struct xt_mtchk_param *par) +static int connbytes_mt_check(const struct xt_mtchk_param *par) { const struct xt_connbytes_info *sinfo = par->matchinfo; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 38f03f75..b521f6fc 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -28,6 +28,7 @@ #include #include #include +#include /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { @@ -99,7 
+100,8 @@ same_source_net(const union nf_inet_addr *addr, } } -static int count_them(struct xt_connlimit_data *data, +static int count_them(struct net *net, + struct xt_connlimit_data *data, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, @@ -122,7 +124,8 @@ static int count_them(struct xt_connlimit_data *data, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = nf_conntrack_find_get(&init_net, &conn->tuple); + found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, + &conn->tuple); found_ct = NULL; if (found != NULL) @@ -178,8 +181,9 @@ static int count_them(struct xt_connlimit_data *data, } static bool -connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) +connlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par) { + struct net *net = dev_net(par->in ? par->in : par->out); const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; @@ -204,7 +208,7 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) } spin_lock_bh(&info->data->lock); - connections = count_them(info->data, tuple_ptr, &addr, + connections = count_them(net, info->data, tuple_ptr, &addr, &info->mask, par->family); spin_unlock_bh(&info->data->lock); @@ -221,7 +225,7 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) return false; } -static bool connlimit_mt_check(const struct xt_mtchk_param *par) +static int connlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_connlimit_info *info = par->matchinfo; unsigned int i; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 122aa8b0..6e360ea8 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -1,5 +1,5 @@ /* - * xt_connmark - Netfilter module to match connection mark values + * xt_connmark - Netfilter module to operate on connection marks * * Copyright (C) 2002,2004 MARA Systems AB * by Henrik Nordstrom @@ -24,17 +24,73 @@ #include #include #include +#include #include #include MODULE_AUTHOR("Henrik Nordstrom "); -MODULE_DESCRIPTION("Xtables: connection mark match"); +MODULE_DESCRIPTION("Xtables: connection mark operations"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_CONNMARK"); +MODULE_ALIAS("ip6t_CONNMARK"); MODULE_ALIAS("ipt_connmark"); MODULE_ALIAS("ip6t_connmark"); +static unsigned int +connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_connmark_tginfo1 *info = par->targinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + u_int32_t newmark; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return XT_CONTINUE; + + switch (info->mode) { + case XT_CONNMARK_SET: + newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; + if (ct->mark != newmark) { + ct->mark = newmark; + nf_conntrack_event_cache(IPCT_MARK, ct); + } + break; + case XT_CONNMARK_SAVE: + newmark = (ct->mark & ~info->ctmask) ^ + (skb->mark & info->nfmask); + if (ct->mark != newmark) { + ct->mark = newmark; + nf_conntrack_event_cache(IPCT_MARK, ct); + } + break; + case XT_CONNMARK_RESTORE: + newmark = (skb->mark & ~info->nfmask) ^ + (ct->mark & info->ctmask); + skb->mark = newmark; + break; + } + + return XT_CONTINUE; +} + +static bool connmark_tg_check(const struct xt_tgchk_param *par) +{ + if (nf_ct_l3proto_try_module_get(par->family) < 0) { + printk(KERN_WARNING "cannot load conntrack support for " + "proto=%u\n", par->family); + return false; + } + return true; +} + +static 
void connmark_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_l3proto_module_put(par->family); +} + static bool -connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) +connmark_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connmark_mtinfo1 *info = par->matchinfo; enum ip_conntrack_info ctinfo; @@ -47,7 +103,7 @@ connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ((ct->mark & info->mask) == info->mark) ^ info->invert; } -static bool connmark_mt_check(const struct xt_mtchk_param *par) +static int connmark_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "cannot load conntrack support for " @@ -62,6 +118,17 @@ static void connmark_mt_destroy(const struct xt_mtdtor_param *par) nf_ct_l3proto_module_put(par->family); } +static struct xt_target connmark_tg_reg __read_mostly = { + .name = "CONNMARK", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = connmark_tg_check, + .target = connmark_tg, + .targetsize = sizeof(struct xt_connmark_tginfo1), + .destroy = connmark_tg_destroy, + .me = THIS_MODULE, +}; + static struct xt_match connmark_mt_reg __read_mostly = { .name = "connmark", .revision = 1, @@ -75,12 +142,23 @@ static struct xt_match connmark_mt_reg __read_mostly = { static int __init connmark_mt_init(void) { - return xt_register_match(&connmark_mt_reg); + int ret; + + ret = xt_register_target(&connmark_tg_reg); + if (ret < 0) + return ret; + ret = xt_register_match(&connmark_mt_reg); + if (ret < 0) { + xt_unregister_target(&connmark_tg_reg); + return ret; + } + return 0; } static void __exit connmark_mt_exit(void) { xt_unregister_match(&connmark_mt_reg); + xt_unregister_target(&connmark_tg_reg); } module_init(connmark_mt_init); diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index ae66305f..e468ebc8 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -113,7 +113,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, } static bool -conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par, +conntrack_mt(const struct sk_buff *skb, const struct xt_action_param *par, u16 state_mask, u16 status_mask) { const struct xt_conntrack_mtinfo2 *info = par->matchinfo; @@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par, ct = nf_ct_get(skb, &ctinfo); - if (ct == &nf_conntrack_untracked) - statebit = XT_CONNTRACK_STATE_UNTRACKED; - else if (ct != NULL) - statebit = XT_CONNTRACK_STATE_BIT(ctinfo); - else + if (ct) { + if (nf_ct_is_untracked(ct)) + statebit = XT_CONNTRACK_STATE_UNTRACKED; + else + statebit = XT_CONNTRACK_STATE_BIT(ctinfo); + } else statebit = XT_CONNTRACK_STATE_INVALID; if (info->match_flags & XT_CONNTRACK_STATE) { @@ -191,7 +192,7 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par, } static bool -conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +conntrack_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_conntrack_mtinfo1 *info = par->matchinfo; @@ -199,14 +200,14 @@ conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) } static bool -conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par) +conntrack_mt_v2(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_conntrack_mtinfo2 *info = par->matchinfo; return conntrack_mt(skb, par, info->state_mask, 
info->status_mask); } -static bool conntrack_mt_check(const struct xt_mtchk_param *par) +static int conntrack_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 0989f29a..84f961e1 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -95,7 +95,7 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, } static bool -dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +dccp_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_dccp_info *info = par->matchinfo; const struct dccp_hdr *dh; @@ -123,7 +123,7 @@ dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par) XT_DCCP_OPTION, info->flags, info->invflags); } -static bool dccp_mt_check(const struct xt_mtchk_param *par) +static int dccp_mt_check(const struct xt_mtchk_param *par) { const struct xt_dccp_info *info = par->matchinfo; diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 0280d3a8..c52e98d0 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tos"); MODULE_ALIAS("ip6t_tos"); static bool -dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +dscp_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; @@ -34,7 +34,7 @@ dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par) } static bool -dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +dscp_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; @@ -42,7 +42,7 @@ dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par) return (dscp == info->dscp) ^ !!info->invert; } -static bool dscp_mt_check(const struct xt_mtchk_param *par) +static int dscp_mt_check(const struct xt_mtchk_param *par) { const struct xt_dscp_info *info = par->matchinfo; @@ -54,7 +54,8 @@ static bool dscp_mt_check(const struct xt_mtchk_param *par) return true; } -static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool tos_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct xt_tos_match_info *info = par->matchinfo; diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 60943996..68e4aef0 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -24,25 +24,20 @@ MODULE_DESCRIPTION("Xtables: IPsec-ESP packet match"); MODULE_ALIAS("ipt_esp"); MODULE_ALIAS("ip6t_esp"); -#if 0 -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline bool spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { bool r; - duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', - min, spi, max); + pr_debug("esp spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + invert ? '!' 
: ' ', min, spi, max); r = (spi >= min && spi <= max) ^ invert; - duprintf(" result %s\n", r ? "PASS" : "FAILED"); + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } -static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool esp_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct ip_esp_hdr *eh; struct ip_esp_hdr _esp; @@ -57,7 +52,7 @@ static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par) /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ - duprintf("Dropping evil ESP tinygram.\n"); + pr_debug("Dropping evil ESP tinygram.\n"); *par->hotdrop = true; return false; } @@ -66,12 +61,12 @@ static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par) !!(espinfo->invflags & XT_ESP_INV_SPI)); } -static bool esp_mt_check(const struct xt_mtchk_param *par) +static int esp_mt_check(const struct xt_mtchk_param *par) { const struct xt_esp *espinfo = par->matchinfo; if (espinfo->invflags & ~XT_ESP_INV_MASK) { - duprintf("xt_esp: unknown flags %X\n", espinfo->invflags); + pr_debug("unknown flags %X\n", espinfo->invflags); return false; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index dd16e404..da208fff 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -185,79 +185,8 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(unsigned long htlong); -static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family) -{ - struct xt_hashlimit_htable *hinfo; - unsigned int size; - unsigned int i; - - if (minfo->cfg.size) - size = minfo->cfg.size; - else { - size = ((totalram_pages << PAGE_SHIFT) / 16384) / - sizeof(struct list_head); - if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) - size = 8192; - if (size < 16) - size = 16; - } - /* FIXME: don't use vmalloc() here or anywhere else -HW */ - hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + - sizeof(struct list_head) * size); - if (!hinfo) { - printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n"); - return -1; - } - minfo->hinfo = hinfo; - - /* copy match config into hashtable config */ - hinfo->cfg.mode = minfo->cfg.mode; - hinfo->cfg.avg = minfo->cfg.avg; - hinfo->cfg.burst = minfo->cfg.burst; - hinfo->cfg.max = minfo->cfg.max; - hinfo->cfg.gc_interval = minfo->cfg.gc_interval; - hinfo->cfg.expire = minfo->cfg.expire; - - if (family == NFPROTO_IPV4) - hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32; - else - hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128; - - hinfo->cfg.size = size; - if (!hinfo->cfg.max) - hinfo->cfg.max = 8 * hinfo->cfg.size; - else if (hinfo->cfg.max < hinfo->cfg.size) - hinfo->cfg.max = hinfo->cfg.size; - - for (i = 0; i < hinfo->cfg.size; i++) - INIT_HLIST_HEAD(&hinfo->hash[i]); - - atomic_set(&hinfo->use, 1); - hinfo->count = 0; - hinfo->family = family; - hinfo->rnd_initialized = 0; - spin_lock_init(&hinfo->lock); - hinfo->pde = proc_create_data(minfo->name, 0, - (family == NFPROTO_IPV4) ? 
- hashlimit_procdir4 : hashlimit_procdir6, - &dl_file_ops, hinfo); - if (!hinfo->pde) { - vfree(hinfo); - return -1; - } - - setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo); - hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); - add_timer(&hinfo->timer); - - spin_lock_bh(&hashlimit_lock); - hlist_add_head(&hinfo->node, &hashlimit_htables); - spin_unlock_bh(&hashlimit_lock); - - return 0; -} - -static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) +static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, + u_int8_t family) { struct xt_hashlimit_htable *hinfo; unsigned int size; @@ -563,58 +492,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, } static bool -hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct xt_hashlimit_info *r = par->matchinfo; - struct xt_hashlimit_htable *hinfo = r->hinfo; - unsigned long now = jiffies; - struct dsthash_ent *dh; - struct dsthash_dst dst; - - if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) - goto hotdrop; - - spin_lock_bh(&hinfo->lock); - dh = dsthash_find(hinfo, &dst); - if (!dh) { - dh = dsthash_alloc_init(hinfo, &dst); - if (!dh) { - spin_unlock_bh(&hinfo->lock); - goto hotdrop; - } - - dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - dh->rateinfo.prev = jiffies; - dh->rateinfo.credit = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.cost = user2credits(hinfo->cfg.avg); - } else { - /* update expiration timeout */ - dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now); - } - - if (dh->rateinfo.credit >= dh->rateinfo.cost) { - /* We're underlimit. */ - dh->rateinfo.credit -= dh->rateinfo.cost; - spin_unlock_bh(&hinfo->lock); - return true; - } - - spin_unlock_bh(&hinfo->lock); - - /* default case: we're overlimit, thus don't match */ - return false; - -hotdrop: - *par->hotdrop = true; - return false; -} - -static bool -hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) +hashlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; struct xt_hashlimit_htable *hinfo = info->hinfo; @@ -663,48 +541,7 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) return false; } -static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) -{ - struct xt_hashlimit_info *r = par->matchinfo; - - /* Check for overflow. */ - if (r->cfg.burst == 0 || - user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) { - printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n", - r->cfg.avg, r->cfg.burst); - return false; - } - if (r->cfg.mode == 0 || - r->cfg.mode > (XT_HASHLIMIT_HASH_DPT | - XT_HASHLIMIT_HASH_DIP | - XT_HASHLIMIT_HASH_SIP | - XT_HASHLIMIT_HASH_SPT)) - return false; - if (!r->cfg.gc_interval) - return false; - if (!r->cfg.expire) - return false; - if (r->name[sizeof(r->name) - 1] != '\0') - return false; - - /* This is the best we've got: We cannot release and re-grab lock, - * since checkentry() is called before x_tables.c grabs xt_mutex. - * We also cannot grab the hashtable spinlock, since htable_create will - * call vmalloc, and that can sleep. And we cannot just re-search - * the list of htable's in htable_create(), since then we would - * create duplicate proc files. 
-HW */ - mutex_lock(&hlimit_mutex); - r->hinfo = htable_find_get(r->name, par->match->family); - if (!r->hinfo && htable_create_v0(r, par->match->family) != 0) { - mutex_unlock(&hlimit_mutex); - return false; - } - mutex_unlock(&hlimit_mutex); - - return true; -} - -static bool hashlimit_mt_check(const struct xt_mtchk_param *par) +static int hashlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_hashlimit_mtinfo1 *info = par->matchinfo; @@ -744,14 +581,6 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par) return true; } -static void -hashlimit_mt_destroy_v0(const struct xt_mtdtor_param *par) -{ - const struct xt_hashlimit_info *r = par->matchinfo; - - htable_put(r->hinfo); -} - static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; @@ -759,46 +588,7 @@ static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) htable_put(info->hinfo); } -#ifdef CONFIG_COMPAT -struct compat_xt_hashlimit_info { - char name[IFNAMSIZ]; - struct hashlimit_cfg cfg; - compat_uptr_t hinfo; - compat_uptr_t master; -}; - -static void hashlimit_mt_compat_from_user(void *dst, void *src) -{ - int off = offsetof(struct compat_xt_hashlimit_info, hinfo); - - memcpy(dst, src, off); - memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off); -} - -static int hashlimit_mt_compat_to_user(void __user *dst, void *src) -{ - int off = offsetof(struct compat_xt_hashlimit_info, hinfo); - - return copy_to_user(dst, src, off) ? -EFAULT : 0; -} -#endif - static struct xt_match hashlimit_mt_reg[] __read_mostly = { - { - .name = "hashlimit", - .revision = 0, - .family = NFPROTO_IPV4, - .match = hashlimit_mt_v0, - .matchsize = sizeof(struct xt_hashlimit_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_hashlimit_info), - .compat_from_user = hashlimit_mt_compat_from_user, - .compat_to_user = hashlimit_mt_compat_to_user, -#endif - .checkentry = hashlimit_mt_check_v0, - .destroy = hashlimit_mt_destroy_v0, - .me = THIS_MODULE - }, { .name = "hashlimit", .revision = 1, @@ -810,20 +600,6 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .me = THIS_MODULE, }, #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - { - .name = "hashlimit", - .family = NFPROTO_IPV6, - .match = hashlimit_mt_v0, - .matchsize = sizeof(struct xt_hashlimit_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_hashlimit_info), - .compat_from_user = hashlimit_mt_compat_from_user, - .compat_to_user = hashlimit_mt_compat_to_user, -#endif - .checkentry = hashlimit_mt_check_v0, - .destroy = hashlimit_mt_destroy_v0, - .me = THIS_MODULE - }, { .name = "hashlimit", .revision = 1, diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 64fc7f27..1534ea6e 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_helper"); static bool -helper_mt(const struct sk_buff *skb, const struct xt_match_param *par) +helper_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_helper_info *info = par->matchinfo; const struct nf_conn *ct; @@ -54,7 +54,7 @@ helper_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool helper_mt_check(const struct xt_mtchk_param *par) +static int helper_mt_check(const struct xt_mtchk_param *par) { struct xt_helper_info *info = par->matchinfo; diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c index 7726154c..2fd5870c 100644 --- 
a/net/netfilter/xt_hl.c +++ b/net/netfilter/xt_hl.c @@ -25,7 +25,8 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_ttl"); MODULE_ALIAS("ip6t_hl"); -static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool ttl_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct ipt_ttl_info *info = par->matchinfo; const u8 ttl = ip_hdr(skb)->ttl; @@ -48,7 +49,8 @@ static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) return false; } -static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +static bool hl_mt6(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct ip6t_hl_info *info = par->matchinfo; const struct ipv6hdr *ip6h = ipv6_hdr(skb); diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index ffc96387..8c7e8155 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -16,7 +16,7 @@ #include static bool -iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par) +iprange_mt4(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_iprange_mtinfo *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); @@ -67,7 +67,7 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b) } static bool -iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +iprange_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_iprange_mtinfo *info = par->matchinfo; const struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index c4871ca6..842149b4 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_length"); MODULE_ALIAS("ip6t_length"); static bool -length_mt(const struct sk_buff *skb, const struct xt_match_param *par) +length_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_length_info *info = par->matchinfo; u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len); @@ -30,7 +30,7 @@ length_mt(const struct sk_buff *skb, const struct xt_match_param *par) } static bool -length_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +length_mt6(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_length_info *info = par->matchinfo; const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) + diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 2773be6a..12a0897c 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -63,7 +63,7 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) static bool -limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) +limit_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_rateinfo *r = par->matchinfo; struct xt_limit_priv *priv = r->master; @@ -97,7 +97,7 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE; } -static bool limit_mt_check(const struct xt_mtchk_param *par) +static int limit_mt_check(const struct xt_mtchk_param *par) { struct xt_rateinfo *r = par->matchinfo; struct xt_limit_priv *priv; diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index c2007116..76131c4a 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -24,7 +24,8 @@ MODULE_DESCRIPTION("Xtables: MAC address match"); MODULE_ALIAS("ipt_mac"); MODULE_ALIAS("ip6t_mac"); -static bool mac_mt(const struct sk_buff *skb, 
const struct xt_match_param *par) +static bool mac_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct xt_mac_info *info = par->matchinfo; diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 1db07d81..3c834707 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -18,18 +18,38 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); -MODULE_DESCRIPTION("Xtables: packet mark match"); +MODULE_DESCRIPTION("Xtables: packet mark operations"); MODULE_ALIAS("ipt_mark"); MODULE_ALIAS("ip6t_mark"); +MODULE_ALIAS("ipt_MARK"); +MODULE_ALIAS("ip6t_MARK"); + +static unsigned int +mark_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_mark_tginfo2 *info = par->targinfo; + + skb->mark = (skb->mark & ~info->mask) ^ info->mark; + return XT_CONTINUE; +} static bool -mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) +mark_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_mark_mtinfo1 *info = par->matchinfo; return ((skb->mark & info->mask) == info->mark) ^ info->invert; } +static struct xt_target mark_tg_reg __read_mostly = { + .name = "MARK", + .revision = 2, + .family = NFPROTO_UNSPEC, + .target = mark_tg, + .targetsize = sizeof(struct xt_mark_tginfo2), + .me = THIS_MODULE, +}; + static struct xt_match mark_mt_reg __read_mostly = { .name = "mark", .revision = 1, @@ -41,12 +61,23 @@ static struct xt_match mark_mt_reg __read_mostly = { static int __init mark_mt_init(void) { - return xt_register_match(&mark_mt_reg); + int ret; + + ret = xt_register_target(&mark_tg_reg); + if (ret < 0) + return ret; + ret = xt_register_match(&mark_mt_reg); + if (ret < 0) { + xt_unregister_target(&mark_tg_reg); + return ret; + } + return 0; } static void __exit mark_mt_exit(void) { xt_unregister_match(&mark_mt_reg); + xt_unregister_target(&mark_tg_reg); } module_init(mark_mt_init); diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index d06bb2dd..2ddafbe5 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -8,7 +8,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -26,29 +26,6 @@ MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP MODULE_ALIAS("ipt_multiport"); MODULE_ALIAS("ip6t_multiport"); -#if 0 -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - -/* Returns 1 if the port is matched by the test, 0 otherwise. */ -static inline bool -ports_match_v0(const u_int16_t *portlist, enum xt_multiport_flags flags, - u_int8_t count, u_int16_t src, u_int16_t dst) -{ - unsigned int i; - for (i = 0; i < count; i++) { - if (flags != XT_MULTIPORT_DESTINATION && portlist[i] == src) - return true; - - if (flags != XT_MULTIPORT_SOURCE && portlist[i] == dst) - return true; - } - - return false; -} - /* Returns 1 if the port is matched by the test, 0 otherwise. 
*/ static inline bool ports_match_v1(const struct xt_multiport_v1 *minfo, @@ -63,7 +40,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, if (minfo->pflags[i]) { /* range port matching */ e = minfo->ports[++i]; - duprintf("src or dst matches with %d-%d?\n", s, e); + pr_debug("src or dst matches with %d-%d?\n", s, e); if (minfo->flags == XT_MULTIPORT_SOURCE && src >= s && src <= e) @@ -77,7 +54,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, return true ^ minfo->invert; } else { /* exact port matching */ - duprintf("src or dst matches with %d?\n", s); + pr_debug("src or dst matches with %d?\n", s); if (minfo->flags == XT_MULTIPORT_SOURCE && src == s) @@ -95,31 +72,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, } static bool -multiport_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const __be16 *pptr; - __be16 _ports[2]; - const struct xt_multiport *multiinfo = par->matchinfo; - - if (par->fragoff != 0) - return false; - - pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports); - if (pptr == NULL) { - /* We've been asked to examine this packet, and we - * can't. Hence, no choice but to drop. - */ - duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); - *par->hotdrop = true; - return false; - } - - return ports_match_v0(multiinfo->ports, multiinfo->flags, - multiinfo->count, ntohs(pptr[0]), ntohs(pptr[1])); -} - -static bool -multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par) +multiport_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const __be16 *pptr; __be16 _ports[2]; @@ -133,7 +86,7 @@ multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par) /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. 
*/ - duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); + pr_debug("Dropping evil offset=0 tinygram.\n"); *par->hotdrop = true; return false; } @@ -158,16 +111,7 @@ check(u_int16_t proto, && count <= XT_MULTI_PORTS; } -static bool multiport_mt_check_v0(const struct xt_mtchk_param *par) -{ - const struct ipt_ip *ip = par->entryinfo; - const struct xt_multiport *multiinfo = par->matchinfo; - - return check(ip->proto, ip->invflags, multiinfo->flags, - multiinfo->count); -} - -static bool multiport_mt_check(const struct xt_mtchk_param *par) +static int multiport_mt_check(const struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; const struct xt_multiport_v1 *multiinfo = par->matchinfo; @@ -176,16 +120,7 @@ static bool multiport_mt_check(const struct xt_mtchk_param *par) multiinfo->count); } -static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par) -{ - const struct ip6t_ip6 *ip = par->entryinfo; - const struct xt_multiport *multiinfo = par->matchinfo; - - return check(ip->proto, ip->invflags, multiinfo->flags, - multiinfo->count); -} - -static bool multiport_mt6_check(const struct xt_mtchk_param *par) +static int multiport_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_ip6 *ip = par->entryinfo; const struct xt_multiport_v1 *multiinfo = par->matchinfo; @@ -195,15 +130,6 @@ static bool multiport_mt6_check(const struct xt_mtchk_param *par) } static struct xt_match multiport_mt_reg[] __read_mostly = { - { - .name = "multiport", - .family = NFPROTO_IPV4, - .revision = 0, - .checkentry = multiport_mt_check_v0, - .match = multiport_mt_v0, - .matchsize = sizeof(struct xt_multiport), - .me = THIS_MODULE, - }, { .name = "multiport", .family = NFPROTO_IPV4, @@ -213,15 +139,6 @@ static struct xt_match multiport_mt_reg[] __read_mostly = { .matchsize = sizeof(struct xt_multiport_v1), .me = THIS_MODULE, }, - { - .name = "multiport", - .family = NFPROTO_IPV6, - .revision = 0, - .checkentry = multiport_mt6_check_v0, - .match = multiport_mt_v0, - .matchsize = sizeof(struct xt_multiport), - .me = THIS_MODULE, - }, { .name = "multiport", .family = NFPROTO_IPV6, diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 4d1a41bb..644ff1ab 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -193,8 +193,8 @@ static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info return ip->ttl == f_ttl; } -static bool xt_osf_match_packet(const struct sk_buff *skb, - const struct xt_match_param *p) +static bool +xt_osf_match_packet(const struct sk_buff *skb, const struct xt_action_param *p) { const struct xt_osf_info *info = p->matchinfo; const struct iphdr *ip = ip_hdr(skb); diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index d24c76df..3dd1391d 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -18,7 +18,7 @@ #include static bool -owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) +owner_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 8d28ca58..52629734 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -22,7 +22,7 @@ MODULE_ALIAS("ip6t_physdev"); static bool -physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) +physdev_mt(const struct sk_buff *skb, const struct xt_action_param *par) { static const char nulldevname[IFNAMSIZ] 
__attribute__((aligned(sizeof(long)))); const struct xt_physdev_info *info = par->matchinfo; @@ -83,7 +83,7 @@ match_outdev: return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } -static bool physdev_mt_check(const struct xt_mtchk_param *par) +static int physdev_mt_check(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 69da1d3a..d95f2149 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_pkttype"); MODULE_ALIAS("ip6t_pkttype"); static bool -pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) +pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_pkttype_info *info = par->matchinfo; u_int8_t type; diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 4cbfebda..68c53689 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -110,7 +110,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, } static bool -policy_mt(const struct sk_buff *skb, const struct xt_match_param *par) +policy_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_policy_info *info = par->matchinfo; int ret; @@ -128,7 +128,7 @@ policy_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool policy_mt_check(const struct xt_mtchk_param *par) +static int policy_mt_check(const struct xt_mtchk_param *par) { const struct xt_policy_info *info = par->matchinfo; diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c new file mode 100644 index 00000000..814432a5 --- /dev/null +++ b/net/netfilter/xt_qtaguid.c @@ -0,0 +1,2780 @@ +/* + * Kernel iptables module to track stats for packets based on user tags. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * There are run-time debug flags enabled via the debug_mask module param, or + * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h. + */ +#define DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "xt_qtaguid_internal.h" +#include "xt_qtaguid_print.h" + +#define pr_warn_once printk +/* + * We only use the xt_socket funcs within a similar context to avoid unexpected + * return values. 
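+ * The hook mask defined just below restricts those xt_socket lookups to
+ * the NF_INET_PRE_ROUTING and NF_INET_LOCAL_IN hooks.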
+ */ +#define XT_SOCKET_SUPPORTED_HOOKS \ + ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) + + +static const char *module_procdirname = "xt_qtaguid"; +static struct proc_dir_entry *xt_qtaguid_procdir; + +static unsigned int proc_iface_perms = S_IRUGO; +module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR); + +static struct proc_dir_entry *xt_qtaguid_stats_file; +static unsigned int proc_stats_perms = S_IRUGO; +module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR); + +static struct proc_dir_entry *xt_qtaguid_ctrl_file; +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO; +#else +static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR; +#endif +module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR); + +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include +static gid_t proc_stats_readall_gid = AID_NET_BW_STATS; +static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT; +#else +/* 0 means, don't limit anybody */ +static gid_t proc_stats_readall_gid; +static gid_t proc_ctrl_write_gid; +#endif +module_param_named(stats_readall_gid, proc_stats_readall_gid, uint, + S_IRUGO | S_IWUSR); +module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint, + S_IRUGO | S_IWUSR); + +/* + * Limit the number of active tags (via socket tags) for a given UID. + * Multiple processes could share the UID. + */ +static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS; +module_param(max_sock_tags, int, S_IRUGO | S_IWUSR); + +/* + * After the kernel has initiallized this module, it is still possible + * to make it passive. + * Setting passive to Y: + * - the iface stats handling will not act on notifications. + * - iptables matches will never match. + * - ctrl commands silently succeed. + * - stats are always empty. + * This is mostly usefull when a bug is suspected. + */ +static bool module_passive; +module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR); + +/* + * Control how qtaguid data is tracked per proc/uid. + * Setting tag_tracking_passive to Y: + * - don't create proc specific structs to track tags + * - don't check that active tag stats exceed some limits. + * - don't clean up socket tags on process exits. + * This is mostly usefull when a bug is suspected. + */ +static bool qtu_proc_handling_passive; +module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool, + S_IRUGO | S_IWUSR); + +#define QTU_DEV_NAME "xt_qtaguid" + +uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK; +module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR); + +/*---------------------------------------------------------------------------*/ +static const char *iface_stat_procdirname = "iface_stat"; +static struct proc_dir_entry *iface_stat_procdir; +static const char *iface_stat_all_procfilename = "iface_stat_all"; +static struct proc_dir_entry *iface_stat_all_procfile; + +/* + * Ordering of locks: + * outer locks: + * iface_stat_list_lock + * sock_tag_list_lock + * inner locks: + * uid_tag_data_tree_lock + * tag_counter_set_list_lock + * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock + * is acquired. 
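+ * In other words, when both are needed the outer sock_tag_list_lock is
+ * taken first and the inner uid_tag_data_tree_lock second. An
+ * illustrative (assumed, not verbatim) nesting using the same
+ * spin_lock_bh() calls the code relies on:
+ *
+ *	spin_lock_bh(&sock_tag_list_lock);
+ *	spin_lock_bh(&uid_tag_data_tree_lock);
+ *	... walk sock_tag_tree and uid_tag_data_tree ...
+ *	spin_unlock_bh(&uid_tag_data_tree_lock);
+ *	spin_unlock_bh(&sock_tag_list_lock);
+ *
+ * Taking them in the opposite order anywhere would invite deadlock.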
+ * + * Call tree with all lock holders as of 2011-09-25: + * + * iface_stat_all_proc_read() + * iface_stat_list_lock + * (struct iface_stat) + * + * qtaguid_ctrl_proc_read() + * sock_tag_list_lock + * (sock_tag_tree) + * (struct proc_qtu_data->sock_tag_list) + * prdebug_full_state() + * sock_tag_list_lock + * (sock_tag_tree) + * uid_tag_data_tree_lock + * (uid_tag_data_tree) + * (proc_qtu_data_tree) + * iface_stat_list_lock + * + * qtaguid_stats_proc_read() + * iface_stat_list_lock + * struct iface_stat->tag_stat_list_lock + * + * qtudev_open() + * uid_tag_data_tree_lock + * + * qtudev_release() + * sock_tag_data_list_lock + * uid_tag_data_tree_lock + * prdebug_full_state() + * sock_tag_list_lock + * uid_tag_data_tree_lock + * iface_stat_list_lock + * + * iface_netdev_event_handler() + * iface_stat_create() + * iface_stat_list_lock + * iface_stat_update() + * iface_stat_list_lock + * + * iface_inetaddr_event_handler() + * iface_stat_create() + * iface_stat_list_lock + * iface_stat_update() + * iface_stat_list_lock + * + * iface_inet6addr_event_handler() + * iface_stat_create_ipv6() + * iface_stat_list_lock + * iface_stat_update() + * iface_stat_list_lock + * + * qtaguid_mt() + * account_for_uid() + * if_tag_stat_update() + * get_sock_stat() + * sock_tag_list_lock + * struct iface_stat->tag_stat_list_lock + * tag_stat_update() + * get_active_counter_set() + * tag_counter_set_list_lock + * tag_stat_update() + * get_active_counter_set() + * tag_counter_set_list_lock + * + * + * qtaguid_ctrl_parse() + * ctrl_cmd_delete() + * sock_tag_list_lock + * tag_counter_set_list_lock + * iface_stat_list_lock + * struct iface_stat->tag_stat_list_lock + * uid_tag_data_tree_lock + * ctrl_cmd_counter_set() + * tag_counter_set_list_lock + * ctrl_cmd_tag() + * sock_tag_list_lock + * (sock_tag_tree) + * get_tag_ref() + * uid_tag_data_tree_lock + * (uid_tag_data_tree) + * uid_tag_data_tree_lock + * (proc_qtu_data_tree) + * ctrl_cmd_untag() + * sock_tag_list_lock + * uid_tag_data_tree_lock + * + */ +static LIST_HEAD(iface_stat_list); +static DEFINE_SPINLOCK(iface_stat_list_lock); + +static struct rb_root sock_tag_tree = RB_ROOT; +static DEFINE_SPINLOCK(sock_tag_list_lock); + +static struct rb_root tag_counter_set_tree = RB_ROOT; +static DEFINE_SPINLOCK(tag_counter_set_list_lock); + +static struct rb_root uid_tag_data_tree = RB_ROOT; +static DEFINE_SPINLOCK(uid_tag_data_tree_lock); + +static struct rb_root proc_qtu_data_tree = RB_ROOT; +/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */ + +static struct qtaguid_event_counts qtu_events; +/*----------------------------------------------*/ +static bool can_manipulate_uids(void) +{ + /* root pwnd */ + return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid) + || in_egroup_p(proc_ctrl_write_gid); +} + +static bool can_impersonate_uid(uid_t uid) +{ + return uid == current_fsuid() || can_manipulate_uids(); +} + +static bool can_read_other_uid_stats(uid_t uid) +{ + /* root pwnd */ + return unlikely(!current_fsuid()) || uid == current_fsuid() + || unlikely(!proc_stats_readall_gid) + || in_egroup_p(proc_stats_readall_gid); +} + +static inline void dc_add_byte_packets(struct data_counters *counters, int set, + enum ifs_tx_rx direction, + enum ifs_proto ifs_proto, + int bytes, + int packets) +{ + counters->bpc[set][direction][ifs_proto].bytes += bytes; + counters->bpc[set][direction][ifs_proto].packets += packets; +} + +static inline uint64_t dc_sum_bytes(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return 
counters->bpc[set][direction][IFS_TCP].bytes + + counters->bpc[set][direction][IFS_UDP].bytes + + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; +} + +static inline uint64_t dc_sum_packets(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return counters->bpc[set][direction][IFS_TCP].packets + + counters->bpc[set][direction][IFS_UDP].packets + + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; +} + +static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct tag_node *data = rb_entry(node, struct tag_node, node); + int result; + RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " + " node=%p data=%p\n", tag, node, data); + result = tag_compare(tag, data->tag); + RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " + " data.tag=0x%llx (uid=%u) res=%d\n", + tag, data->tag, get_uid_from_tag(data->tag), result); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct tag_node *this = rb_entry(*new, struct tag_node, + node); + int result = tag_compare(data->tag, this->tag); + RB_DEBUG("qtaguid: %s(): tag=0x%llx" + " (uid=%u)\n", __func__, + this->tag, + get_uid_from_tag(this->tag)); + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root) +{ + tag_node_tree_insert(&data->tn, root); +} + +static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag) +{ + struct tag_node *node = tag_node_tree_search(root, tag); + if (!node) + return NULL; + return rb_entry(&node->node, struct tag_stat, tn.node); +} + +static void tag_counter_set_tree_insert(struct tag_counter_set *data, + struct rb_root *root) +{ + tag_node_tree_insert(&data->tn, root); +} + +static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root, + tag_t tag) +{ + struct tag_node *node = tag_node_tree_search(root, tag); + if (!node) + return NULL; + return rb_entry(&node->node, struct tag_counter_set, tn.node); + +} + +static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root) +{ + tag_node_tree_insert(&data->tn, root); +} + +static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag) +{ + struct tag_node *node = tag_node_tree_search(root, tag); + if (!node) + return NULL; + return rb_entry(&node->node, struct tag_ref, tn.node); +} + +static struct sock_tag *sock_tag_tree_search(struct rb_root *root, + const struct sock *sk) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct sock_tag *data = rb_entry(node, struct sock_tag, + sock_node); + if (sk < data->sk) + node = node->rb_left; + else if (sk > data->sk) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct sock_tag *this = rb_entry(*new, struct sock_tag, + sock_node); + parent = *new; + if 
(data->sk < this->sk) + new = &((*new)->rb_left); + else if (data->sk > this->sk) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->sock_node, parent, new); + rb_insert_color(&data->sock_node, root); +} + +static void sock_tag_tree_erase(struct rb_root *st_to_free_tree) +{ + struct rb_node *node; + struct sock_tag *st_entry; + + node = rb_first(st_to_free_tree); + while (node) { + st_entry = rb_entry(node, struct sock_tag, sock_node); + node = rb_next(node); + CT_DEBUG("qtaguid: %s(): " + "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__, + st_entry->sk, + st_entry->tag, + get_uid_from_tag(st_entry->tag)); + rb_erase(&st_entry->sock_node, st_to_free_tree); + sockfd_put(st_entry->socket); + kfree(st_entry); + } +} + +static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root, + const pid_t pid) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct proc_qtu_data *data = rb_entry(node, + struct proc_qtu_data, + node); + if (pid < data->pid) + node = node->rb_left; + else if (pid > data->pid) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void proc_qtu_data_tree_insert(struct proc_qtu_data *data, + struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct proc_qtu_data *this = rb_entry(*new, + struct proc_qtu_data, + node); + parent = *new; + if (data->pid < this->pid) + new = &((*new)->rb_left); + else if (data->pid > this->pid) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static void uid_tag_data_tree_insert(struct uid_tag_data *data, + struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct uid_tag_data *this = rb_entry(*new, + struct uid_tag_data, + node); + parent = *new; + if (data->uid < this->uid) + new = &((*new)->rb_left); + else if (data->uid > this->uid) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root, + uid_t uid) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct uid_tag_data *data = rb_entry(node, + struct uid_tag_data, + node); + if (uid < data->uid) + node = node->rb_left; + else if (uid > data->uid) + node = node->rb_right; + else + return data; + } + return NULL; +} + +/* + * Allocates a new uid_tag_data struct if needed. + * Returns a pointer to the found or allocated uid_tag_data. + * Returns a PTR_ERR on failures, and lock is not held. + * If found is not NULL: + * sets *found to true if not allocated. + * sets *found to false if allocated. 
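+ * (In the code below *found_res is assigned the pre-existing entry
+ * pointer before any allocation, so it reads as true exactly when no
+ * allocation was needed.)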
+ */ +struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res) +{ + struct uid_tag_data *utd_entry; + + /* Look for top level uid_tag_data for the UID */ + utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid); + DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry); + + if (found_res) + *found_res = utd_entry; + if (utd_entry) + return utd_entry; + + utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC); + if (!utd_entry) { + pr_err("qtaguid: get_uid_data(%u): " + "tag data alloc failed\n", uid); + return ERR_PTR(-ENOMEM); + } + + utd_entry->uid = uid; + utd_entry->tag_ref_tree = RB_ROOT; + uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree); + DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry); + return utd_entry; +} + +/* Never returns NULL. Either PTR_ERR or a valid ptr. */ +static struct tag_ref *new_tag_ref(tag_t new_tag, + struct uid_tag_data *utd_entry) +{ + struct tag_ref *tr_entry; + int res; + + if (utd_entry->num_active_tags + 1 > max_sock_tags) { + pr_info("qtaguid: new_tag_ref(0x%llx): " + "tag ref alloc quota exceeded. max=%d\n", + new_tag, max_sock_tags); + res = -EMFILE; + goto err_res; + + } + + tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC); + if (!tr_entry) { + pr_err("qtaguid: new_tag_ref(0x%llx): " + "tag ref alloc failed\n", + new_tag); + res = -ENOMEM; + goto err_res; + } + tr_entry->tn.tag = new_tag; + /* tr_entry->num_sock_tags handled by caller */ + utd_entry->num_active_tags++; + tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree); + DR_DEBUG("qtaguid: new_tag_ref(0x%llx): " + " inserted new tag ref %p\n", + new_tag, tr_entry); + return tr_entry; + +err_res: + return ERR_PTR(res); +} + +static struct tag_ref *lookup_tag_ref(tag_t full_tag, + struct uid_tag_data **utd_res) +{ + struct uid_tag_data *utd_entry; + struct tag_ref *tr_entry; + bool found_utd; + uid_t uid = get_uid_from_tag(full_tag); + + DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n", + full_tag, uid); + + utd_entry = get_uid_data(uid, &found_utd); + if (IS_ERR_OR_NULL(utd_entry)) { + if (utd_res) + *utd_res = utd_entry; + return NULL; + } + + tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag); + if (utd_res) + *utd_res = utd_entry; + DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n", + full_tag, utd_entry, tr_entry); + return tr_entry; +} + +/* Never returns NULL. Either PTR_ERR or a valid ptr. */ +static struct tag_ref *get_tag_ref(tag_t full_tag, + struct uid_tag_data **utd_res) +{ + struct uid_tag_data *utd_entry; + struct tag_ref *tr_entry; + + DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n", + full_tag); + spin_lock_bh(&uid_tag_data_tree_lock); + tr_entry = lookup_tag_ref(full_tag, &utd_entry); + BUG_ON(IS_ERR_OR_NULL(utd_entry)); + if (!tr_entry) + tr_entry = new_tag_ref(full_tag, utd_entry); + + spin_unlock_bh(&uid_tag_data_tree_lock); + if (utd_res) + *utd_res = utd_entry; + DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n", + full_tag, utd_entry, tr_entry); + return tr_entry; +} + +/* Checks and maybe frees the UID Tag Data entry */ +static void put_utd_entry(struct uid_tag_data *utd_entry) +{ + /* Are we done with the UID tag data entry? 
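Done here means its tag_ref_tree is empty and num_pqd has dropped to zero; only then is the entry erased from uid_tag_data_tree and freed.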
*/ + if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) && + !utd_entry->num_pqd) { + DR_DEBUG("qtaguid: %s(): " + "erase utd_entry=%p uid=%u " + "by pid=%u tgid=%u uid=%u\n", __func__, + utd_entry, utd_entry->uid, + current->pid, current->tgid, current_fsuid()); + BUG_ON(utd_entry->num_active_tags); + rb_erase(&utd_entry->node, &uid_tag_data_tree); + kfree(utd_entry); + } else { + DR_DEBUG("qtaguid: %s(): " + "utd_entry=%p still has %d tags %d proc_qtu_data\n", + __func__, utd_entry, utd_entry->num_active_tags, + utd_entry->num_pqd); + BUG_ON(!(utd_entry->num_active_tags || + utd_entry->num_pqd)); + } +} + +/* + * If no sock_tags are using this tag_ref, + * decrements refcount of utd_entry, removes tr_entry + * from utd_entry->tag_ref_tree and frees. + */ +static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry, + struct uid_tag_data *utd_entry) +{ + DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__, + tr_entry, tr_entry->tn.tag, + get_uid_from_tag(tr_entry->tn.tag)); + if (!tr_entry->num_sock_tags) { + BUG_ON(!utd_entry->num_active_tags); + utd_entry->num_active_tags--; + rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree); + DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry); + kfree(tr_entry); + } +} + +static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry) +{ + struct rb_node *node; + struct tag_ref *tr_entry; + tag_t acct_tag; + + DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__, + full_tag, get_uid_from_tag(full_tag)); + acct_tag = get_atag_from_tag(full_tag); + node = rb_first(&utd_entry->tag_ref_tree); + while (node) { + tr_entry = rb_entry(node, struct tag_ref, tn.node); + node = rb_next(node); + if (!acct_tag || tr_entry->tn.tag == full_tag) + free_tag_ref_from_utd_entry(tr_entry, utd_entry); + } +} + +static int read_proc_u64(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + uint64_t value; + char *p = page; + uint64_t *iface_entry = data; + + if (!data) + return 0; + + value = *iface_entry; + p += sprintf(p, "%llu\n", value); + len = (p - page) - off; + *eof = (len <= count) ? 1 : 0; + *start = page + off; + return len; +} + +static int read_proc_bool(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + bool value; + char *p = page; + bool *bool_entry = data; + + if (!data) + return 0; + + value = *bool_entry; + p += sprintf(p, "%u\n", value); + len = (p - page) - off; + *eof = (len <= count) ? 1 : 0; + *start = page + off; + return len; +} + +static int get_active_counter_set(tag_t tag) +{ + int active_set = 0; + struct tag_counter_set *tcs; + + MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)" + " (uid=%u)\n", + tag, get_uid_from_tag(tag)); + /* For now we only handle UID tags for active sets */ + tag = get_utag_from_tag(tag); + spin_lock_bh(&tag_counter_set_list_lock); + tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); + if (tcs) + active_set = tcs->active_set; + spin_unlock_bh(&tag_counter_set_list_lock); + return active_set; +} + +/* + * Find the entry for tracking the specified interface. 
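+ * Returns NULL when no entry exists for the given ifname.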
+ * Caller must hold iface_stat_list_lock + */ +static struct iface_stat *get_iface_entry(const char *ifname) +{ + struct iface_stat *iface_entry; + + /* Find the entry for tracking the specified tag within the interface */ + if (ifname == NULL) { + pr_info("qtaguid: iface_stat: get() NULL device name\n"); + return NULL; + } + + /* Iterate over interfaces */ + list_for_each_entry(iface_entry, &iface_stat_list, list) { + if (!strcmp(ifname, iface_entry->ifname)) + goto done; + } + iface_entry = NULL; +done: + return iface_entry; +} + +static int iface_stat_all_proc_read(char *page, char **num_items_returned, + off_t items_to_skip, int char_count, + int *eof, void *data) +{ + char *outp = page; + int item_index = 0; + int len; + struct iface_stat *iface_entry; + const struct net_device_stats *stats; + struct net_device_stats no_dev_stats = {0}; + + if (unlikely(module_passive)) { + *eof = 1; + return 0; + } + + CT_DEBUG("qtaguid:proc iface_stat_all " + "page=%p *num_items_returned=%p off=%ld " + "char_count=%d *eof=%d\n", page, *num_items_returned, + items_to_skip, char_count, *eof); + + if (*eof) + return 0; + + /* + * This lock will prevent iface_stat_update() from changing active, + * and in turn prevent an interface from unregistering itself. + */ + spin_lock_bh(&iface_stat_list_lock); + list_for_each_entry(iface_entry, &iface_stat_list, list) { + if (item_index++ < items_to_skip) + continue; + + if (iface_entry->active) { + stats = dev_get_stats(iface_entry->net_dev); + } else { + stats = &no_dev_stats; + } + len = snprintf(outp, char_count, + "%s %d " + "%llu %llu %llu %llu " + "%lu %lu %lu %lu\n", + iface_entry->ifname, + iface_entry->active, + iface_entry->totals[IFS_RX].bytes, + iface_entry->totals[IFS_RX].packets, + iface_entry->totals[IFS_TX].bytes, + iface_entry->totals[IFS_TX].packets, + stats->rx_bytes, stats->rx_packets, + stats->tx_bytes, stats->tx_packets); + if (len >= char_count) { + spin_unlock_bh(&iface_stat_list_lock); + *outp = '\0'; + return outp - page; + } + outp += len; + char_count -= len; + (*num_items_returned)++; + } + spin_unlock_bh(&iface_stat_list_lock); + + *eof = 1; + return outp - page; +} + +static void iface_create_proc_worker(struct work_struct *work) +{ + struct proc_dir_entry *proc_entry; + struct iface_stat_work *isw = container_of(work, struct iface_stat_work, + iface_work); + struct iface_stat *new_iface = isw->iface_entry; + + /* iface_entries are not deleted, so safe to manipulate. */ + proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir); + if (IS_ERR_OR_NULL(proc_entry)) { + pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n"); + kfree(isw); + return; + } + + new_iface->proc_ptr = proc_entry; + + create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_TX].bytes); + create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_RX].bytes); + create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_TX].packets); + create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_RX].packets); + create_proc_read_entry("active", proc_iface_perms, proc_entry, + read_proc_bool, &new_iface->active); + + IF_DEBUG("qtaguid: iface_stat: create_proc(): done " + "entry=%p dev=%s\n", new_iface, new_iface->ifname); + kfree(isw); +} + +/* + * Will set the entry's active state, and + * update the net_dev accordingly also. 
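+ * (Activating stores the net_device pointer in the entry; deactivating
+ * clears it so a departing device is no longer referenced.)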
+ */ +static void _iface_stat_set_active(struct iface_stat *entry, + struct net_device *net_dev, + bool activate) +{ + if (activate) { + entry->net_dev = net_dev; + entry->active = true; + } else { + entry->active = false; + entry->net_dev = NULL; + + } +} + +/* Caller must hold iface_stat_list_lock */ +static struct iface_stat *iface_alloc(struct net_device *net_dev) +{ + struct iface_stat *new_iface; + struct iface_stat_work *isw; + + new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC); + if (new_iface == NULL) { + pr_err("qtaguid: iface_stat: create(%s): " + "iface_stat alloc failed\n", net_dev->name); + return NULL; + } + new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC); + if (new_iface->ifname == NULL) { + pr_err("qtaguid: iface_stat: create(%s): " + "ifname alloc failed\n", net_dev->name); + kfree(new_iface); + return NULL; + } + spin_lock_init(&new_iface->tag_stat_list_lock); + new_iface->tag_stat_tree = RB_ROOT; + _iface_stat_set_active(new_iface, net_dev, true); + + /* + * ipv6 notifier chains are atomic :( + * No create_proc_read_entry() for you! + */ + isw = kmalloc(sizeof(*isw), GFP_ATOMIC); + if (!isw) { + pr_err("qtaguid: iface_stat: create(%s): " + "work alloc failed\n", new_iface->ifname); + _iface_stat_set_active(new_iface, net_dev, false); + kfree(new_iface->ifname); + kfree(new_iface); + return NULL; + } + isw->iface_entry = new_iface; + INIT_WORK(&isw->iface_work, iface_create_proc_worker); + schedule_work(&isw->iface_work); + list_add(&new_iface->list, &iface_stat_list); + return new_iface; +} + +static void iface_check_stats_reset_and_adjust(struct net_device *net_dev, + struct iface_stat *iface) +{ + const struct net_device_stats *stats; + bool stats_rewound; + + stats = dev_get_stats(net_dev); + /* No empty packets */ + stats_rewound = + (stats->rx_bytes < iface->last_known[IFS_RX].bytes) + || (stats->tx_bytes < iface->last_known[IFS_TX].bytes); + + IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p " + "bytes rx/tx=%lu/%lu " + "active=%d last_known=%d " + "stats_rewound=%d\n", __func__, + net_dev ? net_dev->name : "?", + iface, net_dev, + stats->rx_bytes, stats->tx_bytes, + iface->active, iface->last_known_valid, stats_rewound); + + if (iface->active && iface->last_known_valid && stats_rewound) { + pr_warn_once("qtaguid: iface_stat: %s(%s): " + "iface reset its stats unexpectedly\n", __func__, + net_dev->name); + + iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes; + iface->totals[IFS_TX].packets += + iface->last_known[IFS_TX].packets; + iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes; + iface->totals[IFS_RX].packets += + iface->last_known[IFS_RX].packets; + iface->last_known_valid = false; + IF_DEBUG("qtaguid: %s(%s): iface=%p " + "used last known bytes rx/tx=%llu/%llu\n", __func__, + iface->ifname, iface, iface->last_known[IFS_RX].bytes, + iface->last_known[IFS_TX].bytes); + } +} + +/* + * Create a new entry for tracking the specified interface. + * Do nothing if the entry already exists. + * Called when an interface is configured with a valid IP address. + */ +static void iface_stat_create(struct net_device *net_dev, + struct in_ifaddr *ifa) +{ + struct in_device *in_dev = NULL; + const char *ifname; + struct iface_stat *entry; + __be32 ipaddr = 0; + struct iface_stat *new_iface; + + IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n", + net_dev ? 
net_dev->name : "?", + ifa, net_dev); + if (!net_dev) { + pr_err("qtaguid: iface_stat: create(): no net dev\n"); + return; + } + + ifname = net_dev->name; + if (!ifa) { + in_dev = in_dev_get(net_dev); + if (!in_dev) { + pr_err("qtaguid: iface_stat: create(%s): no inet dev\n", + ifname); + return; + } + IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n", + ifname, in_dev); + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + IF_DEBUG("qtaguid: iface_stat: create(%s): " + "ifa=%p ifa_label=%s\n", + ifname, ifa, + ifa->ifa_label ? ifa->ifa_label : "(null)"); + if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label)) + break; + } + } + + if (!ifa) { + IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n", + ifname); + goto done_put; + } + ipaddr = ifa->ifa_local; + + spin_lock_bh(&iface_stat_list_lock); + entry = get_iface_entry(ifname); + if (entry != NULL) { + bool activate = !ipv4_is_loopback(ipaddr); + IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", + ifname, entry); + iface_check_stats_reset_and_adjust(net_dev, entry); + _iface_stat_set_active(entry, net_dev, activate); + IF_DEBUG("qtaguid: %s(%s): " + "tracking now %d on ip=%pI4\n", __func__, + entry->ifname, activate, &ipaddr); + goto done_unlock_put; + } else if (ipv4_is_loopback(ipaddr)) { + IF_DEBUG("qtaguid: iface_stat: create(%s): " + "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr); + goto done_unlock_put; + } + + new_iface = iface_alloc(net_dev); + IF_DEBUG("qtaguid: iface_stat: create(%s): done " + "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr); +done_unlock_put: + spin_unlock_bh(&iface_stat_list_lock); +done_put: + if (in_dev) + in_dev_put(in_dev); +} + +static void iface_stat_create_ipv6(struct net_device *net_dev, + struct inet6_ifaddr *ifa) +{ + struct in_device *in_dev; + const char *ifname; + struct iface_stat *entry; + struct iface_stat *new_iface; + int addr_type; + + IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n", + ifa, net_dev, net_dev ? net_dev->name : ""); + if (!net_dev) { + pr_err("qtaguid: iface_stat: create6(): no net dev!\n"); + return; + } + ifname = net_dev->name; + + in_dev = in_dev_get(net_dev); + if (!in_dev) { + pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n", + ifname); + return; + } + + IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n", + ifname, in_dev); + + if (!ifa) { + IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n", + ifname); + goto done_put; + } + addr_type = ipv6_addr_type(&ifa->addr); + + spin_lock_bh(&iface_stat_list_lock); + entry = get_iface_entry(ifname); + if (entry != NULL) { + bool activate = !(addr_type & IPV6_ADDR_LOOPBACK); + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, + ifname, entry); + iface_check_stats_reset_and_adjust(net_dev, entry); + _iface_stat_set_active(entry, net_dev, activate); + IF_DEBUG("qtaguid: %s(%s): " + "tracking now %d on ip=%pI6c\n", __func__, + entry->ifname, activate, &ifa->addr); + goto done_unlock_put; + } else if (addr_type & IPV6_ADDR_LOOPBACK) { + IF_DEBUG("qtaguid: %s(%s): " + "ignore loopback dev. 
ip=%pI6c\n", __func__, + ifname, &ifa->addr); + goto done_unlock_put; + } + + new_iface = iface_alloc(net_dev); + IF_DEBUG("qtaguid: iface_stat: create6(%s): done " + "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr); + +done_unlock_put: + spin_unlock_bh(&iface_stat_list_lock); +done_put: + in_dev_put(in_dev); +} + +static struct sock_tag *get_sock_stat_nl(const struct sock *sk) +{ + MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk); + return sock_tag_tree_search(&sock_tag_tree, sk); +} + +static struct sock_tag *get_sock_stat(const struct sock *sk) +{ + struct sock_tag *sock_tag_entry; + MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk); + if (!sk) + return NULL; + spin_lock_bh(&sock_tag_list_lock); + sock_tag_entry = get_sock_stat_nl(sk); + spin_unlock_bh(&sock_tag_list_lock); + return sock_tag_entry; +} + +static void +data_counters_update(struct data_counters *dc, int set, + enum ifs_tx_rx direction, int proto, int bytes) +{ + switch (proto) { + case IPPROTO_TCP: + dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1); + break; + case IPPROTO_UDP: + dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1); + break; + case IPPROTO_IP: + default: + dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes, + 1); + break; + } +} + +/* + * Update stats for the specified interface. Do nothing if the entry + * does not exist (when a device was never configured with an IP address). + * Called when an device is being unregistered. + */ +static void iface_stat_update(struct net_device *net_dev, bool stash_only) +{ + const struct net_device_stats *stats; + struct iface_stat *entry; + + stats = dev_get_stats(net_dev); + spin_lock_bh(&iface_stat_list_lock); + entry = get_iface_entry(net_dev->name); + if (entry == NULL) { + IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n", + net_dev->name); + spin_unlock_bh(&iface_stat_list_lock); + return; + } + + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, + net_dev->name, entry); + if (!entry->active) { + IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__, + net_dev->name); + spin_unlock_bh(&iface_stat_list_lock); + return; + } + + if (stash_only) { + entry->last_known[IFS_TX].bytes = stats->tx_bytes; + entry->last_known[IFS_TX].packets = stats->tx_packets; + entry->last_known[IFS_RX].bytes = stats->rx_bytes; + entry->last_known[IFS_RX].packets = stats->rx_packets; + entry->last_known_valid = true; + IF_DEBUG("qtaguid: %s(%s): " + "dev stats stashed rx/tx=%lu/%lu\n", __func__, + net_dev->name, stats->rx_bytes, stats->tx_bytes); + spin_unlock_bh(&iface_stat_list_lock); + return; + } + entry->totals[IFS_TX].bytes += stats->tx_bytes; + entry->totals[IFS_TX].packets += stats->tx_packets; + entry->totals[IFS_RX].bytes += stats->rx_bytes; + entry->totals[IFS_RX].packets += stats->rx_packets; + /* We don't need the last_known[] anymore */ + entry->last_known_valid = false; + _iface_stat_set_active(entry, net_dev, false); + IF_DEBUG("qtaguid: %s(%s): " + "disable tracking. 
rx/tx=%lu/%lu\n", __func__, + net_dev->name, stats->rx_bytes, stats->tx_bytes); + spin_unlock_bh(&iface_stat_list_lock); +} + +static void tag_stat_update(struct tag_stat *tag_entry, + enum ifs_tx_rx direction, int proto, int bytes) +{ + int active_set; + active_set = get_active_counter_set(tag_entry->tn.tag); + MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d " + "dir=%d proto=%d bytes=%d)\n", + tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag), + active_set, direction, proto, bytes); + data_counters_update(&tag_entry->counters, active_set, direction, + proto, bytes); + if (tag_entry->parent_counters) + data_counters_update(tag_entry->parent_counters, active_set, + direction, proto, bytes); +} + +/* + * Create a new entry for tracking the specified {acct_tag,uid_tag} within + * the interface. + * iface_entry->tag_stat_list_lock should be held. + */ +static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry, + tag_t tag) +{ + struct tag_stat *new_tag_stat_entry = NULL; + IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx" + " (uid=%u)\n", __func__, + iface_entry, tag, get_uid_from_tag(tag)); + new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC); + if (!new_tag_stat_entry) { + pr_err("qtaguid: iface_stat: tag stat alloc failed\n"); + goto done; + } + new_tag_stat_entry->tn.tag = tag; + tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree); +done: + return new_tag_stat_entry; +} + +static void if_tag_stat_update(const char *ifname, uid_t uid, + const struct sock *sk, enum ifs_tx_rx direction, + int proto, int bytes) +{ + struct tag_stat *tag_stat_entry; + tag_t tag, acct_tag; + tag_t uid_tag; + struct data_counters *uid_tag_counters; + struct sock_tag *sock_tag_entry; + struct iface_stat *iface_entry; + struct tag_stat *new_tag_stat; + MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s " + "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", + ifname, uid, sk, direction, proto, bytes); + + + iface_entry = get_iface_entry(ifname); + if (!iface_entry) { + pr_err("qtaguid: iface_stat: stat_update() %s not found\n", + ifname); + return; + } + /* It is ok to process data when an iface_entry is inactive */ + + MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", + ifname, iface_entry); + + /* + * Look for a tagged sock. + * It will have an acct_uid. + */ + sock_tag_entry = get_sock_stat(sk); + if (sock_tag_entry) { + tag = sock_tag_entry->tag; + acct_tag = get_atag_from_tag(tag); + uid_tag = get_utag_from_tag(tag); + } else { + acct_tag = make_atag_from_value(0); + tag = combine_atag_with_uid(acct_tag, uid); + uid_tag = make_tag_from_uid(uid); + } + MT_DEBUG("qtaguid: iface_stat: stat_update(): " + " looking for tag=0x%llx (uid=%u) in ife=%p\n", + tag, get_uid_from_tag(tag), iface_entry); + /* Loop over tag list under this interface for {acct_tag,uid_tag} */ + spin_lock_bh(&iface_entry->tag_stat_list_lock); + + tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, + tag); + if (tag_stat_entry) { + /* + * Updating the {acct_tag, uid_tag} entry handles both stats: + * {0, uid_tag} will also get updated. + */ + tag_stat_update(tag_stat_entry, direction, proto, bytes); + spin_unlock_bh(&iface_entry->tag_stat_list_lock); + return; + } + + /* Loop over tag list under this interface for {0,uid_tag} */ + tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, + uid_tag); + if (!tag_stat_entry) { + /* Here: the base uid_tag did not exist */ + /* + * No parent counters. 
So + * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats. + */ + new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); + uid_tag_counters = &new_tag_stat->counters; + } else { + uid_tag_counters = &tag_stat_entry->counters; + } + + if (acct_tag) { + new_tag_stat = create_if_tag_stat(iface_entry, tag); + new_tag_stat->parent_counters = uid_tag_counters; + } + tag_stat_update(new_tag_stat, direction, proto, bytes); + spin_unlock_bh(&iface_entry->tag_stat_list_lock); +} + +static int iface_netdev_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) { + struct net_device *dev = ptr; + + if (unlikely(module_passive)) + return NOTIFY_DONE; + + IF_DEBUG("qtaguid: iface_stat: netdev_event(): " + "ev=0x%lx/%s netdev=%p->name=%s\n", + event, netdev_evt_str(event), dev, dev ? dev->name : ""); + + switch (event) { + case NETDEV_UP: + iface_stat_create(dev, NULL); + atomic64_inc(&qtu_events.iface_events); + break; + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + iface_stat_update(dev, event == NETDEV_DOWN); + atomic64_inc(&qtu_events.iface_events); + break; + } + return NOTIFY_DONE; +} + +static int iface_inet6addr_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = ptr; + struct net_device *dev; + + if (unlikely(module_passive)) + return NOTIFY_DONE; + + IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): " + "ev=0x%lx/%s ifa=%p\n", + event, netdev_evt_str(event), ifa); + + switch (event) { + case NETDEV_UP: + BUG_ON(!ifa || !ifa->idev); + dev = (struct net_device *)ifa->idev->dev; + iface_stat_create_ipv6(dev, ifa); + atomic64_inc(&qtu_events.iface_events); + break; + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + BUG_ON(!ifa || !ifa->idev); + dev = (struct net_device *)ifa->idev->dev; + iface_stat_update(dev, event == NETDEV_DOWN); + atomic64_inc(&qtu_events.iface_events); + break; + } + return NOTIFY_DONE; +} + +static int iface_inetaddr_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct net_device *dev; + + if (unlikely(module_passive)) + return NOTIFY_DONE; + + IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): " + "ev=0x%lx/%s ifa=%p\n", + event, netdev_evt_str(event), ifa); + + switch (event) { + case NETDEV_UP: + BUG_ON(!ifa || !ifa->ifa_dev); + dev = ifa->ifa_dev->dev; + iface_stat_create(dev, ifa); + atomic64_inc(&qtu_events.iface_events); + break; + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + BUG_ON(!ifa || !ifa->ifa_dev); + dev = ifa->ifa_dev->dev; + iface_stat_update(dev, event == NETDEV_DOWN); + atomic64_inc(&qtu_events.iface_events); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block iface_netdev_notifier_blk = { + .notifier_call = iface_netdev_event_handler, +}; + +static struct notifier_block iface_inetaddr_notifier_blk = { + .notifier_call = iface_inetaddr_event_handler, +}; + +static struct notifier_block iface_inet6addr_notifier_blk = { + .notifier_call = iface_inet6addr_event_handler, +}; + +static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) +{ + int err; + + iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir); + if (!iface_stat_procdir) { + pr_err("qtaguid: iface_stat: init failed to create proc entry\n"); + err = -1; + goto err; + } + + iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename, + proc_iface_perms, + parent_procdir); + if (!iface_stat_all_procfile) { + pr_err("qtaguid: iface_stat: init " + " failed to create stat_all proc entry\n"); + err = 
-1; + goto err_zap_entry; + } + iface_stat_all_procfile->read_proc = iface_stat_all_proc_read; + + + err = register_netdevice_notifier(&iface_netdev_notifier_blk); + if (err) { + pr_err("qtaguid: iface_stat: init " + "failed to register dev event handler\n"); + goto err_zap_all_stats_entry; + } + err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk); + if (err) { + pr_err("qtaguid: iface_stat: init " + "failed to register ipv4 dev event handler\n"); + goto err_unreg_nd; + } + + err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk); + if (err) { + pr_err("qtaguid: iface_stat: init " + "failed to register ipv6 dev event handler\n"); + goto err_unreg_ip4_addr; + } + return 0; + +err_unreg_ip4_addr: + unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk); +err_unreg_nd: + unregister_netdevice_notifier(&iface_netdev_notifier_blk); +err_zap_all_stats_entry: + remove_proc_entry(iface_stat_all_procfilename, parent_procdir); +err_zap_entry: + remove_proc_entry(iface_stat_procdirname, parent_procdir); +err: + return err; +} + +static struct sock *qtaguid_find_sk(const struct sk_buff *skb, + struct xt_action_param *par) +{ + struct sock *sk; + unsigned int hook_mask = (1 << par->hooknum); + + MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, + par->hooknum, par->family); + + /* + * Let's not abuse the the xt_socket_get*_sk(), or else it will + * return garbage SKs. + */ + if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS)) + return NULL; + + switch (par->family) { +#ifdef XT_SOCKET_HAVE_IPV6 + case NFPROTO_IPV6: + sk = xt_socket_get6_sk(skb, par); + break; +#endif + case NFPROTO_IPV4: + sk = xt_socket_get4_sk(skb, par); + break; + default: + return NULL; + } + + /* + * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs. + * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959 + * Not fixed in 3.0-r3 :( + */ + if (sk) { + MT_DEBUG("qtaguid: %p->sk_proto=%u " + "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); + if (sk->sk_state == TCP_TIME_WAIT) { + xt_socket_put_sk(sk); + sk = NULL; + } + } + return sk; +} + +static void account_for_uid(const struct sk_buff *skb, + const struct sock *alternate_sk, uid_t uid, + struct xt_action_param *par) +{ + const struct net_device *el_dev; + + if (!skb->dev) { + MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); + el_dev = par->in ? : par->out; + } else { + const struct net_device *other_dev; + el_dev = skb->dev; + other_dev = par->in ? : par->out; + if (el_dev != other_dev) { + MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " + "par->(in/out)=%p %s\n", + par->hooknum, el_dev, el_dev->name, other_dev, + other_dev->name); + } + } + + if (unlikely(!el_dev)) { + pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum); + } else if (unlikely(!el_dev->name)) { + pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); + } else { + MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n", + par->hooknum, + el_dev->name, + el_dev->type); + + if_tag_stat_update(el_dev->name, uid, + skb->sk ? skb->sk : alternate_sk, + par->in ? 
IFS_RX : IFS_TX, + ip_hdr(skb)->protocol, skb->len); + } +} + +static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_qtaguid_match_info *info = par->matchinfo; + const struct file *filp; + bool got_sock = false; + struct sock *sk; + uid_t sock_uid; + bool res; + + if (unlikely(module_passive)) + return (info->match ^ info->invert) == 0; + + MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n", + par->hooknum, skb, par->in, par->out, par->family); + + atomic64_inc(&qtu_events.match_calls); + if (skb == NULL) { + res = (info->match ^ info->invert) == 0; + goto ret_res; + } + + sk = skb->sk; + + if (sk == NULL) { + /* + * A missing sk->sk_socket happens when packets are in-flight + * and the matching socket is already closed and gone. + */ + sk = qtaguid_find_sk(skb, par); + /* + * If we got the socket from the find_sk(), we will need to put + * it back, as nf_tproxy_get_sock_v4() got it. + */ + got_sock = sk; + if (sk) + atomic64_inc(&qtu_events.match_found_sk_in_ct); + else + atomic64_inc(&qtu_events.match_found_no_sk_in_ct); + } else { + atomic64_inc(&qtu_events.match_found_sk); + } + MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n", + par->hooknum, sk, got_sock, ip_hdr(skb)->protocol); + if (sk != NULL) { + MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", + par->hooknum, sk, sk->sk_socket, + sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); + filp = sk->sk_socket ? sk->sk_socket->file : NULL; + MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", + par->hooknum, filp ? filp->f_cred->fsuid : -1); + } + + if (sk == NULL || sk->sk_socket == NULL) { + /* + * Here, the qtaguid_find_sk() using connection tracking + * couldn't find the owner, so for now we just count them + * against the system. + */ + /* + * TODO: unhack how to force just accounting. + * For now we only do iface stats when the uid-owner is not + * requested. + */ + if (!(info->match & XT_QTAGUID_UID)) + account_for_uid(skb, sk, 0, par); + MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", + par->hooknum, + sk ? sk->sk_socket : NULL); + res = (info->match ^ info->invert) == 0; + atomic64_inc(&qtu_events.match_no_sk); + goto put_sock_ret_res; + } else if (info->match & info->invert & XT_QTAGUID_SOCKET) { + res = false; + goto put_sock_ret_res; + } + filp = sk->sk_socket->file; + if (filp == NULL) { + MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); + account_for_uid(skb, sk, 0, par); + res = ((info->match ^ info->invert) & + (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; + atomic64_inc(&qtu_events.match_no_sk_file); + goto put_sock_ret_res; + } + sock_uid = filp->f_cred->fsuid; + /* + * TODO: unhack how to force just accounting. 
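+	 * (All accounting is currently driven from this match callback;
+	 * there is no separate accounting-only hook.)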
+ * For now we only do iface stats when the uid-owner is not requested + */ + if (!(info->match & XT_QTAGUID_UID)) + account_for_uid(skb, sk, sock_uid, par); + + /* + * The following two tests fail the match when: + * id not in range AND no inverted condition requested + * or id in range AND inverted condition requested + * Thus (!a && b) || (a && !b) == a ^ b + */ + if (info->match & XT_QTAGUID_UID) + if ((filp->f_cred->fsuid >= info->uid_min && + filp->f_cred->fsuid <= info->uid_max) ^ + !(info->invert & XT_QTAGUID_UID)) { + MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", + par->hooknum); + res = false; + goto put_sock_ret_res; + } + if (info->match & XT_QTAGUID_GID) + if ((filp->f_cred->fsgid >= info->gid_min && + filp->f_cred->fsgid <= info->gid_max) ^ + !(info->invert & XT_QTAGUID_GID)) { + MT_DEBUG("qtaguid[%d]: leaving gid not matching\n", + par->hooknum); + res = false; + goto put_sock_ret_res; + } + + MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum); + res = true; + +put_sock_ret_res: + if (got_sock) + xt_socket_put_sk(sk); +ret_res: + MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res); + return res; +} + +#ifdef DDEBUG +/* This function is not in xt_qtaguid_print.c because of locks visibility */ +static void prdebug_full_state(int indent_level, const char *fmt, ...) +{ + va_list args; + char *fmt_buff; + char *buff; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + fmt_buff = kasprintf(GFP_ATOMIC, + "qtaguid: %s(): %s {\n", __func__, fmt); + BUG_ON(!fmt_buff); + va_start(args, fmt); + buff = kvasprintf(GFP_ATOMIC, + fmt_buff, args); + BUG_ON(!buff); + pr_debug("%s", buff); + kfree(fmt_buff); + kfree(buff); + va_end(args); + + spin_lock_bh(&sock_tag_list_lock); + prdebug_sock_tag_tree(indent_level, &sock_tag_tree); + spin_unlock_bh(&sock_tag_list_lock); + + spin_lock_bh(&sock_tag_list_lock); + spin_lock_bh(&uid_tag_data_tree_lock); + prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree); + prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree); + spin_unlock_bh(&uid_tag_data_tree_lock); + spin_unlock_bh(&sock_tag_list_lock); + + spin_lock_bh(&iface_stat_list_lock); + prdebug_iface_stat_list(indent_level, &iface_stat_list); + spin_unlock_bh(&iface_stat_list_lock); + + pr_debug("qtaguid: %s(): }\n", __func__); +} +#else +static void prdebug_full_state(int indent_level, const char *fmt, ...) 
{} +#endif + +/* + * Procfs reader to get all active socket tags using style "1)" as described in + * fs/proc/generic.c + */ +static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, + off_t items_to_skip, int char_count, int *eof, + void *data) +{ + char *outp = page; + int len; + uid_t uid; + struct rb_node *node; + struct sock_tag *sock_tag_entry; + int item_index = 0; + int indent_level = 0; + long f_count; + + if (unlikely(module_passive)) { + *eof = 1; + return 0; + } + + if (*eof) + return 0; + + CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n", + page, items_to_skip, char_count, *eof); + + spin_lock_bh(&sock_tag_list_lock); + for (node = rb_first(&sock_tag_tree); + node; + node = rb_next(node)) { + if (item_index++ < items_to_skip) + continue; + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); + uid = get_uid_from_tag(sock_tag_entry->tag); + CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) " + "pid=%u\n", + sock_tag_entry->sk, + sock_tag_entry->tag, + uid, + sock_tag_entry->pid + ); + f_count = atomic_long_read( + &sock_tag_entry->socket->file->f_count); + len = snprintf(outp, char_count, + "sock=%p tag=0x%llx (uid=%u) pid=%u " + "f_count=%lu\n", + sock_tag_entry->sk, + sock_tag_entry->tag, uid, + sock_tag_entry->pid, f_count); + if (len >= char_count) { + spin_unlock_bh(&sock_tag_list_lock); + *outp = '\0'; + return outp - page; + } + outp += len; + char_count -= len; + (*num_items_returned)++; + } + spin_unlock_bh(&sock_tag_list_lock); + + if (item_index++ >= items_to_skip) { + len = snprintf(outp, char_count, + "events: sockets_tagged=%llu " + "sockets_untagged=%llu " + "counter_set_changes=%llu " + "delete_cmds=%llu " + "iface_events=%llu " + "match_calls=%llu " + "match_found_sk=%llu " + "match_found_sk_in_ct=%llu " + "match_found_no_sk_in_ct=%llu " + "match_no_sk=%llu " + "match_no_sk_file=%llu\n", + atomic64_read(&qtu_events.sockets_tagged), + atomic64_read(&qtu_events.sockets_untagged), + atomic64_read(&qtu_events.counter_set_changes), + atomic64_read(&qtu_events.delete_cmds), + atomic64_read(&qtu_events.iface_events), + atomic64_read(&qtu_events.match_calls), + atomic64_read(&qtu_events.match_found_sk), + atomic64_read(&qtu_events.match_found_sk_in_ct), + atomic64_read( + &qtu_events.match_found_no_sk_in_ct), + atomic64_read(&qtu_events.match_no_sk), + atomic64_read(&qtu_events.match_no_sk_file)); + if (len >= char_count) { + *outp = '\0'; + return outp - page; + } + outp += len; + char_count -= len; + (*num_items_returned)++; + } + + /* Count the following as part of the last item_index */ + if (item_index > items_to_skip) { + prdebug_full_state(indent_level, "proc ctrl"); + } + + *eof = 1; + return outp - page; +} + +/* + * Delete socket tags, and stat tags associated with a given + * accouting tag and uid. 
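+ * Expected input (see the sscanf() below): "d <acct_tag> [<uid>]".
+ * When <uid> is omitted, the caller's own fsuid is used.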
+ */ +static int ctrl_cmd_delete(const char *input) +{ + char cmd; + uid_t uid; + uid_t entry_uid; + tag_t acct_tag; + tag_t tag; + int res, argc; + struct iface_stat *iface_entry; + struct rb_node *node; + struct sock_tag *st_entry; + struct rb_root st_to_free_tree = RB_ROOT; + struct tag_stat *ts_entry; + struct tag_counter_set *tcs_entry; + struct tag_ref *tr_entry; + struct uid_tag_data *utd_entry; + + argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid); + CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c " + "user_tag=0x%llx uid=%u\n", input, argc, cmd, + acct_tag, uid); + if (argc < 2) { + res = -EINVAL; + goto err; + } + if (!valid_atag(acct_tag)) { + pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input); + res = -EINVAL; + goto err; + } + if (argc < 3) { + uid = current_fsuid(); + } else if (!can_impersonate_uid(uid)) { + pr_info("qtaguid: ctrl_delete(%s): " + "insufficient priv from pid=%u tgid=%u uid=%u\n", + input, current->pid, current->tgid, current_fsuid()); + res = -EPERM; + goto err; + } + + tag = combine_atag_with_uid(acct_tag, uid); + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "looking for tag=0x%llx (uid=%u)\n", + input, tag, uid); + + /* Delete socket tags */ + spin_lock_bh(&sock_tag_list_lock); + node = rb_first(&sock_tag_tree); + while (node) { + st_entry = rb_entry(node, struct sock_tag, sock_node); + entry_uid = get_uid_from_tag(st_entry->tag); + node = rb_next(node); + if (entry_uid != uid) + continue; + + CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n", + input, st_entry->tag, entry_uid); + + if (!acct_tag || st_entry->tag == tag) { + rb_erase(&st_entry->sock_node, &sock_tag_tree); + /* Can't sockfd_put() within spinlock, do it later. */ + sock_tag_tree_insert(st_entry, &st_to_free_tree); + tr_entry = lookup_tag_ref(st_entry->tag, NULL); + BUG_ON(tr_entry->num_sock_tags <= 0); + tr_entry->num_sock_tags--; + /* + * TODO: remove if, and start failing. + * This is a hack to work around the fact that in some + * places we have "if (IS_ERR_OR_NULL(pqd_entry))" + * and are trying to work around apps + * that didn't open the /dev/xt_qtaguid. + */ + if (st_entry->list.next && st_entry->list.prev) + list_del(&st_entry->list); + } + } + spin_unlock_bh(&sock_tag_list_lock); + + sock_tag_tree_erase(&st_to_free_tree); + + /* Delete tag counter-sets */ + spin_lock_bh(&tag_counter_set_list_lock); + /* Counter sets are only on the uid tag, not full tag */ + tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag); + if (tcs_entry) { + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "erase tcs: tag=0x%llx (uid=%u) set=%d\n", + input, + tcs_entry->tn.tag, + get_uid_from_tag(tcs_entry->tn.tag), + tcs_entry->active_set); + rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree); + kfree(tcs_entry); + } + spin_unlock_bh(&tag_counter_set_list_lock); + + /* + * If acct_tag is 0, then all entries belonging to uid are + * erased. 
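+ * Otherwise only the matching {acct_tag, uid_tag} entries are removed.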
+ */ + spin_lock_bh(&iface_stat_list_lock); + list_for_each_entry(iface_entry, &iface_stat_list, list) { + spin_lock_bh(&iface_entry->tag_stat_list_lock); + node = rb_first(&iface_entry->tag_stat_tree); + while (node) { + ts_entry = rb_entry(node, struct tag_stat, tn.node); + entry_uid = get_uid_from_tag(ts_entry->tn.tag); + node = rb_next(node); + + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "ts tag=0x%llx (uid=%u)\n", + input, ts_entry->tn.tag, entry_uid); + + if (entry_uid != uid) + continue; + if (!acct_tag || ts_entry->tn.tag == tag) { + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "erase ts: %s 0x%llx %u\n", + input, iface_entry->ifname, + get_atag_from_tag(ts_entry->tn.tag), + entry_uid); + rb_erase(&ts_entry->tn.node, + &iface_entry->tag_stat_tree); + kfree(ts_entry); + } + } + spin_unlock_bh(&iface_entry->tag_stat_list_lock); + } + spin_unlock_bh(&iface_stat_list_lock); + + /* Cleanup the uid_tag_data */ + spin_lock_bh(&uid_tag_data_tree_lock); + node = rb_first(&uid_tag_data_tree); + while (node) { + utd_entry = rb_entry(node, struct uid_tag_data, node); + entry_uid = utd_entry->uid; + node = rb_next(node); + + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "utd uid=%u\n", + input, entry_uid); + + if (entry_uid != uid) + continue; + /* + * Go over the tag_refs, and those that don't have + * sock_tags using them are freed. + */ + put_tag_ref_tree(tag, utd_entry); + put_utd_entry(utd_entry); + } + spin_unlock_bh(&uid_tag_data_tree_lock); + + atomic64_inc(&qtu_events.delete_cmds); + res = 0; + +err: + return res; +} + +static int ctrl_cmd_counter_set(const char *input) +{ + char cmd; + uid_t uid = 0; + tag_t tag; + int res, argc; + struct tag_counter_set *tcs; + int counter_set; + + argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid); + CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c " + "set=%d uid=%u\n", input, argc, cmd, + counter_set, uid); + if (argc != 3) { + res = -EINVAL; + goto err; + } + if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) { + pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n", + input); + res = -EINVAL; + goto err; + } + if (!can_manipulate_uids()) { + pr_info("qtaguid: ctrl_counterset(%s): " + "insufficient priv from pid=%u tgid=%u uid=%u\n", + input, current->pid, current->tgid, current_fsuid()); + res = -EPERM; + goto err; + } + + tag = make_tag_from_uid(uid); + spin_lock_bh(&tag_counter_set_list_lock); + tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); + if (!tcs) { + tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC); + if (!tcs) { + spin_unlock_bh(&tag_counter_set_list_lock); + pr_err("qtaguid: ctrl_counterset(%s): " + "failed to alloc counter set\n", + input); + res = -ENOMEM; + goto err; + } + tcs->tn.tag = tag; + tag_counter_set_tree_insert(tcs, &tag_counter_set_tree); + CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx " + "(uid=%u) set=%d\n", + input, tag, get_uid_from_tag(tag), counter_set); + } + tcs->active_set = counter_set; + spin_unlock_bh(&tag_counter_set_list_lock); + atomic64_inc(&qtu_events.counter_set_changes); + res = 0; + +err: + return res; +} + +static int ctrl_cmd_tag(const char *input) +{ + char cmd; + int sock_fd = 0; + uid_t uid = 0; + tag_t acct_tag = make_atag_from_value(0); + tag_t full_tag; + struct socket *el_socket; + int res, argc; + struct sock_tag *sock_tag_entry; + struct tag_ref *tag_ref_entry; + struct uid_tag_data *uid_tag_data_entry; + struct proc_qtu_data *pqd_entry; + + /* Unassigned args will get defaulted later. 
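+	 * Expected input (see the sscanf() below):
+	 *   "t <sock_fd> [<acct_tag> [<uid>]]"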
*/ + argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid); + CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d " + "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd, + acct_tag, uid); + if (argc < 2) { + res = -EINVAL; + goto err; + } + el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ + if (!el_socket) { + pr_info("qtaguid: ctrl_tag(%s): failed to lookup" + " sock_fd=%d err=%d\n", input, sock_fd, res); + goto err; + } + CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", + input, atomic_long_read(&el_socket->file->f_count), + el_socket->sk); + if (argc < 3) { + acct_tag = make_atag_from_value(0); + } else if (!valid_atag(acct_tag)) { + pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input); + res = -EINVAL; + goto err_put; + } + CT_DEBUG("qtaguid: ctrl_tag(%s): " + "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " + "in_group=%d in_egroup=%d\n", + input, current->pid, current->tgid, current_uid(), + current_euid(), current_fsuid(), + in_group_p(proc_ctrl_write_gid), + in_egroup_p(proc_ctrl_write_gid)); + if (argc < 4) { + uid = current_fsuid(); + } else if (!can_impersonate_uid(uid)) { + pr_info("qtaguid: ctrl_tag(%s): " + "insufficient priv from pid=%u tgid=%u uid=%u\n", + input, current->pid, current->tgid, current_fsuid()); + res = -EPERM; + goto err_put; + } + full_tag = combine_atag_with_uid(acct_tag, uid); + + spin_lock_bh(&sock_tag_list_lock); + sock_tag_entry = get_sock_stat_nl(el_socket->sk); + tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry); + if (IS_ERR(tag_ref_entry)) { + res = PTR_ERR(tag_ref_entry); + spin_unlock_bh(&sock_tag_list_lock); + goto err_put; + } + tag_ref_entry->num_sock_tags++; + if (sock_tag_entry) { + struct tag_ref *prev_tag_ref_entry; + + CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p " + "st@%p ...->f_count=%ld\n", + input, el_socket->sk, sock_tag_entry, + atomic_long_read(&el_socket->file->f_count)); + /* + * This is a re-tagging, so release the sock_fd that was + * locked at the time of the 1st tagging. + * There is still the ref from this call's sockfd_lookup() so + * it can be done within the spinlock. + */ + sockfd_put(sock_tag_entry->socket); + prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, + &uid_tag_data_entry); + BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry)); + BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0); + prev_tag_ref_entry->num_sock_tags--; + sock_tag_entry->tag = full_tag; + } else { + CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n", + input, el_socket->sk); + sock_tag_entry = kzalloc(sizeof(*sock_tag_entry), + GFP_ATOMIC); + if (!sock_tag_entry) { + pr_err("qtaguid: ctrl_tag(%s): " + "socket tag alloc failed\n", + input); + spin_unlock_bh(&sock_tag_list_lock); + res = -ENOMEM; + goto err_tag_unref_put; + } + sock_tag_entry->sk = el_socket->sk; + sock_tag_entry->socket = el_socket; + sock_tag_entry->pid = current->tgid; + sock_tag_entry->tag = combine_atag_with_uid(acct_tag, + uid); + spin_lock_bh(&uid_tag_data_tree_lock); + pqd_entry = proc_qtu_data_tree_search( + &proc_qtu_data_tree, current->tgid); + /* + * TODO: remove if, and start failing. + * At first, we want to catch user-space code that is not + * opening the /dev/xt_qtaguid. + */ + if (IS_ERR_OR_NULL(pqd_entry)) + pr_warn_once( + "qtaguid: %s(): " + "User space forgot to open /dev/xt_qtaguid? 
" + "pid=%u tgid=%u uid=%u\n", __func__, + current->pid, current->tgid, + current_fsuid()); + else + list_add(&sock_tag_entry->list, + &pqd_entry->sock_tag_list); + spin_unlock_bh(&uid_tag_data_tree_lock); + + sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree); + atomic64_inc(&qtu_events.sockets_tagged); + } + spin_unlock_bh(&sock_tag_list_lock); + /* We keep the ref to the socket (file) until it is untagged */ + CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n", + input, sock_tag_entry, + atomic_long_read(&el_socket->file->f_count)); + return 0; + +err_tag_unref_put: + BUG_ON(tag_ref_entry->num_sock_tags <= 0); + tag_ref_entry->num_sock_tags--; + free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry); +err_put: + CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n", + input, atomic_long_read(&el_socket->file->f_count) - 1); + /* Release the sock_fd that was grabbed by sockfd_lookup(). */ + sockfd_put(el_socket); + return res; + +err: + CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input); + return res; +} + +static int ctrl_cmd_untag(const char *input) +{ + char cmd; + int sock_fd = 0; + struct socket *el_socket; + int res, argc; + struct sock_tag *sock_tag_entry; + struct tag_ref *tag_ref_entry; + struct uid_tag_data *utd_entry; + struct proc_qtu_data *pqd_entry; + + argc = sscanf(input, "%c %d", &cmd, &sock_fd); + CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n", + input, argc, cmd, sock_fd); + if (argc < 2) { + res = -EINVAL; + goto err; + } + el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ + if (!el_socket) { + pr_info("qtaguid: ctrl_untag(%s): failed to lookup" + " sock_fd=%d err=%d\n", input, sock_fd, res); + goto err; + } + CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", + input, atomic_long_read(&el_socket->file->f_count), + el_socket->sk); + spin_lock_bh(&sock_tag_list_lock); + sock_tag_entry = get_sock_stat_nl(el_socket->sk); + if (!sock_tag_entry) { + spin_unlock_bh(&sock_tag_list_lock); + res = -EINVAL; + goto err_put; + } + /* + * The socket already belongs to the current process + * so it can do whatever it wants to it. + */ + rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree); + + tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry); + BUG_ON(!tag_ref_entry); + BUG_ON(tag_ref_entry->num_sock_tags <= 0); + spin_lock_bh(&uid_tag_data_tree_lock); + pqd_entry = proc_qtu_data_tree_search( + &proc_qtu_data_tree, current->tgid); + /* + * TODO: remove if, and start failing. + * At first, we want to catch user-space code that is not + * opening the /dev/xt_qtaguid. + */ + if (IS_ERR_OR_NULL(pqd_entry)) + pr_warn_once("qtaguid: %s(): " + "User space forgot to open /dev/xt_qtaguid? " + "pid=%u tgid=%u uid=%u\n", __func__, + current->pid, current->tgid, current_fsuid()); + else + list_del(&sock_tag_entry->list); + spin_unlock_bh(&uid_tag_data_tree_lock); + /* + * We don't free tag_ref from the utd_entry here, + * only during a cmd_delete(). + */ + tag_ref_entry->num_sock_tags--; + spin_unlock_bh(&sock_tag_list_lock); + /* + * Release the sock_fd that was grabbed at tag time, + * and once more for the sockfd_lookup() here. + */ + sockfd_put(sock_tag_entry->socket); + CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n", + input, sock_tag_entry, + atomic_long_read(&el_socket->file->f_count) - 1); + sockfd_put(el_socket); + + kfree(sock_tag_entry); + atomic64_inc(&qtu_events.sockets_untagged); + + return 0; + +err_put: + CT_DEBUG("qtaguid: ctrl_untag(%s): done. 
socket->...->f_count=%ld\n", + input, atomic_long_read(&el_socket->file->f_count) - 1); + /* Release the sock_fd that was grabbed by sockfd_lookup(). */ + sockfd_put(el_socket); + return res; + +err: + CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input); + return res; +} + +static int qtaguid_ctrl_parse(const char *input, int count) +{ + char cmd; + int res; + + cmd = input[0]; + /* Collect params for commands */ + switch (cmd) { + case 'd': + res = ctrl_cmd_delete(input); + break; + + case 's': + res = ctrl_cmd_counter_set(input); + break; + + case 't': + res = ctrl_cmd_tag(input); + break; + + case 'u': + res = ctrl_cmd_untag(input); + break; + + default: + res = -EINVAL; + goto err; + } + if (!res) + res = count; +err: + CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res); + return res; +} + +#define MAX_QTAGUID_CTRL_INPUT_LEN 255 +static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN]; + + if (unlikely(module_passive)) + return count; + + if (count >= MAX_QTAGUID_CTRL_INPUT_LEN) + return -EINVAL; + + if (copy_from_user(input_buf, buffer, count)) + return -EFAULT; + + input_buf[count] = '\0'; + return qtaguid_ctrl_parse(input_buf, count); +} + +struct proc_print_info { + char *outp; + char **num_items_returned; + struct iface_stat *iface_entry; + struct tag_stat *ts_entry; + int item_index; + int items_to_skip; + int char_count; +}; + +static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) +{ + int len; + struct data_counters *cnts; + + if (!ppi->item_index) { + if (ppi->item_index++ < ppi->items_to_skip) + return 0; + len = snprintf(ppi->outp, ppi->char_count, + "idx iface acct_tag_hex uid_tag_int cnt_set " + "rx_bytes rx_packets " + "tx_bytes tx_packets " + "rx_tcp_bytes rx_tcp_packets " + "rx_udp_bytes rx_udp_packets " + "rx_other_bytes rx_other_packets " + "tx_tcp_bytes tx_tcp_packets " + "tx_udp_bytes tx_udp_packets " + "tx_other_bytes tx_other_packets\n"); + } else { + tag_t tag = ppi->ts_entry->tn.tag; + uid_t stat_uid = get_uid_from_tag(tag); + + if (!can_read_other_uid_stats(stat_uid)) { + CT_DEBUG("qtaguid: stats line: " + "%s 0x%llx %u: insufficient priv " + "from pid=%u tgid=%u uid=%u\n", + ppi->iface_entry->ifname, + get_atag_from_tag(tag), stat_uid, + current->pid, current->tgid, current_fsuid()); + return 0; + } + if (ppi->item_index++ < ppi->items_to_skip) + return 0; + cnts = &ppi->ts_entry->counters; + len = snprintf( + ppi->outp, ppi->char_count, + "%d %s 0x%llx %u %u " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu\n", + ppi->item_index, + ppi->iface_entry->ifname, + get_atag_from_tag(tag), + stat_uid, + cnt_set, + dc_sum_bytes(cnts, cnt_set, IFS_RX), + dc_sum_packets(cnts, cnt_set, IFS_RX), + dc_sum_bytes(cnts, cnt_set, IFS_TX), + dc_sum_packets(cnts, cnt_set, IFS_TX), + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); + } + return len; +} + +static bool 
pp_sets(struct proc_print_info *ppi) +{ + int len; + int counter_set; + for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS; + counter_set++) { + len = pp_stats_line(ppi, counter_set); + if (len >= ppi->char_count) { + *ppi->outp = '\0'; + return false; + } + if (len) { + ppi->outp += len; + ppi->char_count -= len; + (*ppi->num_items_returned)++; + } + } + return true; +} + +/* + * Procfs reader to get all tag stats using style "1)" as described in + * fs/proc/generic.c + * Groups all protocols tx/rx bytes. + */ +static int qtaguid_stats_proc_read(char *page, char **num_items_returned, + off_t items_to_skip, int char_count, int *eof, + void *data) +{ + struct proc_print_info ppi; + int len; + + ppi.outp = page; + ppi.item_index = 0; + ppi.char_count = char_count; + ppi.num_items_returned = num_items_returned; + ppi.items_to_skip = items_to_skip; + + if (unlikely(module_passive)) { + len = pp_stats_line(&ppi, 0); + /* The header should always be shorter than the buffer. */ + BUG_ON(len >= ppi.char_count); + (*num_items_returned)++; + *eof = 1; + return len; + } + + CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld " + "char_count=%d *eof=%d\n", page, *num_items_returned, + items_to_skip, char_count, *eof); + + if (*eof) + return 0; + + /* The idx is there to help debug when things go belly up. */ + len = pp_stats_line(&ppi, 0); + /* Don't advance the outp unless the whole line was printed */ + if (len >= ppi.char_count) { + *ppi.outp = '\0'; + return ppi.outp - page; + } + if (len) { + ppi.outp += len; + ppi.char_count -= len; + (*num_items_returned)++; + } + + spin_lock_bh(&iface_stat_list_lock); + list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) { + struct rb_node *node; + spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock); + for (node = rb_first(&ppi.iface_entry->tag_stat_tree); + node; + node = rb_next(node)) { + ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node); + if (!pp_sets(&ppi)) { + spin_unlock_bh( + &ppi.iface_entry->tag_stat_list_lock); + spin_unlock_bh(&iface_stat_list_lock); + return ppi.outp - page; + } + } + spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock); + } + spin_unlock_bh(&iface_stat_list_lock); + + *eof = 1; + return ppi.outp - page; +} + +/*------------------------------------------*/ +static int qtudev_open(struct inode *inode, struct file *file) +{ + struct uid_tag_data *utd_entry; + struct proc_qtu_data *pqd_entry; + struct proc_qtu_data *new_pqd_entry; + int res; + bool utd_entry_found; + + if (unlikely(qtu_proc_handling_passive)) + return 0; + + DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n", + current->pid, current->tgid, current_fsuid()); + + spin_lock_bh(&uid_tag_data_tree_lock); + + /* Look for existing uid data, or alloc one. 
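+	 * A freshly allocated entry is torn down again in the error path
+	 * below (err_unlock_free_utd).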
*/ + utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); + if (IS_ERR_OR_NULL(utd_entry)) { + res = PTR_ERR(utd_entry); + goto err; + } + + /* Look for existing PID based proc_data */ + pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree, + current->tgid); + if (pqd_entry) { + pr_err("qtaguid: qtudev_open(): %u/%u %u " + "%s already opened\n", + current->pid, current->tgid, current_fsuid(), + QTU_DEV_NAME); + res = -EBUSY; + goto err_unlock_free_utd; + } + + new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC); + if (!new_pqd_entry) { + pr_err("qtaguid: qtudev_open(): %u/%u %u: " + "proc data alloc failed\n", + current->pid, current->tgid, current_fsuid()); + res = -ENOMEM; + goto err_unlock_free_utd; + } + new_pqd_entry->pid = current->tgid; + INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list); + new_pqd_entry->parent_tag_data = utd_entry; + utd_entry->num_pqd++; + + proc_qtu_data_tree_insert(new_pqd_entry, + &proc_qtu_data_tree); + + spin_unlock_bh(&uid_tag_data_tree_lock); + DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n", + current_fsuid(), new_pqd_entry); + file->private_data = new_pqd_entry; + return 0; + +err_unlock_free_utd: + if (!utd_entry_found) { + rb_erase(&utd_entry->node, &uid_tag_data_tree); + kfree(utd_entry); + } + spin_unlock_bh(&uid_tag_data_tree_lock); +err: + return res; +} + +static int qtudev_release(struct inode *inode, struct file *file) +{ + struct proc_qtu_data *pqd_entry = file->private_data; + struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data; + struct sock_tag *st_entry; + struct rb_root st_to_free_tree = RB_ROOT; + struct list_head *entry, *next; + struct tag_ref *tr; + + if (unlikely(qtu_proc_handling_passive)) + return 0; + + /* + * Do not trust the current->pid, it might just be a kworker cleaning + * up after a dead proc. + */ + DR_DEBUG("qtaguid: qtudev_release(): " + "pid=%u tgid=%u uid=%u " + "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n", + current->pid, current->tgid, pqd_entry->parent_tag_data->uid, + pqd_entry, pqd_entry->pid, utd_entry, + utd_entry->num_active_tags); + + spin_lock_bh(&sock_tag_list_lock); + spin_lock_bh(&uid_tag_data_tree_lock); + + list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) { + st_entry = list_entry(entry, struct sock_tag, list); + DR_DEBUG("qtaguid: %s(): " + "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n", + __func__, + st_entry, st_entry->sk, + current->pid, current->tgid, + pqd_entry->parent_tag_data->uid); + + utd_entry = uid_tag_data_tree_search( + &uid_tag_data_tree, + get_uid_from_tag(st_entry->tag)); + BUG_ON(IS_ERR_OR_NULL(utd_entry)); + DR_DEBUG("qtaguid: %s(): " + "looking for tag=0x%llx in utd_entry=%p\n", __func__, + st_entry->tag, utd_entry); + tr = tag_ref_tree_search(&utd_entry->tag_ref_tree, + st_entry->tag); + BUG_ON(!tr); + BUG_ON(tr->num_sock_tags <= 0); + tr->num_sock_tags--; + free_tag_ref_from_utd_entry(tr, utd_entry); + + rb_erase(&st_entry->sock_node, &sock_tag_tree); + list_del(&st_entry->list); + /* Can't sockfd_put() within spinlock, do it later. */ + sock_tag_tree_insert(st_entry, &st_to_free_tree); + + /* + * Try to free the utd_entry if no other proc_qtu_data is + * using it (num_pqd is 0) and it doesn't have active tags + * (num_active_tags is 0). 
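+		 * put_utd_entry() is assumed to do nothing while either count
+		 * is still non-zero.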
+ */ + put_utd_entry(utd_entry); + } + + rb_erase(&pqd_entry->node, &proc_qtu_data_tree); + BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1); + pqd_entry->parent_tag_data->num_pqd--; + put_utd_entry(pqd_entry->parent_tag_data); + kfree(pqd_entry); + file->private_data = NULL; + + spin_unlock_bh(&uid_tag_data_tree_lock); + spin_unlock_bh(&sock_tag_list_lock); + + + sock_tag_tree_erase(&st_to_free_tree); + + prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__, + current->pid, current->tgid); + return 0; +} + +/*------------------------------------------*/ +static const struct file_operations qtudev_fops = { + .owner = THIS_MODULE, + .open = qtudev_open, + .release = qtudev_release, +}; + +static struct miscdevice qtu_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = QTU_DEV_NAME, + .fops = &qtudev_fops, + /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */ +}; + +/*------------------------------------------*/ +static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir) +{ + int ret; + *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net); + if (!*res_procdir) { + pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n"); + ret = -ENOMEM; + goto no_dir; + } + + xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms, + *res_procdir); + if (!xt_qtaguid_ctrl_file) { + pr_err("qtaguid: failed to create xt_qtaguid/ctrl " + " file\n"); + ret = -ENOMEM; + goto no_ctrl_entry; + } + xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read; + xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write; + + xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms, + *res_procdir); + if (!xt_qtaguid_stats_file) { + pr_err("qtaguid: failed to create xt_qtaguid/stats " + "file\n"); + ret = -ENOMEM; + goto no_stats_entry; + } + xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read; + /* + * TODO: add support counter hacking + * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write; + */ + return 0; + +no_stats_entry: + remove_proc_entry("ctrl", *res_procdir); +no_ctrl_entry: + remove_proc_entry("xt_qtaguid", NULL); +no_dir: + return ret; +} + +static struct xt_match qtaguid_mt_reg __read_mostly = { + /* + * This module masquerades as the "owner" module so that iptables + * tools can deal with it. + */ + .name = "owner", + .revision = 1, + .family = NFPROTO_UNSPEC, + .match = qtaguid_mt, + .matchsize = sizeof(struct xt_qtaguid_match_info), + .me = THIS_MODULE, +}; + +static int __init qtaguid_mt_init(void) +{ + if (qtaguid_proc_register(&xt_qtaguid_procdir) + || iface_stat_init(xt_qtaguid_procdir) + || xt_register_match(&qtaguid_mt_reg) + || misc_register(&qtu_device)) + return -1; + return 0; +} + +/* + * TODO: allow unloading of the module. + * For now stats are permanent. + * Kconfig forces'y/n' and never an 'm'. + */ + +module_init(qtaguid_mt_init); +MODULE_AUTHOR("jpa "); +MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_owner"); +MODULE_ALIAS("ip6t_owner"); +MODULE_ALIAS("ipt_qtaguid"); +MODULE_ALIAS("ip6t_qtaguid"); diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h new file mode 100644 index 00000000..02479d6d --- /dev/null +++ b/net/netfilter/xt_qtaguid_internal.h @@ -0,0 +1,330 @@ +/* + * Kernel iptables module to track stats for packets based on user tags. 
+ * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __XT_QTAGUID_INTERNAL_H__ +#define __XT_QTAGUID_INTERNAL_H__ + +#include +#include +#include +#include + +/* Iface handling */ +#define IDEBUG_MASK (1<<0) +/* Iptable Matching. Per packet. */ +#define MDEBUG_MASK (1<<1) +/* Red-black tree handling. Per packet. */ +#define RDEBUG_MASK (1<<2) +/* procfs ctrl/stats handling */ +#define CDEBUG_MASK (1<<3) +/* dev and resource tracking */ +#define DDEBUG_MASK (1<<4) + +/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */ +#define DEFAULT_DEBUG_MASK 0 + +/* + * (Un)Define these *DEBUG to compile out/in the pr_debug calls. + * All undef: text size ~ 0x3030; all def: ~ 0x4404. + */ +#define IDEBUG +#define MDEBUG +#define RDEBUG +#define CDEBUG +#define DDEBUG + +#define MSK_DEBUG(mask, ...) do { \ + if (unlikely(qtaguid_debug_mask & (mask))) \ + pr_debug(__VA_ARGS__); \ + } while (0) +#ifdef IDEBUG +#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__) +#else +#define IF_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef MDEBUG +#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__) +#else +#define MT_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef RDEBUG +#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__) +#else +#define RB_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef CDEBUG +#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__) +#else +#define CT_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef DDEBUG +#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__) +#else +#define DR_DEBUG(...) no_printk(__VA_ARGS__) +#endif + +extern uint qtaguid_debug_mask; + +/*---------------------------------------------------------------------------*/ +/* + * Tags: + * + * They represent what the data usage counters will be tracked against. + * By default a tag is just based on the UID. + * The UID is used as the base for policing, and can not be ignored. + * So a tag will always at least represent a UID (uid_tag). + * + * A tag can be augmented with an "accounting tag" which is associated + * with a UID. + * User space can set the acct_tag portion of the tag which is then used + * with sockets: all data belonging to that socket will be counted against the + * tag. The policing is then based on the tag's uid_tag portion, + * and stats are collected for the acct_tag portion separately. + * + * There could be + * a: {acct_tag=1, uid_tag=10003} + * b: {acct_tag=2, uid_tag=10003} + * c: {acct_tag=3, uid_tag=10003} + * d: {acct_tag=0, uid_tag=10003} + * a, b, and c represent tags associated with specific sockets. + * d is for the totals for that uid, including all untagged traffic. + * Typically d is used with policing/quota rules. + * + * We want tag_t big enough to distinguish uid_t and acct_tag. + * It might become a struct if needed. + * Nothing should be using it as an int. + */ +typedef uint64_t tag_t; /* Only used via accessors */ + +#define TAG_UID_MASK 0xFFFFFFFFULL +#define TAG_ACCT_MASK (~0xFFFFFFFFULL) + +static inline int tag_compare(tag_t t1, tag_t t2) +{ + return t1 < t2 ? -1 : t1 == t2 ? 
0 : 1; +} + +static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid) +{ + return acct_tag | uid; +} +static inline tag_t make_tag_from_uid(uid_t uid) +{ + return uid; +} +static inline uid_t get_uid_from_tag(tag_t tag) +{ + return tag & TAG_UID_MASK; +} +static inline tag_t get_utag_from_tag(tag_t tag) +{ + return tag & TAG_UID_MASK; +} +static inline tag_t get_atag_from_tag(tag_t tag) +{ + return tag & TAG_ACCT_MASK; +} + +static inline bool valid_atag(tag_t tag) +{ + return !(tag & TAG_UID_MASK); +} +static inline tag_t make_atag_from_value(uint32_t value) +{ + return (uint64_t)value << 32; +} +/*---------------------------------------------------------------------------*/ + +/* + * Maximum number of socket tags that a UID is allowed to have active. + * Multiple processes belonging to the same UID contribute towards this limit. + * Special UIDs that can impersonate a UID also contribute (e.g. download + * manager, ...) + */ +#define DEFAULT_MAX_SOCK_TAGS 1024 + +/* + * For now we only track 2 sets of counters. + * The default set is 0. + * Userspace can activate another set for a given uid being tracked. + */ +#define IFS_MAX_COUNTER_SETS 2 + +enum ifs_tx_rx { + IFS_TX, + IFS_RX, + IFS_MAX_DIRECTIONS +}; + +/* For now, TCP, UDP, the rest */ +enum ifs_proto { + IFS_TCP, + IFS_UDP, + IFS_PROTO_OTHER, + IFS_MAX_PROTOS +}; + +struct byte_packet_counters { + uint64_t bytes; + uint64_t packets; +}; + +struct data_counters { + struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; +}; + +/* Generic X based nodes used as a base for rb_tree ops */ +struct tag_node { + struct rb_node node; + tag_t tag; +}; + +struct tag_stat { + struct tag_node tn; + struct data_counters counters; + /* + * If this tag is acct_tag based, we need to count against the + * matching parent uid_tag. + */ + struct data_counters *parent_counters; +}; + +struct iface_stat { + struct list_head list; /* in iface_stat_list */ + char *ifname; + bool active; + /* net_dev is only valid for active iface_stat */ + struct net_device *net_dev; + + struct byte_packet_counters totals[IFS_MAX_DIRECTIONS]; + /* + * We keep the last_known, because some devices reset their counters + * just before NETDEV_UP, while some will reset just before + * NETDEV_REGISTER (which is more normal). + * So now, if the device didn't do a NETDEV_UNREGISTER and we see + * its current dev stats smaller that what was previously known, we + * assume an UNREGISTER and just use the last_known. + */ + struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS]; + /* last_known is usable when last_known_valid is true */ + bool last_known_valid; + + struct proc_dir_entry *proc_ptr; + + struct rb_root tag_stat_tree; + spinlock_t tag_stat_list_lock; +}; + +/* This is needed to create proc_dir_entries from atomic context. */ +struct iface_stat_work { + struct work_struct iface_work; + struct iface_stat *iface_entry; +}; + +/* + * Track tag that this socket is transferring data for, and not necessarily + * the uid that owns the socket. + * This is the tag against which tag_stat.counters will be billed. + * These structs need to be looked up by sock and pid. 
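+ * Lookup by sock goes through the global sock_tag_tree (sock_node);
+ * lookup by pid goes through proc_qtu_data.sock_tag_list (list).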
+ */ +struct sock_tag { + struct rb_node sock_node; + struct sock *sk; /* Only used as a number, never dereferenced */ + /* The socket is needed for sockfd_put() */ + struct socket *socket; + /* Used to associate with a given pid */ + struct list_head list; /* in proc_qtu_data.sock_tag_list */ + pid_t pid; + + tag_t tag; +}; + +struct qtaguid_event_counts { + /* Various successful events */ + atomic64_t sockets_tagged; + atomic64_t sockets_untagged; + atomic64_t counter_set_changes; + atomic64_t delete_cmds; + atomic64_t iface_events; /* Number of NETDEV_* events handled */ + + atomic64_t match_calls; /* Number of times iptables called mt */ + /* + * match_found_sk_*: numbers related to the netfilter matching + * function finding a sock for the sk_buff. + * Total skbs processed is sum(match_found*). + */ + atomic64_t match_found_sk; /* An sk was already in the sk_buff. */ + /* The connection tracker had or didn't have the sk. */ + atomic64_t match_found_sk_in_ct; + atomic64_t match_found_no_sk_in_ct; + /* + * No sk could be found. No apparent owner. Could happen with + * unsolicited traffic. + */ + atomic64_t match_no_sk; + /* + * The file ptr in the sk_socket wasn't there. + * This might happen for traffic while the socket is being closed. + */ + atomic64_t match_no_sk_file; +}; + +/* Track the set active_set for the given tag. */ +struct tag_counter_set { + struct tag_node tn; + int active_set; +}; + +/*----------------------------------------------*/ +/* + * The qtu uid data is used to track resources that are created directly or + * indirectly by processes (uid tracked). + * It is shared by the processes with the same uid. + * Some of the resource will be counted to prevent further rogue allocations, + * some will need freeing once the owner process (uid) exits. + */ +struct uid_tag_data { + struct rb_node node; + uid_t uid; + + /* + * For the uid, how many accounting tags have been set. + */ + int num_active_tags; + /* Track the number of proc_qtu_data that reference it */ + int num_pqd; + struct rb_root tag_ref_tree; + /* No tag_node_tree_lock; use uid_tag_data_tree_lock */ +}; + +struct tag_ref { + struct tag_node tn; + + /* + * This tracks the number of active sockets that have a tag on them + * which matches this tag_ref.tn.tag. + * A tag ref can live on after the sockets are untagged. + * A tag ref can only be removed during a tag delete command. + */ + int num_sock_tags; +}; + +struct proc_qtu_data { + struct rb_node node; + pid_t pid; + + struct uid_tag_data *parent_tag_data; + + /* Tracks the sock_tags that need freeing upon this proc's death */ + struct list_head sock_tag_list; + /* No spinlock_t sock_tag_list_lock; use the global one. */ +}; + +/*----------------------------------------------*/ +#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */ diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c new file mode 100644 index 00000000..39176785 --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.c @@ -0,0 +1,556 @@ +/* + * Pretty printing Support for iptables xt_qtaguid module. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Most of the functions in this file just waste time if DEBUG is not defined. + * The matching xt_qtaguid_print.h will static inline empty funcs if the needed + * debug flags ore not defined. 
+ * Those funcs that fail to allocate memory will panic as there is no need to + * hobble allong just pretending to do the requested work. + */ + +#define DEBUG + +#include +#include +#include +#include +#include +#include + + +#include "xt_qtaguid_internal.h" +#include "xt_qtaguid_print.h" + +#ifdef DDEBUG + +static void _bug_on_err_or_null(void *ptr) +{ + if (IS_ERR_OR_NULL(ptr)) { + pr_err("qtaguid: kmalloc failed\n"); + BUG(); + } +} + +char *pp_tag_t(tag_t *tag) +{ + char *res; + + if (!tag) + res = kasprintf(GFP_ATOMIC, "tag_t@null{}"); + else + res = kasprintf(GFP_ATOMIC, + "tag_t@%p{tag=0x%llx, uid=%u}", + tag, *tag, get_uid_from_tag(*tag)); + _bug_on_err_or_null(res); + return res; +} + +char *pp_data_counters(struct data_counters *dc, bool showValues) +{ + char *res; + + if (!dc) + res = kasprintf(GFP_ATOMIC, "data_counters@null{}"); + else if (showValues) + res = kasprintf( + GFP_ATOMIC, "data_counters@%p{" + "set0{" + "rx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}, " + "tx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}}, " + "set1{" + "rx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}, " + "tx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}}}", + dc, + dc->bpc[0][IFS_RX][IFS_TCP].bytes, + dc->bpc[0][IFS_RX][IFS_TCP].packets, + dc->bpc[0][IFS_RX][IFS_UDP].bytes, + dc->bpc[0][IFS_RX][IFS_UDP].packets, + dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes, + dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets, + dc->bpc[0][IFS_TX][IFS_TCP].bytes, + dc->bpc[0][IFS_TX][IFS_TCP].packets, + dc->bpc[0][IFS_TX][IFS_UDP].bytes, + dc->bpc[0][IFS_TX][IFS_UDP].packets, + dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes, + dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets, + dc->bpc[1][IFS_RX][IFS_TCP].bytes, + dc->bpc[1][IFS_RX][IFS_TCP].packets, + dc->bpc[1][IFS_RX][IFS_UDP].bytes, + dc->bpc[1][IFS_RX][IFS_UDP].packets, + dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes, + dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets, + dc->bpc[1][IFS_TX][IFS_TCP].bytes, + dc->bpc[1][IFS_TX][IFS_TCP].packets, + dc->bpc[1][IFS_TX][IFS_UDP].bytes, + dc->bpc[1][IFS_TX][IFS_UDP].packets, + dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes, + dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets); + else + res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc); + _bug_on_err_or_null(res); + return res; +} + +char *pp_tag_node(struct tag_node *tn) +{ + char *tag_str; + char *res; + + if (!tn) { + res = kasprintf(GFP_ATOMIC, "tag_node@null{}"); + _bug_on_err_or_null(res); + return res; + } + tag_str = pp_tag_t(&tn->tag); + res = kasprintf(GFP_ATOMIC, + "tag_node@%p{tag=%s}", + tn, tag_str); + _bug_on_err_or_null(res); + kfree(tag_str); + return res; +} + +char *pp_tag_ref(struct tag_ref *tr) +{ + char *tn_str; + char *res; + + if (!tr) { + res = kasprintf(GFP_ATOMIC, "tag_ref@null{}"); + _bug_on_err_or_null(res); + return res; + } + tn_str = pp_tag_node(&tr->tn); + res = kasprintf(GFP_ATOMIC, + "tag_ref@%p{%s, num_sock_tags=%d}", + tr, tn_str, tr->num_sock_tags); + _bug_on_err_or_null(res); + kfree(tn_str); + return res; +} + +char *pp_tag_stat(struct tag_stat *ts) +{ + char *tn_str; + char *counters_str; + char *parent_counters_str; + char *res; + + if (!ts) { + res = kasprintf(GFP_ATOMIC, "tag_stat@null{}"); + _bug_on_err_or_null(res); + return res; + } + tn_str = pp_tag_node(&ts->tn); + counters_str = pp_data_counters(&ts->counters, true); + parent_counters_str = pp_data_counters(ts->parent_counters, false); + res = 
kasprintf(GFP_ATOMIC, + "tag_stat@%p{%s, counters=%s, parent_counters=%s}", + ts, tn_str, counters_str, parent_counters_str); + _bug_on_err_or_null(res); + kfree(tn_str); + kfree(counters_str); + kfree(parent_counters_str); + return res; +} + +char *pp_iface_stat(struct iface_stat *is) +{ + char *res; + if (!is) + res = kasprintf(GFP_ATOMIC, "iface_stat@null{}"); + else + res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" + "list=list_head{...}, " + "ifname=%s, " + "total={rx={bytes=%llu, " + "packets=%llu}, " + "tx={bytes=%llu, " + "packets=%llu}}, " + "last_known_valid=%d, " + "last_known={rx={bytes=%llu, " + "packets=%llu}, " + "tx={bytes=%llu, " + "packets=%llu}}, " + "active=%d, " + "net_dev=%p, " + "proc_ptr=%p, " + "tag_stat_tree=rb_root{...}}", + is, + is->ifname, + is->totals[IFS_RX].bytes, + is->totals[IFS_RX].packets, + is->totals[IFS_TX].bytes, + is->totals[IFS_TX].packets, + is->last_known_valid, + is->last_known[IFS_RX].bytes, + is->last_known[IFS_RX].packets, + is->last_known[IFS_TX].bytes, + is->last_known[IFS_TX].packets, + is->active, + is->net_dev, + is->proc_ptr); + _bug_on_err_or_null(res); + return res; +} + +char *pp_sock_tag(struct sock_tag *st) +{ + char *tag_str; + char *res; + + if (!st) { + res = kasprintf(GFP_ATOMIC, "sock_tag@null{}"); + _bug_on_err_or_null(res); + return res; + } + tag_str = pp_tag_t(&st->tag); + res = kasprintf(GFP_ATOMIC, "sock_tag@%p{" + "sock_node=rb_node{...}, " + "sk=%p socket=%p (f_count=%lu), list=list_head{...}, " + "pid=%u, tag=%s}", + st, st->sk, st->socket, atomic_long_read( + &st->socket->file->f_count), + st->pid, tag_str); + _bug_on_err_or_null(res); + kfree(tag_str); + return res; +} + +char *pp_uid_tag_data(struct uid_tag_data *utd) +{ + char *res; + + if (!utd) + res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}"); + else + res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{" + "uid=%u, num_active_acct_tags=%d, " + "num_pqd=%d, " + "tag_node_tree=rb_root{...}, " + "proc_qtu_data_tree=rb_root{...}}", + utd, utd->uid, + utd->num_active_tags, utd->num_pqd); + _bug_on_err_or_null(res); + return res; +} + +char *pp_proc_qtu_data(struct proc_qtu_data *pqd) +{ + char *parent_tag_data_str; + char *res; + + if (!pqd) { + res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}"); + _bug_on_err_or_null(res); + return res; + } + parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data); + res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{" + "node=rb_node{...}, pid=%u, " + "parent_tag_data=%s, " + "sock_tag_list=list_head{...}}", + pqd, pqd->pid, parent_tag_data_str + ); + _bug_on_err_or_null(res); + kfree(parent_tag_data_str); + return res; +} + +/*------------------------------------------*/ +void prdebug_sock_tag_tree(int indent_level, + struct rb_root *sock_tag_tree) +{ + struct rb_node *node; + struct sock_tag *sock_tag_entry; + char *str; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(sock_tag_tree)) { + str = "sock_tag_tree=rb_root{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "sock_tag_tree=rb_root{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(sock_tag_tree); + node; + node = rb_next(node)) { + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); + str = pp_sock_tag(sock_tag_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_sock_tag_list(int indent_level, + 
struct list_head *sock_tag_list) +{ + struct sock_tag *sock_tag_entry; + char *str; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (list_empty(sock_tag_list)) { + str = "sock_tag_list=list_head{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "sock_tag_list=list_head{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + list_for_each_entry(sock_tag_entry, sock_tag_list, list) { + str = pp_sock_tag(sock_tag_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_proc_qtu_data_tree(int indent_level, + struct rb_root *proc_qtu_data_tree) +{ + char *str; + struct rb_node *node; + struct proc_qtu_data *proc_qtu_data_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(proc_qtu_data_tree)) { + str = "proc_qtu_data_tree=rb_root{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "proc_qtu_data_tree=rb_root{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(proc_qtu_data_tree); + node; + node = rb_next(node)) { + proc_qtu_data_entry = rb_entry(node, + struct proc_qtu_data, + node); + str = pp_proc_qtu_data(proc_qtu_data_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, + str); + kfree(str); + indent_level++; + prdebug_sock_tag_list(indent_level, + &proc_qtu_data_entry->sock_tag_list); + indent_level--; + + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) +{ + char *str; + struct rb_node *node; + struct tag_ref *tag_ref_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(tag_ref_tree)) { + str = "tag_ref_tree{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "tag_ref_tree{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(tag_ref_tree); + node; + node = rb_next(node)) { + tag_ref_entry = rb_entry(node, + struct tag_ref, + tn.node); + str = pp_tag_ref(tag_ref_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, + str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_uid_tag_data_tree(int indent_level, + struct rb_root *uid_tag_data_tree) +{ + char *str; + struct rb_node *node; + struct uid_tag_data *uid_tag_data_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(uid_tag_data_tree)) { + str = "uid_tag_data_tree=rb_root{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "uid_tag_data_tree=rb_root{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(uid_tag_data_tree); + node; + node = rb_next(node)) { + uid_tag_data_entry = rb_entry(node, struct uid_tag_data, + node); + str = pp_uid_tag_data(uid_tag_data_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); + kfree(str); + if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) { + indent_level++; + prdebug_tag_ref_tree(indent_level, + &uid_tag_data_entry->tag_ref_tree); + indent_level--; + } + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void 
prdebug_tag_stat_tree(int indent_level, + struct rb_root *tag_stat_tree) +{ + char *str; + struct rb_node *node; + struct tag_stat *ts_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(tag_stat_tree)) { + str = "tag_stat_tree{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "tag_stat_tree{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(tag_stat_tree); + node; + node = rb_next(node)) { + ts_entry = rb_entry(node, struct tag_stat, tn.node); + str = pp_tag_stat(ts_entry); + pr_debug("%*d: %s\n", indent_level*2, indent_level, + str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_iface_stat_list(int indent_level, + struct list_head *iface_stat_list) +{ + char *str; + struct iface_stat *iface_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (list_empty(iface_stat_list)) { + str = "iface_stat_list=list_head{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "iface_stat_list=list_head{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + list_for_each_entry(iface_entry, iface_stat_list, list) { + str = pp_iface_stat(iface_entry); + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + kfree(str); + + spin_lock_bh(&iface_entry->tag_stat_list_lock); + if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) { + indent_level++; + prdebug_tag_stat_tree(indent_level, + &iface_entry->tag_stat_tree); + indent_level--; + } + spin_unlock_bh(&iface_entry->tag_stat_list_lock); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +#endif /* ifdef DDEBUG */ +/*------------------------------------------*/ +static const char * const netdev_event_strings[] = { + "netdev_unknown", + "NETDEV_UP", + "NETDEV_DOWN", + "NETDEV_REBOOT", + "NETDEV_CHANGE", + "NETDEV_REGISTER", + "NETDEV_UNREGISTER", + "NETDEV_CHANGEMTU", + "NETDEV_CHANGEADDR", + "NETDEV_GOING_DOWN", + "NETDEV_CHANGENAME", + "NETDEV_FEAT_CHANGE", + "NETDEV_BONDING_FAILOVER", + "NETDEV_PRE_UP", + "NETDEV_PRE_TYPE_CHANGE", + "NETDEV_POST_TYPE_CHANGE", + "NETDEV_POST_INIT", + "NETDEV_UNREGISTER_BATCH", + "NETDEV_RELEASE", + "NETDEV_NOTIFY_PEERS", + "NETDEV_JOIN", +}; + +const char *netdev_evt_str(int netdev_event) +{ + if (netdev_event < 0 + || netdev_event >= ARRAY_SIZE(netdev_event_strings)) + return "bad event num"; + return netdev_event_strings[netdev_event]; +} diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h new file mode 100644 index 00000000..b63871a0 --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.h @@ -0,0 +1,120 @@ +/* + * Pretty printing Support for iptables xt_qtaguid module. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
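
For illustration only (not part of the patch): the pp_*() printers above follow one convention, the callee allocates the string (kasprintf() with GFP_ATOMIC, BUG() on allocation failure) and the caller prints and frees it, which is exactly what the prdebug_*() walkers do. Below is a userspace analogue of that ownership pattern, with invented names.

#define _GNU_SOURCE		/* for asprintf() */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct tag_node { unsigned long long tag; };

/* Callee allocates; a NULL input still yields a printable placeholder,
 * mirroring the "...@null{}" strings produced above. */
static char *pp_tag_node_demo(const struct tag_node *tn)
{
	char *res;

	if (!tn)
		return strdup("tag_node@null{}");
	if (asprintf(&res, "tag_node@%p{tag=0x%llx}",
		     (const void *)tn, tn->tag) < 0)
		return NULL;		/* the kernel version BUGs instead */
	return res;
}

int main(void)
{
	struct tag_node tn = { .tag = 0x2a00002715ULL };
	char *str = pp_tag_node_demo(&tn);

	if (str) {
		printf("%s\n", str);	/* pr_debug() in the kernel walkers */
		free(str);		/* caller frees, as kfree() is used above */
	}
	return 0;
}
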
+ */ +#ifndef __XT_QTAGUID_PRINT_H__ +#define __XT_QTAGUID_PRINT_H__ + +#include "xt_qtaguid_internal.h" + +#ifdef DDEBUG + +char *pp_tag_t(tag_t *tag); +char *pp_data_counters(struct data_counters *dc, bool showValues); +char *pp_tag_node(struct tag_node *tn); +char *pp_tag_ref(struct tag_ref *tr); +char *pp_tag_stat(struct tag_stat *ts); +char *pp_iface_stat(struct iface_stat *is); +char *pp_sock_tag(struct sock_tag *st); +char *pp_uid_tag_data(struct uid_tag_data *qtd); +char *pp_proc_qtu_data(struct proc_qtu_data *pqd); + +/*------------------------------------------*/ +void prdebug_sock_tag_list(int indent_level, + struct list_head *sock_tag_list); +void prdebug_sock_tag_tree(int indent_level, + struct rb_root *sock_tag_tree); +void prdebug_proc_qtu_data_tree(int indent_level, + struct rb_root *proc_qtu_data_tree); +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree); +void prdebug_uid_tag_data_tree(int indent_level, + struct rb_root *uid_tag_data_tree); +void prdebug_tag_stat_tree(int indent_level, + struct rb_root *tag_stat_tree); +void prdebug_iface_stat_list(int indent_level, + struct list_head *iface_stat_list); + +#else + +/*------------------------------------------*/ +static inline char *pp_tag_t(tag_t *tag) +{ + return NULL; +} +static inline char *pp_data_counters(struct data_counters *dc, bool showValues) +{ + return NULL; +} +static inline char *pp_tag_node(struct tag_node *tn) +{ + return NULL; +} +static inline char *pp_tag_ref(struct tag_ref *tr) +{ + return NULL; +} +static inline char *pp_tag_stat(struct tag_stat *ts) +{ + return NULL; +} +static inline char *pp_iface_stat(struct iface_stat *is) +{ + return NULL; +} +static inline char *pp_sock_tag(struct sock_tag *st) +{ + return NULL; +} +static inline char *pp_uid_tag_data(struct uid_tag_data *qtd) +{ + return NULL; +} +static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd) +{ + return NULL; +} + +/*------------------------------------------*/ +static inline +void prdebug_sock_tag_list(int indent_level, + struct list_head *sock_tag_list) +{ +} +static inline +void prdebug_sock_tag_tree(int indent_level, + struct rb_root *sock_tag_tree) +{ +} +static inline +void prdebug_proc_qtu_data_tree(int indent_level, + struct rb_root *proc_qtu_data_tree) +{ +} +static inline +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) +{ +} +static inline +void prdebug_uid_tag_data_tree(int indent_level, + struct rb_root *uid_tag_data_tree) +{ +} +static inline +void prdebug_tag_stat_tree(int indent_level, + struct rb_root *tag_stat_tree) +{ +} +static inline +void prdebug_iface_stat_list(int indent_level, + struct list_head *iface_stat_list) +{ +} +#endif +/*------------------------------------------*/ +const char *netdev_evt_str(int netdev_event); +#endif /* ifndef __XT_QTAGUID_PRINT_H__ */ diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 390b7d09..b1c7ada8 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -22,7 +22,7 @@ MODULE_ALIAS("ip6t_quota"); static DEFINE_SPINLOCK(quota_lock); static bool -quota_mt(const struct sk_buff *skb, const struct xt_match_param *par) +quota_mt(const struct sk_buff *skb, const struct xt_action_param *par) { struct xt_quota_info *q = (void *)par->matchinfo; struct xt_quota_priv *priv = q->master; @@ -43,7 +43,7 @@ quota_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool quota_mt_check(const struct xt_mtchk_param *par) +static int quota_mt_check(const struct 
xt_mtchk_param *par) { struct xt_quota_info *q = par->matchinfo; diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c new file mode 100644 index 00000000..814051c5 --- /dev/null +++ b/net/netfilter/xt_quota2.c @@ -0,0 +1,382 @@ +/* + * xt_quota2 - enhanced xt_quota that can count upwards and in packets + * as a minimal accounting match. + * by Jan Engelhardt , 2008 + * + * Originally based on xt_quota.c: + * netfilter module to enforce network quotas + * Sam Johnston + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License; either + * version 2 of the License, as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG +#include +#endif + +/** + * @lock: lock to protect quota writers from each other + */ +struct xt_quota_counter { + u_int64_t quota; + spinlock_t lock; + struct list_head list; + atomic_t ref; + char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)]; + struct proc_dir_entry *procfs_entry; +}; + +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG +/* Harald's favorite number +1 :D From ipt_ULOG.C */ +static int qlog_nl_event = 112; +module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(event_num, + "Event number for NETLINK_NFLOG message. 0 disables log." + "111 is what ipt_ULOG uses."); +static struct sock *nflognl; +#endif + +static LIST_HEAD(counter_list); +static DEFINE_SPINLOCK(counter_list_lock); + +static struct proc_dir_entry *proc_xt_quota; +static unsigned int quota_list_perms = S_IRUGO | S_IWUSR; +static unsigned int quota_list_uid = 0; +static unsigned int quota_list_gid = 0; +module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR); +module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR); +module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR); + + +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG +static void quota2_log(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ + ulog_packet_msg_t *pm; + struct sk_buff *log_skb; + size_t size; + struct nlmsghdr *nlh; + + if (!qlog_nl_event) + return; + + size = NLMSG_SPACE(sizeof(*pm)); + size = max(size, (size_t)NLMSG_GOODSIZE); + log_skb = alloc_skb(size, GFP_ATOMIC); + if (!log_skb) { + pr_err("xt_quota2: cannot alloc skb for logging\n"); + return; + } + + /* NLMSG_PUT() uses "goto nlmsg_failure" */ + nlh = NLMSG_PUT(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event, + sizeof(*pm)); + pm = NLMSG_DATA(nlh); + if (skb->tstamp.tv64 == 0) + __net_timestamp((struct sk_buff *)skb); + pm->data_len = 0; + pm->hook = hooknum; + if (prefix != NULL) + strlcpy(pm->prefix, prefix, sizeof(pm->prefix)); + else + *(pm->prefix) = '\0'; + if (in) + strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name)); + else + pm->indev_name[0] = '\0'; + + if (out) + strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); + else + pm->outdev_name[0] = '\0'; + + NETLINK_CB(log_skb).dst_group = 1; + pr_debug("throwing 1 packets to netlink group 1\n"); + netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC); + +nlmsg_failure: /* Used within NLMSG_PUT() */ + pr_debug("xt_quota2: error during NLMSG_PUT\n"); +} +#else +static void quota2_log(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ +} +#endif /* if+else 
CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */ + +static int quota_proc_read(char *page, char **start, off_t offset, + int count, int *eof, void *data) +{ + struct xt_quota_counter *e = data; + int ret; + + spin_lock_bh(&e->lock); + ret = snprintf(page, PAGE_SIZE, "%llu\n", e->quota); + spin_unlock_bh(&e->lock); + return ret; +} + +static int quota_proc_write(struct file *file, const char __user *input, + unsigned long size, void *data) +{ + struct xt_quota_counter *e = data; + char buf[sizeof("18446744073709551616")]; + + if (size > sizeof(buf)) + size = sizeof(buf); + if (copy_from_user(buf, input, size) != 0) + return -EFAULT; + buf[sizeof(buf)-1] = '\0'; + + spin_lock_bh(&e->lock); + e->quota = simple_strtoull(buf, NULL, 0); + spin_unlock_bh(&e->lock); + return size; +} + +static struct xt_quota_counter * +q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon) +{ + struct xt_quota_counter *e; + unsigned int size; + + /* Do not need all the procfs things for anonymous counters. */ + size = anon ? offsetof(typeof(*e), list) : sizeof(*e); + e = kmalloc(size, GFP_KERNEL); + if (e == NULL) + return NULL; + + e->quota = q->quota; + spin_lock_init(&e->lock); + if (!anon) { + INIT_LIST_HEAD(&e->list); + atomic_set(&e->ref, 1); + strlcpy(e->name, q->name, sizeof(e->name)); + } + return e; +} + +/** + * q2_get_counter - get ref to counter or create new + * @name: name of counter + */ +static struct xt_quota_counter * +q2_get_counter(const struct xt_quota_mtinfo2 *q) +{ + struct proc_dir_entry *p; + struct xt_quota_counter *e = NULL; + struct xt_quota_counter *new_e; + + if (*q->name == '\0') + return q2_new_counter(q, true); + + /* No need to hold a lock while getting a new counter */ + new_e = q2_new_counter(q, false); + if (new_e == NULL) + goto out; + + spin_lock_bh(&counter_list_lock); + list_for_each_entry(e, &counter_list, list) + if (strcmp(e->name, q->name) == 0) { + atomic_inc(&e->ref); + spin_unlock_bh(&counter_list_lock); + kfree(new_e); + pr_debug("xt_quota2: old counter name=%s", e->name); + return e; + } + e = new_e; + pr_debug("xt_quota2: new_counter name=%s", e->name); + list_add_tail(&e->list, &counter_list); + /* The entry having a refcount of 1 is not directly destructible. + * This func has not yet returned the new entry, thus iptables + * has not references for destroying this entry. + * For another rule to try to destroy it, it would 1st need for this + * func* to be re-invoked, acquire a new ref for the same named quota. + * Nobody will access the e->procfs_entry either. + * So release the lock. */ + spin_unlock_bh(&counter_list_lock); + + /* create_proc_entry() is not spin_lock happy */ + p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms, + proc_xt_quota); + + if (IS_ERR_OR_NULL(p)) { + spin_lock_bh(&counter_list_lock); + list_del(&e->list); + spin_unlock_bh(&counter_list_lock); + goto out; + } + p->data = e; + p->read_proc = quota_proc_read; + p->write_proc = quota_proc_write; + p->uid = quota_list_uid; + p->gid = quota_list_gid; + return e; + + out: + kfree(e); + return NULL; +} + +static int quota_mt2_check(const struct xt_mtchk_param *par) +{ + struct xt_quota_mtinfo2 *q = par->matchinfo; + + pr_debug("xt_quota2: check() flags=0x%04x", q->flags); + + if (q->flags & ~XT_QUOTA_MASK) + return -EINVAL; + + q->name[sizeof(q->name)-1] = '\0'; + if (*q->name == '.' 
|| strchr(q->name, '/') != NULL) { + printk(KERN_ERR "xt_quota.3: illegal name\n"); + return -EINVAL; + } + + q->master = q2_get_counter(q); + if (q->master == NULL) { + printk(KERN_ERR "xt_quota.3: memory alloc failure\n"); + return -ENOMEM; + } + + return 0; +} + +static void quota_mt2_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_quota_mtinfo2 *q = par->matchinfo; + struct xt_quota_counter *e = q->master; + + if (*q->name == '\0') { + kfree(e); + return; + } + + spin_lock_bh(&counter_list_lock); + if (!atomic_dec_and_test(&e->ref)) { + spin_unlock_bh(&counter_list_lock); + return; + } + + list_del(&e->list); + remove_proc_entry(e->name, proc_xt_quota); + spin_unlock_bh(&counter_list_lock); + kfree(e); +} + +static bool +quota_mt2(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct xt_quota_mtinfo2 *q = (void *)par->matchinfo; + struct xt_quota_counter *e = q->master; + bool ret = q->flags & XT_QUOTA_INVERT; + + spin_lock_bh(&e->lock); + if (q->flags & XT_QUOTA_GROW) { + /* + * While no_change is pointless in "grow" mode, we will + * implement it here simply to have a consistent behavior. + */ + if (!(q->flags & XT_QUOTA_NO_CHANGE)) { + e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; + } + ret = true; + } else { + if (e->quota >= skb->len) { + if (!(q->flags & XT_QUOTA_NO_CHANGE)) + e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; + ret = !ret; + } else { + /* We are transitioning, log that fact. */ + if (e->quota) { + quota2_log(par->hooknum, + skb, + par->in, + par->out, + q->name); + } + /* we do not allow even small packets from now on */ + e->quota = 0; + } + } + spin_unlock_bh(&e->lock); + return ret; +} + +static struct xt_match quota_mt2_reg[] __read_mostly = { + { + .name = "quota2", + .revision = 3, + .family = NFPROTO_IPV4, + .checkentry = quota_mt2_check, + .match = quota_mt2, + .destroy = quota_mt2_destroy, + .matchsize = sizeof(struct xt_quota_mtinfo2), + .me = THIS_MODULE, + }, + { + .name = "quota2", + .revision = 3, + .family = NFPROTO_IPV6, + .checkentry = quota_mt2_check, + .match = quota_mt2, + .destroy = quota_mt2_destroy, + .matchsize = sizeof(struct xt_quota_mtinfo2), + .me = THIS_MODULE, + }, +}; + +static int __init quota_mt2_init(void) +{ + int ret; + pr_debug("xt_quota2: init()"); + +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG + nflognl = netlink_kernel_create(&init_net, + NETLINK_NFLOG, 1, NULL, + NULL, THIS_MODULE); + if (!nflognl) + return -ENOMEM; +#endif + + proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net); + if (proc_xt_quota == NULL) + return -EACCES; + + ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); + if (ret < 0) + remove_proc_entry("xt_quota", init_net.proc_net); + pr_debug("xt_quota2: init() %d", ret); + return ret; +} + +static void __exit quota_mt2_exit(void) +{ + xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); + remove_proc_entry("xt_quota", init_net.proc_net); +} + +module_init(quota_mt2_init); +module_exit(quota_mt2_exit); +MODULE_DESCRIPTION("Xtables: countdown quota match; up counter"); +MODULE_AUTHOR("Sam Johnston "); +MODULE_AUTHOR("Jan Engelhardt "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_quota2"); +MODULE_ALIAS("ip6t_quota2"); diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 4fc6a917..90d37505 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -15,7 +15,7 @@ static bool -xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par) +xt_rateest_mt(const struct sk_buff *skb, const 
struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; struct gnet_stats_rate_est *r; @@ -74,7 +74,7 @@ xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) +static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index 484d1689..b063c783 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -22,7 +22,7 @@ MODULE_DESCRIPTION("Xtables: Routing realm match"); MODULE_ALIAS("ipt_realm"); static bool -realm_mt(const struct sk_buff *skb, const struct xt_match_param *par) +realm_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_realm_info *info = par->matchinfo; const struct dst_entry *dst = skb_dst(skb); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index eb0ceb84..fa4cb9d4 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -173,10 +173,10 @@ recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, static void recent_entry_update(struct recent_table *t, struct recent_entry *e) { + e->index %= ip_pkt_list_tot; e->stamps[e->index++] = jiffies; if (e->index > e->nstamps) e->nstamps = e->index; - e->index %= ip_pkt_list_tot; list_move_tail(&e->lru_list, &t->lru_list); } @@ -201,7 +201,7 @@ static void recent_table_flush(struct recent_table *t) } static bool -recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) +recent_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; @@ -260,7 +260,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) for (i = 0; i < e->nstamps; i++) { if (info->seconds && time_after(time, e->stamps[i])) continue; - if (++hits >= info->hit_count) { + if (!info->hit_count || ++hits >= info->hit_count) { ret = !ret; break; } @@ -277,7 +277,7 @@ out: return ret; } -static bool recent_mt_check(const struct xt_mtchk_param *par) +static int recent_mt_check(const struct xt_mtchk_param *par) { const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h new file mode 100644 index 00000000..6efe4e5a --- /dev/null +++ b/net/netfilter/xt_repldata.h @@ -0,0 +1,35 @@ +/* + * Today's hack: quantum tunneling in structs + * + * 'entries' and 'term' are never anywhere referenced by word in code. In fact, + * they serve as the hanging-off data accessed through repl.data[]. 
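
For illustration only (not part of the patch): a standalone sketch of the "hanging-off data" trick described above. A wrapper struct places the real entries and the terminator directly behind the header, so they are reachable through the header's trailing zero-length data[] member even though they are never referenced by name again. All names here are invented, and the zero-length array relies on the gcc extension the kernel already uses.

#include <stdio.h>
#include <string.h>

struct replace_hdr {
	unsigned int num_entries;
	unsigned int size;
	unsigned char data[0];	/* gcc zero-length array; payload starts here */
};

struct std_entry  { int verdict; };
struct error_term { char msg[8]; };

int main(void)
{
	struct {
		struct replace_hdr repl;
		struct std_entry entries[2];	/* only ever reached via repl.data */
		struct error_term term;
	} tbl = { .repl = { .num_entries = 3,
			    .size = 2 * sizeof(struct std_entry) +
				    sizeof(struct error_term) } };

	tbl.entries[0].verdict = 1;
	tbl.entries[1].verdict = 1;
	strcpy(tbl.term.msg, "ERROR");

	/* A consumer walks repl.data for repl.size bytes, just as x_tables
	 * walks the initial table built by the macro above. */
	struct std_entry *e = (struct std_entry *)tbl.repl.data;
	printf("first verdict via data[]: %d, payload size %u\n",
	       e->verdict, tbl.repl.size);
	return 0;
}
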
+ */ + +#define xt_alloc_initial_table(type, typ2) ({ \ + unsigned int hook_mask = info->valid_hooks; \ + unsigned int nhooks = hweight32(hook_mask); \ + unsigned int bytes = 0, hooknum = 0, i = 0; \ + struct { \ + struct type##_replace repl; \ + struct type##_standard entries[nhooks]; \ + struct type##_error term; \ + } *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); \ + if (tbl == NULL) \ + return NULL; \ + strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \ + tbl->term = (struct type##_error)typ2##_ERROR_INIT; \ + tbl->repl.valid_hooks = hook_mask; \ + tbl->repl.num_entries = nhooks + 1; \ + tbl->repl.size = nhooks * sizeof(struct type##_standard) + \ + sizeof(struct type##_error); \ + for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \ + if (!(hook_mask & 1)) \ + continue; \ + tbl->repl.hook_entry[hooknum] = bytes; \ + tbl->repl.underflow[hooknum] = bytes; \ + tbl->entries[i++] = (struct type##_standard) \ + typ2##_STANDARD_INIT(NF_ACCEPT); \ + bytes += sizeof(struct type##_standard); \ + } \ + tbl; \ +}) diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index a189ada9..38577d32 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -1,3 +1,4 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -15,12 +16,6 @@ MODULE_DESCRIPTION("Xtables: SCTP protocol packet match"); MODULE_ALIAS("ipt_sctp"); MODULE_ALIAS("ip6t_sctp"); -#ifdef DEBUG_SCTP -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - #define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ || (!!((invflag) & (option)) ^ (cond))) @@ -52,7 +47,7 @@ match_packet(const struct sk_buff *skb, const struct xt_sctp_flag_info *flag_info = info->flag_info; int flag_count = info->flag_count; -#ifdef DEBUG_SCTP +#ifdef DEBUG int i = 0; #endif @@ -62,17 +57,19 @@ match_packet(const struct sk_buff *skb, do { sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); if (sch == NULL || sch->length == 0) { - duprintf("Dropping invalid SCTP packet.\n"); + pr_debug("Dropping invalid SCTP packet.\n"); *hotdrop = true; return false; } - - duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n", - ++i, offset, sch->type, htons(sch->length), sch->flags); - +#ifdef DEBUG + pr_debug("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d" + "\tflags: %x\n", + ++i, offset, sch->type, htons(sch->length), + sch->flags); +#endif offset += (ntohs(sch->length) + 3) & ~3; - duprintf("skb->len: %d\toffset: %d\n", skb->len, offset); + pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset); if (SCTP_CHUNKMAP_IS_SET(info->chunkmap, sch->type)) { switch (chunk_match_type) { @@ -117,24 +114,24 @@ match_packet(const struct sk_buff *skb, } static bool -sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +sctp_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_sctp_info *info = par->matchinfo; const sctp_sctphdr_t *sh; sctp_sctphdr_t _sh; if (par->fragoff != 0) { - duprintf("Dropping non-first fragment.. FIXME\n"); + pr_debug("Dropping non-first fragment.. 
FIXME\n"); return false; } sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh); if (sh == NULL) { - duprintf("Dropping evil TCP offset=0 tinygram.\n"); + pr_debug("Dropping evil TCP offset=0 tinygram.\n"); *par->hotdrop = true; return false; } - duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); + pr_debug("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); return SCCHECK(ntohs(sh->source) >= info->spts[0] && ntohs(sh->source) <= info->spts[1], @@ -147,7 +144,7 @@ sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par) XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } -static bool sctp_mt_check(const struct xt_mtchk_param *par) +static int sctp_mt_check(const struct xt_mtchk_param *par) { const struct xt_sctp_info *info = par->matchinfo; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index ebf00ad5..9b38fd15 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,6 +22,12 @@ #include #include +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#define XT_SOCKET_HAVE_IPV6 1 +#include +#include +#endif + #include #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -29,8 +35,18 @@ #include #endif +void +xt_socket_put_sk(struct sock *sk) +{ + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_put(inet_twsk(sk)); + else + sock_put(sk); +} +EXPORT_SYMBOL(xt_socket_put_sk); + static int -extract_icmp_fields(const struct sk_buff *skb, +extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, __be32 *raddr, __be32 *laddr, @@ -86,10 +102,8 @@ extract_icmp_fields(const struct sk_buff *skb, return 0; } - -static bool -socket_match(const struct sk_buff *skb, const struct xt_match_param *par, - const struct xt_socket_mtinfo1 *info) +struct sock* +xt_socket_get4_sk(const struct sk_buff *skb, struct xt_action_param *par) { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; @@ -106,7 +120,7 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par, hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); if (hp == NULL) - return false; + return NULL; protocol = iph->protocol; saddr = iph->saddr; @@ -115,11 +129,11 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par, dport = hp->dest; } else if (iph->protocol == IPPROTO_ICMP) { - if (extract_icmp_fields(skb, &protocol, &saddr, &daddr, + if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, &sport, &dport)) - return false; + return NULL; } else { - return false; + return NULL; } #ifdef XT_SOCKET_HAVE_CONNTRACK @@ -127,11 +141,11 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par, * reply packet of an established SNAT-ted connection. 
*/ ct = nf_ct_get(skb, &ctinfo); - if (ct && (ct != &nf_conntrack_untracked) && + if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || + ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) && + ctinfo == IP_CT_RELATED_REPLY)) && (ct->status & IPS_SRC_NAT_DONE)) { daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; @@ -142,7 +156,24 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par, #endif sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, - saddr, daddr, sport, dport, par->in, false); + saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); + + pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n", + protocol, &saddr, ntohs(sport), + &daddr, ntohs(dport), + &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); + + return sk; +} +EXPORT_SYMBOL(xt_socket_get4_sk); + +static bool +socket_match(const struct sk_buff *skb, struct xt_action_param *par, + const struct xt_socket_mtinfo1 *info) +{ + struct sock *sk; + + sk = xt_socket_get4_sk(skb, par); if (sk != NULL) { bool wildcard; bool transparent = true; @@ -156,42 +187,170 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par, if (info && info->flags & XT_SOCKET_TRANSPARENT) transparent = ((sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->transparent) || - (sk->sk_state == TCP_TIME_WAIT && + (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - nf_tproxy_put_sock(sk); + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; } - pr_debug("socket match: proto %u %08x:%u -> %08x:%u " - "(orig %08x:%u) sock %p\n", - protocol, ntohl(saddr), ntohs(sport), - ntohl(daddr), ntohs(dport), - ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk); - return (sk != NULL); } static bool -socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) +socket_mt4_v0(const struct sk_buff *skb, const struct xt_action_param *par) { return socket_match(skb, par, NULL); } static bool -socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +socket_mt4_v1(const struct sk_buff *skb, const struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } +#ifdef XT_SOCKET_HAVE_IPV6 + +static int +extract_icmp6_fields(const struct sk_buff *skb, + unsigned int outside_hdrlen, + int *protocol, + struct in6_addr **raddr, + struct in6_addr **laddr, + __be16 *rport, + __be16 *lport) +{ + struct ipv6hdr *inside_iph, _inside_iph; + struct icmp6hdr *icmph, _icmph; + __be16 *ports, _ports[2]; + u8 inside_nexthdr; + int inside_hdrlen; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) + return 1; + + inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), sizeof(_inside_iph), &_inside_iph); + if (inside_iph == NULL) + return 1; + inside_nexthdr = inside_iph->nexthdr; + + inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), &inside_nexthdr); + if (inside_hdrlen < 0) + return 1; /* hjm: Packet has no/incomplete transport layer headers. 
*/ + + if (inside_nexthdr != IPPROTO_TCP && + inside_nexthdr != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, inside_hdrlen, + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_nexthdr; + *laddr = &inside_iph->saddr; + *lport = ports[0]; + *raddr = &inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + +struct sock* +xt_socket_get6_sk(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + struct udphdr _hdr, *hp = NULL; + struct sock *sk; + struct in6_addr *daddr, *saddr; + __be16 dport, sport; + int thoff, tproto; + + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL); + if (tproto < 0) { + pr_debug("unable to find transport header in IPv6 packet, dropping\n"); + return NF_DROP; + } + + if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { + hp = skb_header_pointer(skb, thoff, + sizeof(_hdr), &_hdr); + if (hp == NULL) + return NULL; + + saddr = &iph->saddr; + sport = hp->source; + daddr = &iph->daddr; + dport = hp->dest; + + } else if (tproto == IPPROTO_ICMPV6) { + if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, + &sport, &dport)) + return NULL; + } else { + return NULL; + } + + sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); + pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu " + "(orig %pI6:%hu) sock %p\n", + tproto, saddr, ntohs(sport), + daddr, ntohs(dport), + &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); + return sk; +} +EXPORT_SYMBOL(xt_socket_get6_sk); + +static bool +socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct sock *sk; + const struct xt_socket_mtinfo1 *info; + + info = (struct xt_socket_mtinfo1 *) par->matchinfo; + sk = xt_socket_get6_sk(skb, par); + if (sk != NULL) { + bool wildcard; + bool transparent = true; + + /* Ignore sockets listening on INADDR_ANY */ + wildcard = (sk->sk_state != TCP_TIME_WAIT && + ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)); + + /* Ignore non-transparent sockets, + if XT_SOCKET_TRANSPARENT is used */ + if (info && info->flags & XT_SOCKET_TRANSPARENT) + transparent = ((sk->sk_state != TCP_TIME_WAIT && + inet_sk(sk)->transparent) || + (sk->sk_state == TCP_TIME_WAIT && + inet_twsk(sk)->tw_transparent)); + + xt_socket_put_sk(sk); + + if (wildcard || !transparent) + sk = NULL; + } + + return (sk != NULL); +} +#endif + static struct xt_match socket_mt_reg[] __read_mostly = { { .name = "socket", .revision = 0, .family = NFPROTO_IPV4, - .match = socket_mt_v0, + .match = socket_mt4_v0, .hooks = 1 << NF_INET_PRE_ROUTING, .me = THIS_MODULE, }, @@ -199,16 +358,31 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV4, - .match = socket_mt_v1, + .match = socket_mt4_v1, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = 1 << NF_INET_PRE_ROUTING, .me = THIS_MODULE, }, +#ifdef XT_SOCKET_HAVE_IPV6 + { + .name = "socket", + .revision = 1, + .family = NFPROTO_IPV6, + .match = socket_mt6_v1, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#endif }; static int __init socket_mt_init(void) { nf_defrag_ipv4_enable(); +#ifdef XT_SOCKET_HAVE_IPV6 + nf_defrag_ipv6_enable(); +#endif return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } @@ -224,3 +398,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Krisztian Kovacs, 
Balazs Scheidler"); MODULE_DESCRIPTION("x_tables socket match module"); MODULE_ALIAS("ipt_socket"); +MODULE_ALIAS("ip6t_socket"); diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 4c946cbd..e8fcde42 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -21,23 +21,25 @@ MODULE_ALIAS("ipt_state"); MODULE_ALIAS("ip6t_state"); static bool -state_mt(const struct sk_buff *skb, const struct xt_match_param *par) +state_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_state_info *sinfo = par->matchinfo; enum ip_conntrack_info ctinfo; unsigned int statebit; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (nf_ct_is_untracked(skb)) - statebit = XT_STATE_UNTRACKED; - else if (!nf_ct_get(skb, &ctinfo)) + if (!ct) statebit = XT_STATE_INVALID; - else - statebit = XT_STATE_BIT(ctinfo); - + else { + if (nf_ct_is_untracked(ct)) + statebit = XT_STATE_UNTRACKED; + else + statebit = XT_STATE_BIT(ctinfo); + } return (sinfo->statemask & statebit); } -static bool state_mt_check(const struct xt_mtchk_param *par) +static int state_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index d8c0f8f1..05d9ff01 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -29,7 +29,7 @@ MODULE_ALIAS("ip6t_statistic"); static DEFINE_SPINLOCK(nth_lock); static bool -statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) +statistic_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_statistic_info *info = par->matchinfo; bool ret = info->flags & XT_STATISTIC_INVERT; @@ -52,7 +52,7 @@ statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ret; } -static bool statistic_mt_check(const struct xt_mtchk_param *par) +static int statistic_mt_check(const struct xt_mtchk_param *par) { struct xt_statistic_info *info = par->matchinfo; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index b4d77411..adbdfc90 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_string"); MODULE_ALIAS("ip6t_string"); static bool -string_mt(const struct sk_buff *skb, const struct xt_match_param *par) +string_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_string_info *conf = par->matchinfo; struct ts_state state; @@ -40,7 +40,7 @@ string_mt(const struct sk_buff *skb, const struct xt_match_param *par) #define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m)) -static bool string_mt_check(const struct xt_mtchk_param *par) +static int string_mt_check(const struct xt_mtchk_param *par) { struct xt_string_info *conf = par->matchinfo; struct ts_config *ts_conf; diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 4809b34b..5c8a7b43 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tcpmss"); MODULE_ALIAS("ip6t_tcpmss"); static bool -tcpmss_mt(const struct sk_buff *skb, const struct xt_match_param *par) +tcpmss_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tcpmss_match_info *info = par->matchinfo; const struct tcphdr *th; diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 1ebdc493..89ec64f6 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -1,3 +1,4 @@ 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include @@ -19,13 +20,6 @@ MODULE_ALIAS("ipt_tcp"); MODULE_ALIAS("ip6t_udp"); MODULE_ALIAS("ip6t_tcp"); -#ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - - /* Returns 1 if the port is matched by the range, 0 otherwise */ static inline bool port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert) @@ -46,7 +40,7 @@ tcp_find_option(u_int8_t option, u_int8_t _opt[60 - sizeof(struct tcphdr)]; unsigned int i; - duprintf("tcp_match: finding option\n"); + pr_debug("finding option\n"); if (!optlen) return invert; @@ -68,7 +62,8 @@ tcp_find_option(u_int8_t option, return invert; } -static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool tcp_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct tcphdr *th; struct tcphdr _tcph; @@ -82,7 +77,7 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par) flag overwrite to pass the direction checks. */ if (par->fragoff == 1) { - duprintf("Dropping evil TCP offset=1 frag.\n"); + pr_debug("Dropping evil TCP offset=1 frag.\n"); *par->hotdrop = true; } /* Must not be a fragment. */ @@ -95,7 +90,7 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (th == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ - duprintf("Dropping evil TCP offset=0 tinygram.\n"); + pr_debug("Dropping evil TCP offset=0 tinygram.\n"); *par->hotdrop = true; return false; } @@ -126,7 +121,7 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool tcp_mt_check(const struct xt_mtchk_param *par) +static int tcp_mt_check(const struct xt_mtchk_param *par) { const struct xt_tcp *tcpinfo = par->matchinfo; @@ -134,7 +129,8 @@ static bool tcp_mt_check(const struct xt_mtchk_param *par) return !(tcpinfo->invflags & ~XT_TCP_INV_MASK); } -static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool udp_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct udphdr *uh; struct udphdr _udph; @@ -148,7 +144,7 @@ static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (uh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. 
*/ - duprintf("Dropping evil UDP tinygram.\n"); + pr_debug("Dropping evil UDP tinygram.\n"); *par->hotdrop = true; return false; } @@ -161,7 +157,7 @@ static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par) !!(udpinfo->invflags & XT_UDP_INV_DSTPT)); } -static bool udp_mt_check(const struct xt_mtchk_param *par) +static int udp_mt_check(const struct xt_mtchk_param *par) { const struct xt_udp *udpinfo = par->matchinfo; diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 93acaa59..539add45 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -153,7 +153,7 @@ static void localtime_3(struct xtm *r, time_t time) } static bool -time_mt(const struct sk_buff *skb, const struct xt_match_param *par) +time_mt(const struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_time_info *info = par->matchinfo; unsigned int packet_time; @@ -218,7 +218,7 @@ time_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool time_mt_check(const struct xt_mtchk_param *par) +static int time_mt_check(const struct xt_mtchk_param *par) { const struct xt_time_info *info = par->matchinfo; diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index 24a52762..676e70d7 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -87,7 +87,8 @@ static bool u32_match_it(const struct xt_u32 *data, return true; } -static bool u32_mt(const struct sk_buff *skb, const struct xt_match_param *par) +static bool u32_mt(const struct sk_buff *skb, + const struct xt_action_param *par) { const struct xt_u32 *data = par->matchinfo; bool ret; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 19e98007..5a7dcdf2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1363,7 +1363,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; - struct sk_buff *skb, *frag __maybe_unused = NULL; + struct sk_buff *skb, *data_skb; int err; if (flags&MSG_OOB) @@ -1375,45 +1375,35 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; + data_skb = skb; + #ifdef CONFIG_COMPAT_NETLINK_MESSAGES if (unlikely(skb_shinfo(skb)->frag_list)) { - bool need_compat = !!(flags & MSG_CMSG_COMPAT); - /* - * If this skb has a frag_list, then here that means that - * we will have to use the frag_list skb for compat tasks - * and the regular skb for non-compat tasks. + * If this skb has a frag_list, then here that means that we + * will have to use the frag_list skb's data for compat tasks + * and the regular skb's data for normal (non-compat) tasks. * - * The skb might (and likely will) be cloned, so we can't - * just reset frag_list and go on with things -- we need to - * keep that. For the compat case that's easy -- simply get - * a reference to the compat skb and free the regular one - * including the frag. For the non-compat case, we need to - * avoid sending the frag to the user -- so assign NULL but - * restore it below before freeing the skb. + * If we need to send the compat skb, assign it to the + * 'data_skb' variable so that it will be used below for data + * copying. We keep 'skb' for everything else, including + * freeing both later. 
*/ - if (need_compat) { - struct sk_buff *compskb = skb_shinfo(skb)->frag_list; - skb_get(compskb); - kfree_skb(skb); - skb = compskb; - } else { - frag = skb_shinfo(skb)->frag_list; - skb_shinfo(skb)->frag_list = NULL; - } + if (flags & MSG_CMSG_COMPAT) + data_skb = skb_shinfo(skb)->frag_list; } #endif msg->msg_namelen = 0; - copied = skb->len; + copied = data_skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } - skb_reset_transport_header(skb); - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + skb_reset_transport_header(data_skb); + err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied); if (msg->msg_name) { struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; @@ -1433,11 +1423,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } siocb->scm->creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) - copied = skb->len; - -#ifdef CONFIG_COMPAT_NETLINK_MESSAGES - skb_shinfo(skb)->frag_list = frag; -#endif + copied = data_skb->len; skb_free_datagram(sk, skb); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 41866eb2..43b6934e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -515,7 +515,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, struct sk_filter *filter; rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); + filter = rcu_dereference_bh(sk->sk_filter); if (filter != NULL) res = sk_run_filter(skb, filter->insns, filter->len); rcu_read_unlock_bh(); @@ -767,6 +767,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; h.h2->tp_vlan_tci = skb->vlan_tci; + h.h2->tp_padding = 0; hdrlen = sizeof(*h.h2); break; default: @@ -1044,9 +1045,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } packet_increment_head(&po->tx_ring); len_sum += tp_len; - } while (likely((ph != NULL) || ((!(msg->msg_flags & MSG_DONTWAIT)) - && (atomic_read(&po->tx_ring.pending)))) - ); + } while (likely((ph != NULL) || + ((!(msg->msg_flags & MSG_DONTWAIT)) && + (atomic_read(&po->tx_ring.pending)))) + ); err = len_sum; goto out_put; @@ -1499,6 +1501,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_net = skb_network_offset(skb); aux.tp_vlan_tci = skb->vlan_tci; + aux.tp_padding = 0; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } @@ -1526,7 +1529,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, uaddr->sa_family = AF_PACKET; dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex); if (dev) { - strlcpy(uaddr->sa_data, dev->name, 15); + strncpy(uaddr->sa_data, dev->name, 14); dev_put(dev); } else memset(uaddr->sa_data, 0, 14); @@ -1549,6 +1552,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; + sll->sll_pkttype = 0; dev = dev_get_by_index(sock_net(sk), po->ifindex); if (dev) { sll->sll_hatype = dev->type; diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index f60c0c2a..519ff9d4 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -67,6 +67,8 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol) struct phonet_protocol *pnp; int err; + if (!net_eq(net, &init_net)) + return -EAFNOSUPPORT; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -353,6 +355,8 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, struct sockaddr_pn 
sa; u16 len; + if (!net_eq(net, &init_net)) + goto out; /* check we have at least a full Phonet header */ if (!pskb_pull(skb, sizeof(struct phonethdr))) goto out; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 5f32d217..4d983027 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -224,12 +224,13 @@ static void pipe_grant_credits(struct sock *sk) static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); - struct pnpipehdr *hdr = pnp_hdr(skb); + struct pnpipehdr *hdr; int wake = 0; if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) return -EINVAL; + hdr = pnp_hdr(skb); if (hdr->data[0] != PN_PEP_TYPE_COMMON) { LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n", (unsigned)hdr->data[0]); @@ -716,8 +717,8 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) return -EINVAL; lock_sock(sk); - if (sock_flag(sk, SOCK_URGINLINE) - && !skb_queue_empty(&pn->ctrlreq_queue)) + if (sock_flag(sk, SOCK_URGINLINE) && + !skb_queue_empty(&pn->ctrlreq_queue)) answ = skb_peek(&pn->ctrlreq_queue)->len; else if (!skb_queue_empty(&sk->sk_receive_queue)) answ = skb_peek(&sk->sk_receive_queue)->len; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 5f42f30d..d94ca913 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -225,6 +225,9 @@ static int phonet_device_notify(struct notifier_block *me, unsigned long what, { struct net_device *dev = arg; + if (!net_eq(dev_net(dev), &init_net)) + return 0; + switch (what) { case NETDEV_REGISTER: if (dev->type == ARPHRD_PHONET) @@ -246,7 +249,11 @@ static struct notifier_block phonet_device_notifier = { /* Per-namespace Phonet devices handling */ static int phonet_init_net(struct net *net) { - struct phonet_net *pnn = kmalloc(sizeof(*pnn), GFP_KERNEL); + struct phonet_net *pnn; + + if (!net_eq(net, &init_net)) + return 0; + pnn = kmalloc(sizeof(*pnn), GFP_KERNEL); if (!pnn) return -ENOMEM; @@ -263,9 +270,13 @@ static int phonet_init_net(struct net *net) static void phonet_exit_net(struct net *net) { - struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_net *pnn; struct net_device *dev; + if (!net_eq(net, &init_net)) + return; + pnn = net_generic(net, phonet_net_id); + rtnl_lock(); for_each_netdev(net, dev) phonet_device_destroy(dev); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index d21fd357..7acab1ea 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -68,6 +68,8 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) int err; u8 pnaddr; + if (!net_eq(net, &init_net)) + return -EOPNOTSUPP; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -124,12 +126,16 @@ nla_put_failure: static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct phonet_device_list *pndevs; struct phonet_device *pnd; int dev_idx = 0, dev_start_idx = cb->args[0]; int addr_idx = 0, addr_start_idx = cb->args[1]; - pndevs = phonet_device_list(sock_net(skb->sk)); + if (!net_eq(net, &init_net)) + goto skip; + + pndevs = phonet_device_list(net); spin_lock_bh(&pndevs->lock); list_for_each_entry(pnd, &pndevs->list, list) { u8 addr; @@ -154,6 +160,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) out: spin_unlock_bh(&pndevs->lock); +skip: cb->args[0] = dev_idx; cb->args[1] = addr_idx; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index aa5b5a97..d6224a6d 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -82,8 +82,8 @@ struct sock 
*pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) if (pn->resource != res) continue; } - if (pn_addr(pn->sobject) - && pn_addr(pn->sobject) != pn_addr(obj)) + if (pn_addr(pn->sobject) && + pn_addr(pn->sobject) != pn_addr(obj)) continue; rval = sknode; diff --git a/net/socket.c b/net/socket.c index ed84099d..caff26a3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1687,6 +1687,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, struct iovec iov; int fput_needed; + if (len > INT_MAX) + len = INT_MAX; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1744,6 +1746,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, int err, err2; int fput_needed; + if (size > INT_MAX) + size = INT_MAX; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 19c17e4a..cb72e91f 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -97,7 +97,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); unsigned int unix_tot_inflight; -static struct sock *unix_get_socket(struct file *filp) +struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; struct inode *inode = filp->f_path.dentry->d_inode; @@ -269,9 +269,16 @@ static void inc_inflight_move_tail(struct unix_sock *u) } static bool gc_in_progress = false; +#define UNIX_INFLIGHT_TRIGGER_GC 16000 void wait_for_unix_gc(void) { + /* + * If number of inflight sockets is insane, + * force a garbage collect right now. + */ + if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); }
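
For illustration only (not part of the patch): wait_for_unix_gc() above now forces a collection right away once unix_tot_inflight passes UNIX_INFLIGHT_TRIGGER_GC, rather than only waiting for a collection that may never be scheduled. A single-threaded sketch of that trigger-then-wait idea follows; the waitqueue, locking and the real collector are omitted, and run_gc() is a stub.

#include <stdbool.h>
#include <stdio.h>

#define UNIX_INFLIGHT_TRIGGER_GC 16000

static unsigned int tot_inflight;	/* stands in for unix_tot_inflight */
static bool gc_in_progress;		/* stands in for the kernel's flag */

static void run_gc(void)
{
	gc_in_progress = true;
	/* ... scan in-flight fds and break reference cycles (stubbed) ... */
	tot_inflight = 0;
	gc_in_progress = false;
}

static void wait_for_gc(void)
{
	/* If the number of in-flight sockets is insane, collect right now
	 * instead of letting callers queue even more. */
	if (tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
		run_gc();
	/* the real implementation then sleeps until gc_in_progress is false */
}

int main(void)
{
	tot_inflight = 20000;
	wait_for_gc();
	printf("inflight after check: %u\n", tot_inflight);
	return 0;
}
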