mirror of
https://github.com/xcat2/xNBA.git
synced 2025-01-05 19:15:05 +00:00
Merge branch '3leaf'
This commit is contained in:
commit
1620b3512c
@ -152,6 +152,7 @@ SRCDIRS += drivers/scsi
|
||||
SRCDIRS += drivers/ata
|
||||
SRCDIRS += drivers/nvs
|
||||
SRCDIRS += drivers/bitbash
|
||||
SRCDIRS += drivers/infiniband
|
||||
SRCDIRS += interface/pxe
|
||||
SRCDIRS += tests
|
||||
SRCDIRS += crypto crypto/axtls crypto/matrixssl
|
||||
|
3460
src/drivers/infiniband/MT25218_PRM.h
Normal file
3460
src/drivers/infiniband/MT25218_PRM.h
Normal file
File diff suppressed because it is too large
Load Diff
2129
src/drivers/infiniband/arbel.c
Normal file
2129
src/drivers/infiniband/arbel.c
Normal file
File diff suppressed because it is too large
Load Diff
461
src/drivers/infiniband/arbel.h
Normal file
461
src/drivers/infiniband/arbel.h
Normal file
@ -0,0 +1,461 @@
|
||||
#ifndef _ARBEL_H
|
||||
#define _ARBEL_H
|
||||
|
||||
/** @file
|
||||
*
|
||||
* Mellanox Arbel Infiniband HCA driver
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <gpxe/uaccess.h>
|
||||
#include "mlx_bitops.h"
|
||||
#include "MT25218_PRM.h"
|
||||
|
||||
/*
|
||||
* Hardware constants
|
||||
*
|
||||
*/
|
||||
|
||||
/* PCI BARs */
#define ARBEL_PCI_CONFIG_BAR		PCI_BASE_ADDRESS_0
#define ARBEL_PCI_CONFIG_BAR_SIZE	0x100000
#define ARBEL_PCI_UAR_BAR		PCI_BASE_ADDRESS_2
#define ARBEL_PCI_UAR_IDX		1
#define ARBEL_PCI_UAR_SIZE		0x1000

/* UAR context table (UCE) resource types */
#define ARBEL_UAR_RES_NONE		0x00
#define ARBEL_UAR_RES_CQ_CI		0x01
#define ARBEL_UAR_RES_CQ_ARM		0x02
#define ARBEL_UAR_RES_SQ		0x03
#define ARBEL_UAR_RES_RQ		0x04
#define ARBEL_UAR_RES_GROUP_SEP		0x07

/* Work queue entry and completion queue entry opcodes */
#define ARBEL_OPCODE_SEND		0x0a
#define ARBEL_OPCODE_RECV_ERROR		0xfe
#define ARBEL_OPCODE_SEND_ERROR		0xff

/* HCA command register opcodes */
#define ARBEL_HCR_QUERY_DEV_LIM		0x0003
#define ARBEL_HCR_QUERY_FW		0x0004
#define ARBEL_HCR_INIT_HCA		0x0007
#define ARBEL_HCR_CLOSE_HCA		0x0008
#define ARBEL_HCR_INIT_IB		0x0009
#define ARBEL_HCR_CLOSE_IB		0x000a
#define ARBEL_HCR_SW2HW_MPT		0x000d
#define ARBEL_HCR_MAP_EQ		0x0012
#define ARBEL_HCR_SW2HW_EQ		0x0013
#define ARBEL_HCR_HW2SW_EQ		0x0014
#define ARBEL_HCR_SW2HW_CQ		0x0016
#define ARBEL_HCR_HW2SW_CQ		0x0017
#define ARBEL_HCR_RST2INIT_QPEE		0x0019
#define ARBEL_HCR_INIT2RTR_QPEE		0x001a
#define ARBEL_HCR_RTR2RTS_QPEE		0x001b
#define ARBEL_HCR_2RST_QPEE		0x0021
#define ARBEL_HCR_MAD_IFC		0x0024
#define ARBEL_HCR_READ_MGM		0x0025
#define ARBEL_HCR_WRITE_MGM		0x0026
#define ARBEL_HCR_MGID_HASH		0x0027
#define ARBEL_HCR_RUN_FW		0x0ff6
#define ARBEL_HCR_DISABLE_LAM		0x0ff7
#define ARBEL_HCR_ENABLE_LAM		0x0ff8
#define ARBEL_HCR_UNMAP_ICM		0x0ff9
#define ARBEL_HCR_MAP_ICM		0x0ffa
#define ARBEL_HCR_UNMAP_ICM_AUX		0x0ffb
#define ARBEL_HCR_MAP_ICM_AUX		0x0ffc
#define ARBEL_HCR_SET_ICM_SIZE		0x0ffd
#define ARBEL_HCR_UNMAP_FA		0x0ffe
#define ARBEL_HCR_MAP_FA		0x0fff

/* Service types */
#define ARBEL_ST_UD			0x03

/* MTUs */
#define ARBEL_MTU_2048			0x04

/* NOTE(review): presumably the "no event queue" marker; confirm against PRM */
#define ARBEL_NO_EQ			64

#define ARBEL_INVALID_LKEY		0x00000100UL

#define ARBEL_PAGE_SIZE			4096

#define ARBEL_DB_POST_SND_OFFSET	0x10
|
||||
|
||||
/*
|
||||
* Datatypes that seem to be missing from the autogenerated documentation
|
||||
*
|
||||
*/
|
||||
struct arbelprm_mgm_hash_st {
|
||||
pseudo_bit_t reserved0[0x00020];
|
||||
/* -------------- */
|
||||
pseudo_bit_t hash[0x00010];
|
||||
pseudo_bit_t reserved1[0x00010];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
struct arbelprm_scalar_parameter_st {
|
||||
pseudo_bit_t reserved0[0x00020];
|
||||
/* -------------- */
|
||||
pseudo_bit_t value[0x00020];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
/*
|
||||
* Wrapper structures for hardware datatypes
|
||||
*
|
||||
*/
|
||||
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_access_lam );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_context );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_eqc );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_init_hca );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_init_ib );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_mad_ifc );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_mgm_entry );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_mpt );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_qp_ee_state_transitions );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_query_fw );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_queue_pair_ee_context_entry );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_scalar_parameter );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_virtual_physical_mapping );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ctrl_send );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_data_ptr );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_next );
|
||||
struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ud );
|
||||
|
||||
/*
|
||||
* Composite hardware datatypes
|
||||
*
|
||||
*/
|
||||
|
||||
#define ARBEL_MAX_GATHER 1
|
||||
|
||||
struct arbelprm_ud_send_wqe {
|
||||
struct arbelprm_wqe_segment_next next;
|
||||
struct arbelprm_wqe_segment_ctrl_send ctrl;
|
||||
struct arbelprm_wqe_segment_ud ud;
|
||||
struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_GATHER];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
#define ARBEL_MAX_SCATTER 1
|
||||
|
||||
struct arbelprm_recv_wqe {
|
||||
/* The autogenerated header is inconsistent between send and
|
||||
* receive WQEs. The "ctrl" structure for receive WQEs is
|
||||
* defined to include the "next" structure. Since the "ctrl"
|
||||
* part of the "ctrl" structure contains only "reserved, must
|
||||
* be zero" bits, we ignore its definition and provide
|
||||
* something more usable.
|
||||
*/
|
||||
struct arbelprm_recv_wqe_segment_next next;
|
||||
uint32_t ctrl[2]; /* All "reserved, must be zero" */
|
||||
struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_SCATTER];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
union arbelprm_completion_entry {
|
||||
struct arbelprm_completion_queue_entry normal;
|
||||
struct arbelprm_completion_with_error error;
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
union arbelprm_doorbell_record {
|
||||
struct arbelprm_cq_arm_db_record cq_arm;
|
||||
struct arbelprm_cq_ci_db_record cq_ci;
|
||||
struct arbelprm_qp_db_record qp;
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
union arbelprm_doorbell_register {
|
||||
struct arbelprm_send_doorbell send;
|
||||
uint32_t dword[2];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
union arbelprm_mad {
|
||||
struct arbelprm_mad_ifc ifc;
|
||||
union ib_mad mad;
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
/*
|
||||
* gPXE-specific definitions
|
||||
*
|
||||
*/
|
||||
|
||||
/** Arbel device limits */
struct arbel_dev_limits {
	/** Number of reserved QPs */
	unsigned int reserved_qps;
	/** QP context entry size */
	size_t qpc_entry_size;
	/** Extended QP context entry size */
	size_t eqpc_entry_size;
	/** Number of reserved SRQs */
	unsigned int reserved_srqs;
	/** SRQ context entry size */
	size_t srqc_entry_size;
	/** Number of reserved EEs */
	unsigned int reserved_ees;
	/** EE context entry size */
	size_t eec_entry_size;
	/** Extended EE context entry size */
	size_t eeec_entry_size;
	/** Number of reserved CQs */
	unsigned int reserved_cqs;
	/** CQ context entry size */
	size_t cqc_entry_size;
	/** Number of reserved MTTs */
	unsigned int reserved_mtts;
	/** MTT entry size */
	size_t mtt_entry_size;
	/** Number of reserved MRWs */
	unsigned int reserved_mrws;
	/** MPT entry size */
	size_t mpt_entry_size;
	/** Number of reserved RDBs */
	unsigned int reserved_rdbs;
	/** EQ context entry size */
	size_t eqc_entry_size;
	/** Number of reserved UARs */
	unsigned int reserved_uars;
};
|
||||
|
||||
/** Alignment of Arbel send work queue entries */
|
||||
#define ARBEL_SEND_WQE_ALIGN 128
|
||||
|
||||
/** An Arbel send work queue entry */
|
||||
union arbel_send_wqe {
|
||||
struct arbelprm_ud_send_wqe ud;
|
||||
uint8_t force_align[ARBEL_SEND_WQE_ALIGN];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
/** An Arbel send work queue */
|
||||
struct arbel_send_work_queue {
|
||||
/** Doorbell record number */
|
||||
unsigned int doorbell_idx;
|
||||
/** Work queue entries */
|
||||
union arbel_send_wqe *wqe;
|
||||
/** Size of work queue */
|
||||
size_t wqe_size;
|
||||
};
|
||||
|
||||
/** Alignment of Arbel receive work queue entries */
|
||||
#define ARBEL_RECV_WQE_ALIGN 64
|
||||
|
||||
/** An Arbel receive work queue entry */
|
||||
union arbel_recv_wqe {
|
||||
struct arbelprm_recv_wqe recv;
|
||||
uint8_t force_align[ARBEL_RECV_WQE_ALIGN];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
/** An Arbel receive work queue */
|
||||
struct arbel_recv_work_queue {
|
||||
/** Doorbell record number */
|
||||
unsigned int doorbell_idx;
|
||||
/** Work queue entries */
|
||||
union arbel_recv_wqe *wqe;
|
||||
/** Size of work queue */
|
||||
size_t wqe_size;
|
||||
};
|
||||
|
||||
/** Maximum number of allocatable queue pairs
 *
 * This is a policy decision, not a device limit.
 */
#define ARBEL_MAX_QPS		8

/** Base queue pair number */
#define ARBEL_QPN_BASE		0x550000
|
||||
|
||||
/** An Arbel queue pair */
|
||||
struct arbel_queue_pair {
|
||||
/** Send work queue */
|
||||
struct arbel_send_work_queue send;
|
||||
/** Receive work queue */
|
||||
struct arbel_recv_work_queue recv;
|
||||
};
|
||||
|
||||
/** Maximum number of allocatable completion queues
 *
 * This is a policy decision, not a device limit.
 */
#define ARBEL_MAX_CQS		8

/** An Arbel completion queue */
struct arbel_completion_queue {
	/** Consumer counter doorbell record number */
	unsigned int ci_doorbell_idx;
	/** Arm queue doorbell record number */
	unsigned int arm_doorbell_idx;
	/** Completion queue entries */
	union arbelprm_completion_entry *cqe;
	/** Size of completion queue */
	size_t cqe_size;
};
|
||||
|
||||
/** An Arbel resource bitmask */
typedef uint32_t arbel_bitmask_t;

/** Size of an Arbel resource bitmask
 *
 * Evaluates to the number of arbel_bitmask_t elements needed to hold
 * one bit per entry, i.e. max_entries divided by the number of bits
 * in an arbel_bitmask_t, rounded up.
 */
#define ARBEL_BITMASK_SIZE(max_entries)					     \
	( ( (max_entries) + ( 8 * sizeof ( arbel_bitmask_t ) ) - 1 ) /	     \
	  ( 8 * sizeof ( arbel_bitmask_t ) ) )
|
||||
|
||||
/** An Arbel device */
|
||||
struct arbel {
|
||||
/** PCI configuration registers */
|
||||
void *config;
|
||||
/** PCI user Access Region */
|
||||
void *uar;
|
||||
|
||||
/** Command input mailbox */
|
||||
void *mailbox_in;
|
||||
/** Command output mailbox */
|
||||
void *mailbox_out;
|
||||
|
||||
/** Firmware area in external memory */
|
||||
userptr_t firmware_area;
|
||||
/** ICM size */
|
||||
size_t icm_len;
|
||||
/** ICM AUX size */
|
||||
size_t icm_aux_len;
|
||||
/** ICM area */
|
||||
userptr_t icm;
|
||||
|
||||
/** Doorbell records */
|
||||
union arbelprm_doorbell_record *db_rec;
|
||||
/** Reserved LKey
|
||||
*
|
||||
* Used to get unrestricted memory access.
|
||||
*/
|
||||
unsigned long reserved_lkey;
|
||||
|
||||
/** Completion queue in-use bitmask */
|
||||
arbel_bitmask_t cq_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_CQS ) ];
|
||||
/** Queue pair in-use bitmask */
|
||||
arbel_bitmask_t qp_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_QPS ) ];
|
||||
|
||||
/** Device limits */
|
||||
struct arbel_dev_limits limits;
|
||||
};
|
||||
|
||||
/** Global protection domain */
#define ARBEL_GLOBAL_PD		0x123456

/** Memory key prefix */
#define ARBEL_MKEY_PREFIX	0x77000000UL
|
||||
|
||||
/*
|
||||
* HCA commands
|
||||
*
|
||||
*/
|
||||
|
||||
#define ARBEL_HCR_BASE			0x80680
/* Offset of HCR dword register number x */
#define ARBEL_HCR_REG(x)		( ARBEL_HCR_BASE + 4 * (x) )
#define ARBEL_HCR_MAX_WAIT_MS		2000
#define ARBEL_MBOX_ALIGN		4096
#define ARBEL_MBOX_SIZE			512

/* HCA command is split into
 *
 * bits  11:0	Opcode
 * bit     12	Input uses mailbox
 * bit     13	Output uses mailbox
 * bits 22:14	Input parameter length (in dwords)
 * bits 31:23	Output parameter length (in dwords)
 *
 * Encoding the information in this way allows us to cut out several
 * parameters to the arbel_command() call.
 */
#define ARBEL_HCR_IN_MBOX		0x00001000UL
#define ARBEL_HCR_OUT_MBOX		0x00002000UL
#define ARBEL_HCR_OPCODE( _command )	( (_command) & 0xfff )
/* The length fields hold dword counts; shifting by two bits fewer
 * than the field position and masking with 0x7fc yields the length
 * in bytes.
 */
#define ARBEL_HCR_IN_LEN( _command )	( ( (_command) >> 12 ) & 0x7fc )
#define ARBEL_HCR_OUT_LEN( _command )	( ( (_command) >> 21 ) & 0x7fc )

/** Build HCR command from component parts
 *
 * _in_len and _out_len are given in bytes and stored as dword counts.
 */
#define ARBEL_HCR_INOUT_CMD( _opcode, _in_mbox, _in_len,		     \
			     _out_mbox, _out_len )			     \
	( (_opcode) |							     \
	  ( (_in_mbox) ? ARBEL_HCR_IN_MBOX : 0 ) |			     \
	  ( ( (_in_len) / 4 ) << 14 ) |					     \
	  ( (_out_mbox) ? ARBEL_HCR_OUT_MBOX : 0 ) |			     \
	  ( ( (_out_len) / 4 ) << 23 ) )

/* Command with input parameters only */
#define ARBEL_HCR_IN_CMD( _opcode, _in_mbox, _in_len )			     \
	ARBEL_HCR_INOUT_CMD ( _opcode, _in_mbox, _in_len, 0, 0 )

/* Command with output parameters only */
#define ARBEL_HCR_OUT_CMD( _opcode, _out_mbox, _out_len )		     \
	ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, _out_mbox, _out_len )

/* Command with no parameters */
#define ARBEL_HCR_VOID_CMD( _opcode )					     \
	ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, 0, 0 )
|
||||
|
||||
/*
|
||||
* Doorbell record allocation
|
||||
*
|
||||
* The doorbell record map looks like:
|
||||
*
|
||||
* ARBEL_MAX_CQS * Arm completion queue doorbell
|
||||
* ARBEL_MAX_QPS * Send work request doorbell
|
||||
* Group separator
|
||||
* ...(empty space)...
|
||||
* ARBEL_MAX_QPS * Receive work request doorbell
|
||||
* ARBEL_MAX_CQS * Completion queue consumer counter update doorbell
|
||||
*/
|
||||
|
||||
/* Total number of doorbell records in the doorbell record map */
#define ARBEL_MAX_DOORBELL_RECORDS 512
/* Index of the group separator: first record after the arm and send doorbells */
#define ARBEL_GROUP_SEPARATOR_DOORBELL ( ARBEL_MAX_CQS + ARBEL_MAX_QPS )
|
||||
|
||||
/**
 * Get arm completion queue doorbell index
 *
 * @v cqn_offset	Completion queue number offset
 * @ret doorbell_idx	Doorbell index
 *
 * Arm doorbells occupy the start of the doorbell record map, so the
 * index is the completion queue number offset itself.
 */
static inline unsigned int
arbel_cq_arm_doorbell_idx ( unsigned int cqn_offset ) {
	return cqn_offset;
}
|
||||
|
||||
/**
|
||||
* Get send work request doorbell index
|
||||
*
|
||||
* @v qpn_offset Queue pair number offset
|
||||
* @ret doorbell_idx Doorbell index
|
||||
*/
|
||||
static inline unsigned int
|
||||
arbel_send_doorbell_idx ( unsigned int qpn_offset ) {
|
||||
return ( ARBEL_MAX_CQS + qpn_offset );
|
||||
}
|
||||
|
||||
/**
|
||||
* Get receive work request doorbell index
|
||||
*
|
||||
* @v qpn_offset Queue pair number offset
|
||||
* @ret doorbell_idx Doorbell index
|
||||
*/
|
||||
static inline unsigned int
|
||||
arbel_recv_doorbell_idx ( unsigned int qpn_offset ) {
|
||||
return ( ARBEL_MAX_DOORBELL_RECORDS - ARBEL_MAX_CQS - qpn_offset - 1 );
|
||||
}
|
||||
|
||||
/**
|
||||
* Get completion queue consumer counter doorbell index
|
||||
*
|
||||
* @v cqn_offset Completion queue number offset
|
||||
* @ret doorbell_idx Doorbell index
|
||||
*/
|
||||
static inline unsigned int
|
||||
arbel_cq_ci_doorbell_idx ( unsigned int cqn_offset ) {
|
||||
return ( ARBEL_MAX_DOORBELL_RECORDS - cqn_offset - 1 );
|
||||
}
|
||||
|
||||
#endif /* _ARBEL_H */
|
209
src/drivers/infiniband/mlx_bitops.h
Normal file
209
src/drivers/infiniband/mlx_bitops.h
Normal file
@ -0,0 +1,209 @@
|
||||
#ifndef _MLX_BITOPS_H
|
||||
#define _MLX_BITOPS_H
|
||||
|
||||
/*
|
||||
* Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Mellanox bit operations
|
||||
*
|
||||
*/
|
||||
|
||||
/* Datatype used to represent a bit in the Mellanox autogenerated headers */
typedef unsigned char pseudo_bit_t;

/**
 * Wrapper structure for pseudo_bit_t structures
 *
 * This structure provides a wrapper around the autogenerated
 * pseudo_bit_t structures.  It has the correct size, and also
 * encapsulates type information about the underlying pseudo_bit_t
 * structure, which allows the MLX_FILL etc. macros to work without
 * requiring explicit type information.
 *
 * The pseudo_bit_t structure uses one byte per hardware bit, so its
 * sizeof() is divided by 8 to get bytes and by 32 to get dwords.  The
 * zero-length "dummy" array exists only to carry the pseudo_bit_t
 * structure type for MLX_PSEUDO_STRUCT().
 */
#define MLX_DECLARE_STRUCT( _structure )				     \
	_structure {							     \
	    union {							     \
		uint8_t bytes[ sizeof ( struct _structure ## _st ) / 8 ];    \
		uint32_t dwords[ sizeof ( struct _structure ## _st ) / 32 ]; \
		struct _structure ## _st *dummy[0];			     \
	    } u;							     \
	}

/** Get pseudo_bit_t structure type from wrapper structure pointer */
#define MLX_PSEUDO_STRUCT( _ptr )					     \
	typeof ( *((_ptr)->u.dummy[0]) )
|
||||
|
||||
/** Bit offset of a field within a pseudo_bit_t structure */
#define MLX_BIT_OFFSET( _structure_st, _field )				     \
	offsetof ( _structure_st, _field )

/** Dword offset of a field within a pseudo_bit_t structure */
#define MLX_DWORD_OFFSET( _structure_st, _field )			     \
	( MLX_BIT_OFFSET ( _structure_st, _field ) / 32 )

/** Dword bit offset of a field within a pseudo_bit_t structure
 *
 * Yes, using mod-32 would work, but would lose the check for the
 * error of specifying a mismatched field name and dword index.
 */
#define MLX_DWORD_BIT_OFFSET( _structure_st, _index, _field )		     \
	( MLX_BIT_OFFSET ( _structure_st, _field ) - ( 32 * (_index) ) )

/** Bit width of a field within a pseudo_bit_t structure */
#define MLX_BIT_WIDTH( _structure_st, _field )				     \
	sizeof ( ( ( _structure_st * ) NULL )->_field )

/** Bit mask for a field within a pseudo_bit_t structure */
#define MLX_BIT_MASK( _structure_st, _field )				     \
	( ( ~( ( uint32_t ) 0 ) ) >>					     \
	  ( 32 - MLX_BIT_WIDTH ( _structure_st, _field ) ) )

/*
 * Assemble native-endian dword from named fields and values
 *
 * The value is converted to uint32_t before shifting: shifting a
 * signed int so that a bit reaches or passes bit 31 (e.g. a one-bit
 * field at the top of a dword) is undefined behaviour, whereas the
 * unsigned shift is well defined and yields the same low 32 bits.
 */

#define MLX_ASSEMBLE_1( _structure_st, _index, _field, _value )		     \
	( ( ( uint32_t ) (_value) ) <<					     \
	  MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) )

#define MLX_ASSEMBLE_2( _structure_st, _index, _field, _value, ... )	     \
	( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) |	     \
	  MLX_ASSEMBLE_1 ( _structure_st, _index, __VA_ARGS__ ) )

#define MLX_ASSEMBLE_3( _structure_st, _index, _field, _value, ... )	     \
	( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) |	     \
	  MLX_ASSEMBLE_2 ( _structure_st, _index, __VA_ARGS__ ) )

#define MLX_ASSEMBLE_4( _structure_st, _index, _field, _value, ... )	     \
	( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) |	     \
	  MLX_ASSEMBLE_3 ( _structure_st, _index, __VA_ARGS__ ) )

#define MLX_ASSEMBLE_5( _structure_st, _index, _field, _value, ... )	     \
	( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) |	     \
	  MLX_ASSEMBLE_4 ( _structure_st, _index, __VA_ARGS__ ) )

#define MLX_ASSEMBLE_6( _structure_st, _index, _field, _value, ... )	     \
	( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) |	     \
	  MLX_ASSEMBLE_5 ( _structure_st, _index, __VA_ARGS__ ) )

/*
 * Build native-endian (positive) dword bitmasks from named fields
 *
 */

#define MLX_MASK_1( _structure_st, _index, _field )			     \
	( MLX_BIT_MASK ( _structure_st, _field ) <<			     \
	  MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) )

#define MLX_MASK_2( _structure_st, _index, _field, ... )		     \
	( MLX_MASK_1 ( _structure_st, _index, _field ) |		     \
	  MLX_MASK_1 ( _structure_st, _index, __VA_ARGS__ ) )

#define MLX_MASK_3( _structure_st, _index, _field, ... )		     \
	( MLX_MASK_1 ( _structure_st, _index, _field ) |		     \
	  MLX_MASK_2 ( _structure_st, _index, __VA_ARGS__ ) )

#define MLX_MASK_4( _structure_st, _index, _field, ... )		     \
	( MLX_MASK_1 ( _structure_st, _index, _field ) |		     \
	  MLX_MASK_3 ( _structure_st, _index, __VA_ARGS__ ) )

#define MLX_MASK_5( _structure_st, _index, _field, ... )		     \
	( MLX_MASK_1 ( _structure_st, _index, _field ) |		     \
	  MLX_MASK_4 ( _structure_st, _index, __VA_ARGS__ ) )

#define MLX_MASK_6( _structure_st, _index, _field, ... )		     \
	( MLX_MASK_1 ( _structure_st, _index, _field ) |		     \
	  MLX_MASK_5 ( _structure_st, _index, __VA_ARGS__ ) )
|
||||
|
||||
/*
 * Populate big-endian dwords from named fields and values
 *
 * MLX_FILL() stores an already-assembled native-endian dword into
 * dword _index of the wrapper structure, converting it to big-endian.
 * MLX_FILL_n() assembles the dword from n ( field, value ) pairs
 * first.  The whole dword is overwritten.
 */

#define MLX_FILL( _ptr, _index, _assembled )				     \
	do {								     \
		uint32_t *__ptr = &(_ptr)->u.dwords[(_index)];		     \
		uint32_t __assembled = (_assembled);			     \
		*__ptr = cpu_to_be32 ( __assembled );			     \
	} while ( 0 )

#define MLX_FILL_1( _ptr, _index, ... )					     \
	MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ),\
						  _index, __VA_ARGS__ ) )

#define MLX_FILL_2( _ptr, _index, ... )					     \
	MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_2 ( MLX_PSEUDO_STRUCT ( _ptr ),\
						  _index, __VA_ARGS__ ) )

#define MLX_FILL_3( _ptr, _index, ... )					     \
	MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_3 ( MLX_PSEUDO_STRUCT ( _ptr ),\
						  _index, __VA_ARGS__ ) )

#define MLX_FILL_4( _ptr, _index, ... )					     \
	MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ),\
						  _index, __VA_ARGS__ ) )

#define MLX_FILL_5( _ptr, _index, ... )					     \
	MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_5 ( MLX_PSEUDO_STRUCT ( _ptr ),\
						  _index, __VA_ARGS__ ) )

#define MLX_FILL_6( _ptr, _index, ... )					     \
	MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_6 ( MLX_PSEUDO_STRUCT ( _ptr ),\
						  _index, __VA_ARGS__ ) )
|
||||
|
||||
/*
 * Modify big-endian dword using named field and value
 *
 * Reads the dword containing _field, clears the field's bits, ORs in
 * the new value and writes the dword back; other fields in the same
 * dword are preserved.
 */

#define MLX_SET( _ptr, _field, _value )					     \
	do {								     \
		unsigned int __index =					     \
		    MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \
		uint32_t *__ptr = &(_ptr)->u.dwords[__index];		     \
		uint32_t __value = be32_to_cpu ( *__ptr );		     \
		__value &= ~( MLX_MASK_1 ( MLX_PSEUDO_STRUCT ( _ptr ),	     \
					   __index, _field ) );		     \
		__value |= MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ),	     \
					    __index, _field, _value );	     \
		*__ptr = cpu_to_be32 ( __value );			     \
	} while ( 0 )
|
||||
|
||||
/*
 * Extract value of named field
 *
 * Reads the big-endian dword containing _field, converts it to native
 * endianness, then shifts and masks to yield the field value as a
 * uint32_t.
 */

#define MLX_GET( _ptr, _field )						     \
	( {								     \
		unsigned int __index =					     \
		    MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \
		uint32_t *__ptr = &(_ptr)->u.dwords[__index];		     \
		uint32_t __value = be32_to_cpu ( *__ptr );		     \
		__value >>=						     \
		    MLX_DWORD_BIT_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ),	     \
					    __index, _field );		     \
		__value &=						     \
		    MLX_BIT_MASK ( MLX_PSEUDO_STRUCT ( _ptr ), _field );     \
		__value;						     \
	} )
|
||||
|
||||
#endif /* _MLX_BITOPS_H */
|
930
src/drivers/net/ipoib.c
Normal file
930
src/drivers/net/ipoib.c
Normal file
@ -0,0 +1,930 @@
|
||||
/*
|
||||
* Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <byteswap.h>
|
||||
#include <errno.h>
|
||||
#include "timer.h"
|
||||
#include <gpxe/if_arp.h>
|
||||
#include <gpxe/iobuf.h>
|
||||
#include <gpxe/netdevice.h>
|
||||
#include <gpxe/infiniband.h>
|
||||
#include <gpxe/ipoib.h>
|
||||
|
||||
/** @file
|
||||
*
|
||||
* IP over Infiniband
|
||||
*/
|
||||
|
||||
/** IPoIB MTU */
#define IPOIB_MTU 2048

/** Number of IPoIB data send work queue entries */
#define IPOIB_DATA_NUM_SEND_WQES 2

/** Number of IPoIB data receive work queue entries */
#define IPOIB_DATA_NUM_RECV_WQES 4

/** Number of IPoIB data completion entries */
#define IPOIB_DATA_NUM_CQES 8

/** Number of IPoIB metadata send work queue entries */
#define IPOIB_META_NUM_SEND_WQES 2

/** Number of IPoIB metadata receive work queue entries */
#define IPOIB_META_NUM_RECV_WQES 2

/** Number of IPoIB metadata completion entries */
#define IPOIB_META_NUM_CQES 8
|
||||
|
||||
/** An IPoIB queue set */
struct ipoib_queue_set {
	/** Completion queue */
	struct ib_completion_queue *cq;
	/** Queue pair */
	struct ib_queue_pair *qp;
	/** Receive work queue fill level */
	unsigned int recv_fill;
	/** Receive work queue maximum fill level */
	unsigned int recv_max_fill;
};
|
||||
|
||||
/** An IPoIB device */
|
||||
struct ipoib_device {
|
||||
/** Network device */
|
||||
struct net_device *netdev;
|
||||
/** Underlying Infiniband device */
|
||||
struct ib_device *ibdev;
|
||||
/** Data queue set */
|
||||
struct ipoib_queue_set data;
|
||||
/** Data queue set */
|
||||
struct ipoib_queue_set meta;
|
||||
/** Broadcast GID */
|
||||
struct ib_gid broadcast_gid;
|
||||
/** Broadcast LID */
|
||||
unsigned int broadcast_lid;
|
||||
/** Joined to broadcast group */
|
||||
int broadcast_joined;
|
||||
/** Data queue key */
|
||||
unsigned long data_qkey;
|
||||
};
|
||||
|
||||
/**
|
||||
* IPoIB path cache entry
|
||||
*
|
||||
* This serves a similar role to the ARP cache for Ethernet. (ARP
|
||||
* *is* used on IPoIB; we have two caches to maintain.)
|
||||
*/
|
||||
struct ipoib_cached_path {
|
||||
/** Destination GID */
|
||||
struct ib_gid gid;
|
||||
/** Destination LID */
|
||||
unsigned int dlid;
|
||||
/** Service level */
|
||||
unsigned int sl;
|
||||
/** Rate */
|
||||
unsigned int rate;
|
||||
};
|
||||
|
||||
/** Number of IPoIB path cache entries */
|
||||
#define IPOIB_NUM_CACHED_PATHS 2
|
||||
|
||||
/** IPoIB path cache */
|
||||
static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS];
|
||||
|
||||
/** Oldest IPoIB path cache entry index */
|
||||
static unsigned int ipoib_path_cache_idx = 0;
|
||||
|
||||
/** TID half used to identify get path record replies */
|
||||
#define IPOIB_TID_GET_PATH_REC 0x11111111UL
|
||||
|
||||
/** TID half used to identify multicast member record replies */
|
||||
#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL
|
||||
|
||||
/** IPoIB metadata TID */
|
||||
static uint32_t ipoib_meta_tid = 0;
|
||||
|
||||
/** IPv4 broadcast GID */
|
||||
static const struct ib_gid ipv4_broadcast_gid = {
|
||||
{ { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }
|
||||
};
|
||||
|
||||
/** Maximum time we will wait for the broadcast join to succeed */
|
||||
#define IPOIB_JOIN_MAX_DELAY_MS 1000
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* IPoIB link layer
|
||||
*
|
||||
****************************************************************************
|
||||
*/
|
||||
|
||||
/** Broadcast QPN used in IPoIB MAC addresses
|
||||
*
|
||||
* This is a guaranteed invalid real QPN
|
||||
*/
|
||||
#define IPOIB_BROADCAST_QPN 0xffffffffUL
|
||||
|
||||
/** Broadcast IPoIB address */
|
||||
static struct ipoib_mac ipoib_broadcast = {
|
||||
.qpn = ntohl ( IPOIB_BROADCAST_QPN ),
|
||||
};
|
||||
|
||||
/**
|
||||
* Transmit IPoIB packet
|
||||
*
|
||||
* @v iobuf I/O buffer
|
||||
* @v netdev Network device
|
||||
* @v net_protocol Network-layer protocol
|
||||
* @v ll_dest Link-layer destination address
|
||||
*
|
||||
* Prepends the IPoIB link-layer header and transmits the packet.
|
||||
*/
|
||||
static int ipoib_tx ( struct io_buffer *iobuf, struct net_device *netdev,
|
||||
struct net_protocol *net_protocol,
|
||||
const void *ll_dest ) {
|
||||
struct ipoib_hdr *ipoib_hdr =
|
||||
iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
|
||||
|
||||
/* Build IPoIB header */
|
||||
memcpy ( &ipoib_hdr->pseudo.peer, ll_dest,
|
||||
sizeof ( ipoib_hdr->pseudo.peer ) );
|
||||
ipoib_hdr->real.proto = net_protocol->net_proto;
|
||||
ipoib_hdr->real.reserved = 0;
|
||||
|
||||
/* Hand off to network device */
|
||||
return netdev_tx ( netdev, iobuf );
|
||||
}
|
||||
|
||||
/**
|
||||
* Process received IPoIB packet
|
||||
*
|
||||
* @v iobuf I/O buffer
|
||||
* @v netdev Network device
|
||||
*
|
||||
* Strips off the IPoIB link-layer header and passes up to the
|
||||
* network-layer protocol.
|
||||
*/
|
||||
static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) {
|
||||
struct ipoib_hdr *ipoib_hdr = iobuf->data;
|
||||
|
||||
/* Sanity check */
|
||||
if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
|
||||
DBG ( "IPoIB packet too short for link-layer header\n" );
|
||||
DBG_HD ( iobuf->data, iob_len ( iobuf ) );
|
||||
free_iob ( iobuf );
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Strip off IPoIB header */
|
||||
iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
|
||||
|
||||
/* Hand off to network-layer protocol */
|
||||
return net_rx ( iobuf, netdev, ipoib_hdr->real.proto,
|
||||
&ipoib_hdr->pseudo.peer );
|
||||
}
|
||||
|
||||
/**
|
||||
* Transcribe IPoIB address
|
||||
*
|
||||
* @v ll_addr Link-layer address
|
||||
* @ret string Link-layer address in human-readable format
|
||||
*/
|
||||
const char * ipoib_ntoa ( const void *ll_addr ) {
|
||||
static char buf[45];
|
||||
const struct ipoib_mac *mac = ll_addr;
|
||||
|
||||
snprintf ( buf, sizeof ( buf ), "%08lx:%08lx:%08lx:%08lx:%08lx",
|
||||
htonl ( mac->qpn ), htonl ( mac->gid.u.dwords[0] ),
|
||||
htonl ( mac->gid.u.dwords[1] ),
|
||||
htonl ( mac->gid.u.dwords[2] ),
|
||||
htonl ( mac->gid.u.dwords[3] ) );
|
||||
return buf;
|
||||
}
|
||||
|
||||
/** IPoIB protocol */
|
||||
struct ll_protocol ipoib_protocol __ll_protocol = {
|
||||
.name = "IPoIB",
|
||||
.ll_proto = htons ( ARPHRD_INFINIBAND ),
|
||||
.ll_addr_len = IPOIB_ALEN,
|
||||
.ll_header_len = IPOIB_HLEN,
|
||||
.ll_broadcast = ( uint8_t * ) &ipoib_broadcast,
|
||||
.tx = ipoib_tx,
|
||||
.rx = ipoib_rx,
|
||||
.ntoa = ipoib_ntoa,
|
||||
};
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* IPoIB network device
|
||||
*
|
||||
****************************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* Destroy queue set
|
||||
*
|
||||
* @v ipoib IPoIB device
|
||||
* @v qset Queue set
|
||||
*/
|
||||
static void ipoib_destroy_qset ( struct ipoib_device *ipoib,
|
||||
struct ipoib_queue_set *qset ) {
|
||||
struct ib_device *ibdev = ipoib->ibdev;
|
||||
|
||||
if ( qset->qp )
|
||||
ib_destroy_qp ( ibdev, qset->qp );
|
||||
if ( qset->cq )
|
||||
ib_destroy_cq ( ibdev, qset->cq );
|
||||
memset ( qset, 0, sizeof ( *qset ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Create queue set
|
||||
*
|
||||
* @v ipoib IPoIB device
|
||||
* @v qset Queue set
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
static int ipoib_create_qset ( struct ipoib_device *ipoib,
|
||||
struct ipoib_queue_set *qset,
|
||||
unsigned int num_cqes,
|
||||
unsigned int num_send_wqes,
|
||||
unsigned int num_recv_wqes,
|
||||
unsigned long qkey ) {
|
||||
struct ib_device *ibdev = ipoib->ibdev;
|
||||
int rc;
|
||||
|
||||
/* Store queue parameters */
|
||||
qset->recv_max_fill = num_recv_wqes;
|
||||
|
||||
/* Allocate completion queue */
|
||||
qset->cq = ib_create_cq ( ibdev, num_cqes );
|
||||
if ( ! qset->cq ) {
|
||||
DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
|
||||
ipoib );
|
||||
rc = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Allocate queue pair */
|
||||
qset->qp = ib_create_qp ( ibdev, num_send_wqes, qset->cq,
|
||||
num_recv_wqes, qset->cq, qkey );
|
||||
if ( ! qset->qp ) {
|
||||
DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
|
||||
ipoib );
|
||||
rc = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
qset->qp->owner_priv = ipoib->netdev;
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
ipoib_destroy_qset ( ipoib, qset );
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find path cache entry by GID
|
||||
*
|
||||
* @v gid GID
|
||||
* @ret entry Path cache entry, or NULL
|
||||
*/
|
||||
static struct ipoib_cached_path *
|
||||
ipoib_find_cached_path ( struct ib_gid *gid ) {
|
||||
struct ipoib_cached_path *path;
|
||||
unsigned int i;
|
||||
|
||||
for ( i = 0 ; i < IPOIB_NUM_CACHED_PATHS ; i++ ) {
|
||||
path = &ipoib_path_cache[i];
|
||||
if ( memcmp ( &path->gid, gid, sizeof ( *gid ) ) == 0 )
|
||||
return path;
|
||||
}
|
||||
DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx cache miss\n",
|
||||
htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ),
|
||||
htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ) );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transmit path record request
|
||||
*
|
||||
* @v ipoib IPoIB device
|
||||
* @v gid Destination GID
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
static int ipoib_get_path_record ( struct ipoib_device *ipoib,
|
||||
struct ib_gid *gid ) {
|
||||
struct ib_device *ibdev = ipoib->ibdev;
|
||||
struct io_buffer *iobuf;
|
||||
struct ib_mad_path_record *path_record;
|
||||
struct ib_address_vector av;
|
||||
int rc;
|
||||
|
||||
/* Allocate I/O buffer */
|
||||
iobuf = alloc_iob ( sizeof ( *path_record ) );
|
||||
if ( ! iobuf )
|
||||
return -ENOMEM;
|
||||
iob_put ( iobuf, sizeof ( *path_record ) );
|
||||
path_record = iobuf->data;
|
||||
memset ( path_record, 0, sizeof ( *path_record ) );
|
||||
|
||||
/* Construct path record request */
|
||||
path_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
|
||||
path_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
|
||||
path_record->mad_hdr.class_version = 2;
|
||||
path_record->mad_hdr.method = IB_MGMT_METHOD_GET;
|
||||
path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC );
|
||||
path_record->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC;
|
||||
path_record->mad_hdr.tid[1] = ipoib_meta_tid++;
|
||||
path_record->sa_hdr.comp_mask[1] =
|
||||
htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID );
|
||||
memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) );
|
||||
memcpy ( &path_record->sgid, &ibdev->port_gid,
|
||||
sizeof ( path_record->sgid ) );
|
||||
|
||||
/* Construct address vector */
|
||||
memset ( &av, 0, sizeof ( av ) );
|
||||
av.dlid = ibdev->sm_lid;
|
||||
av.dest_qp = IB_SA_QPN;
|
||||
av.qkey = IB_GLOBAL_QKEY;
|
||||
|
||||
/* Post send request */
|
||||
if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
|
||||
iobuf ) ) != 0 ) {
|
||||
DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
|
||||
ipoib, strerror ( rc ) );
|
||||
free_iob ( iobuf );
|
||||
return rc;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transmit multicast group membership request
|
||||
*
|
||||
* @v ipoib IPoIB device
|
||||
* @v gid Multicast GID
|
||||
* @v join Join (rather than leave) group
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
static int ipoib_mc_member_record ( struct ipoib_device *ipoib,
|
||||
struct ib_gid *gid, int join ) {
|
||||
struct ib_device *ibdev = ipoib->ibdev;
|
||||
struct io_buffer *iobuf;
|
||||
struct ib_mad_mc_member_record *mc_member_record;
|
||||
struct ib_address_vector av;
|
||||
int rc;
|
||||
|
||||
/* Allocate I/O buffer */
|
||||
iobuf = alloc_iob ( sizeof ( *mc_member_record ) );
|
||||
if ( ! iobuf )
|
||||
return -ENOMEM;
|
||||
iob_put ( iobuf, sizeof ( *mc_member_record ) );
|
||||
mc_member_record = iobuf->data;
|
||||
memset ( mc_member_record, 0, sizeof ( *mc_member_record ) );
|
||||
|
||||
/* Construct path record request */
|
||||
mc_member_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
|
||||
mc_member_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
|
||||
mc_member_record->mad_hdr.class_version = 2;
|
||||
mc_member_record->mad_hdr.method =
|
||||
( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE );
|
||||
mc_member_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
|
||||
mc_member_record->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC;
|
||||
mc_member_record->mad_hdr.tid[1] = ipoib_meta_tid++;
|
||||
mc_member_record->sa_hdr.comp_mask[1] =
|
||||
htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
|
||||
IB_SA_MCMEMBER_REC_JOIN_STATE );
|
||||
mc_member_record->scope__join_state = 1;
|
||||
memcpy ( &mc_member_record->mgid, gid,
|
||||
sizeof ( mc_member_record->mgid ) );
|
||||
memcpy ( &mc_member_record->port_gid, &ibdev->port_gid,
|
||||
sizeof ( mc_member_record->port_gid ) );
|
||||
|
||||
/* Construct address vector */
|
||||
memset ( &av, 0, sizeof ( av ) );
|
||||
av.dlid = ibdev->sm_lid;
|
||||
av.dest_qp = IB_SA_QPN;
|
||||
av.qkey = IB_GLOBAL_QKEY;
|
||||
|
||||
/* Post send request */
|
||||
if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
|
||||
iobuf ) ) != 0 ) {
|
||||
DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
|
||||
ipoib, strerror ( rc ) );
|
||||
free_iob ( iobuf );
|
||||
return rc;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transmit packet via IPoIB network device
|
||||
*
|
||||
* @v netdev Network device
|
||||
* @v iobuf I/O buffer
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
static int ipoib_transmit ( struct net_device *netdev,
|
||||
struct io_buffer *iobuf ) {
|
||||
struct ipoib_device *ipoib = netdev->priv;
|
||||
struct ib_device *ibdev = ipoib->ibdev;
|
||||
struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data;
|
||||
struct ib_address_vector av;
|
||||
struct ib_gid *gid;
|
||||
struct ipoib_cached_path *path;
|
||||
int rc;
|
||||
|
||||
/* Sanity check */
|
||||
if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) {
|
||||
DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
|
||||
return -EINVAL;
|
||||
}
|
||||
iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) );
|
||||
|
||||
/* Construct address vector */
|
||||
memset ( &av, 0, sizeof ( av ) );
|
||||
av.qkey = IB_GLOBAL_QKEY;
|
||||
av.gid_present = 1;
|
||||
if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) {
|
||||
/* Broadcast address */
|
||||
av.dest_qp = IB_BROADCAST_QPN;
|
||||
av.dlid = ipoib->broadcast_lid;
|
||||
gid = &ipoib->broadcast_gid;
|
||||
} else {
|
||||
/* Unicast - look in path cache */
|
||||
path = ipoib_find_cached_path ( &ipoib_pshdr->peer.gid );
|
||||
if ( ! path ) {
|
||||
/* No path entry - get path record */
|
||||
rc = ipoib_get_path_record ( ipoib,
|
||||
&ipoib_pshdr->peer.gid );
|
||||
netdev_tx_complete ( netdev, iobuf );
|
||||
return rc;
|
||||
}
|
||||
av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn );
|
||||
av.dlid = path->dlid;
|
||||
av.rate = path->rate;
|
||||
av.sl = path->sl;
|
||||
gid = &ipoib_pshdr->peer.gid;
|
||||
}
|
||||
memcpy ( &av.gid, gid, sizeof ( av.gid ) );
|
||||
|
||||
return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf );
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle IPoIB data send completion
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v completion Completion
|
||||
* @v iobuf I/O buffer
|
||||
*/
|
||||
static void ipoib_data_complete_send ( struct ib_device *ibdev __unused,
|
||||
struct ib_queue_pair *qp,
|
||||
struct ib_completion *completion,
|
||||
struct io_buffer *iobuf ) {
|
||||
struct net_device *netdev = qp->owner_priv;
|
||||
|
||||
netdev_tx_complete_err ( netdev, iobuf,
|
||||
( completion->syndrome ? -EIO : 0 ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle IPoIB data receive completion
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v completion Completion
|
||||
* @v iobuf I/O buffer
|
||||
*/
|
||||
static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused,
|
||||
struct ib_queue_pair *qp,
|
||||
struct ib_completion *completion,
|
||||
struct io_buffer *iobuf ) {
|
||||
struct net_device *netdev = qp->owner_priv;
|
||||
struct ipoib_device *ipoib = netdev->priv;
|
||||
struct ipoib_pseudo_hdr *ipoib_pshdr;
|
||||
|
||||
if ( completion->syndrome ) {
|
||||
netdev_rx_err ( netdev, iobuf, -EIO );
|
||||
goto done;
|
||||
}
|
||||
|
||||
iob_put ( iobuf, completion->len );
|
||||
if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) {
|
||||
DBGC ( ipoib, "IPoIB %p received data packet too short to "
|
||||
"contain GRH\n", ipoib );
|
||||
DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
|
||||
netdev_rx_err ( netdev, iobuf, -EIO );
|
||||
goto done;
|
||||
}
|
||||
iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) );
|
||||
|
||||
if ( iob_len ( iobuf ) < sizeof ( struct ipoib_real_hdr ) ) {
|
||||
DBGC ( ipoib, "IPoIB %p received data packet too short to "
|
||||
"contain IPoIB header\n", ipoib );
|
||||
DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
|
||||
netdev_rx_err ( netdev, iobuf, -EIO );
|
||||
goto done;
|
||||
}
|
||||
|
||||
ipoib_pshdr = iob_push ( iobuf, sizeof ( *ipoib_pshdr ) );
|
||||
/* FIXME: fill in a MAC address for the sake of AoE! */
|
||||
|
||||
netdev_rx ( netdev, iobuf );
|
||||
|
||||
done:
|
||||
ipoib->data.recv_fill--;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle IPoIB metadata send completion
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v completion Completion
|
||||
* @v iobuf I/O buffer
|
||||
*/
|
||||
static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused,
|
||||
struct ib_queue_pair *qp,
|
||||
struct ib_completion *completion,
|
||||
struct io_buffer *iobuf ) {
|
||||
struct net_device *netdev = qp->owner_priv;
|
||||
struct ipoib_device *ipoib = netdev->priv;
|
||||
|
||||
if ( completion->syndrome ) {
|
||||
DBGC ( ipoib, "IPoIB %p metadata TX completion error %x\n",
|
||||
ipoib, completion->syndrome );
|
||||
}
|
||||
free_iob ( iobuf );
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle received IPoIB path record
|
||||
*
|
||||
* @v ipoib IPoIB device
|
||||
* @v path_record Path record
|
||||
*/
|
||||
static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused,
|
||||
struct ib_mad_path_record *path_record ) {
|
||||
struct ipoib_cached_path *path;
|
||||
|
||||
/* Update path cache entry */
|
||||
path = &ipoib_path_cache[ipoib_path_cache_idx];
|
||||
memcpy ( &path->gid, &path_record->dgid, sizeof ( path->gid ) );
|
||||
path->dlid = ntohs ( path_record->dlid );
|
||||
path->sl = ( path_record->reserved__sl & 0x0f );
|
||||
path->rate = ( path_record->rate_selector__rate & 0x3f );
|
||||
|
||||
DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n",
|
||||
htonl ( path->gid.u.dwords[0] ), htonl ( path->gid.u.dwords[1] ),
|
||||
htonl ( path->gid.u.dwords[2] ), htonl ( path->gid.u.dwords[3] ),
|
||||
path->dlid, path->sl, path->rate );
|
||||
|
||||
/* Update path cache index */
|
||||
ipoib_path_cache_idx++;
|
||||
if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS )
|
||||
ipoib_path_cache_idx = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle received IPoIB multicast membership record
|
||||
*
|
||||
* @v ipoib IPoIB device
|
||||
* @v mc_member_record Multicast membership record
|
||||
*/
|
||||
static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
|
||||
struct ib_mad_mc_member_record *mc_member_record ) {
|
||||
/* Record parameters */
|
||||
ipoib->broadcast_joined =
|
||||
( mc_member_record->scope__join_state & 0x0f );
|
||||
ipoib->data_qkey = ntohl ( mc_member_record->qkey );
|
||||
ipoib->broadcast_lid = ntohs ( mc_member_record->mlid );
|
||||
DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n",
|
||||
ipoib, ( ipoib->broadcast_joined ? "joined" : "left" ),
|
||||
ipoib->data_qkey, ipoib->broadcast_lid );
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle IPoIB metadata receive completion
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v completion Completion
|
||||
* @v iobuf I/O buffer
|
||||
*/
|
||||
static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused,
|
||||
struct ib_queue_pair *qp,
|
||||
struct ib_completion *completion,
|
||||
struct io_buffer *iobuf ) {
|
||||
struct net_device *netdev = qp->owner_priv;
|
||||
struct ipoib_device *ipoib = netdev->priv;
|
||||
union ib_mad *mad;
|
||||
|
||||
if ( completion->syndrome ) {
|
||||
DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n",
|
||||
ipoib, completion->syndrome );
|
||||
goto done;
|
||||
}
|
||||
|
||||
iob_put ( iobuf, completion->len );
|
||||
if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) {
|
||||
DBGC ( ipoib, "IPoIB %p received metadata packet too short "
|
||||
"to contain GRH\n", ipoib );
|
||||
DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
|
||||
goto done;
|
||||
}
|
||||
iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) );
|
||||
if ( iob_len ( iobuf ) < sizeof ( *mad ) ) {
|
||||
DBGC ( ipoib, "IPoIB %p received metadata packet too short "
|
||||
"to contain reply\n", ipoib );
|
||||
DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
|
||||
goto done;
|
||||
}
|
||||
mad = iobuf->data;
|
||||
|
||||
if ( mad->mad_hdr.status != 0 ) {
|
||||
DBGC ( ipoib, "IPoIB %p metadata RX err status %04x\n",
|
||||
ipoib, ntohs ( mad->mad_hdr.status ) );
|
||||
goto done;
|
||||
}
|
||||
|
||||
switch ( mad->mad_hdr.tid[0] ) {
|
||||
case IPOIB_TID_GET_PATH_REC:
|
||||
ipoib_recv_path_record ( ipoib, &mad->path_record );
|
||||
break;
|
||||
case IPOIB_TID_MC_MEMBER_REC:
|
||||
ipoib_recv_mc_member_record ( ipoib, &mad->mc_member_record );
|
||||
break;
|
||||
default:
|
||||
DBGC ( ipoib, "IPoIB %p unwanted response:\n",
|
||||
ipoib );
|
||||
DBGC_HD ( ipoib, mad, sizeof ( *mad ) );
|
||||
break;
|
||||
}
|
||||
|
||||
done:
|
||||
ipoib->meta.recv_fill--;
|
||||
free_iob ( iobuf );
|
||||
}
|
||||
|
||||
/**
|
||||
* Refill IPoIB receive ring
|
||||
*
|
||||
* @v ipoib IPoIB device
|
||||
*/
|
||||
static void ipoib_refill_recv ( struct ipoib_device *ipoib,
|
||||
struct ipoib_queue_set *qset ) {
|
||||
struct ib_device *ibdev = ipoib->ibdev;
|
||||
struct io_buffer *iobuf;
|
||||
int rc;
|
||||
|
||||
while ( qset->recv_fill < qset->recv_max_fill ) {
|
||||
iobuf = alloc_iob ( IPOIB_MTU );
|
||||
if ( ! iobuf )
|
||||
break;
|
||||
if ( ( rc = ib_post_recv ( ibdev, qset->qp, iobuf ) ) != 0 ) {
|
||||
free_iob ( iobuf );
|
||||
break;
|
||||
}
|
||||
qset->recv_fill++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Poll IPoIB network device
|
||||
*
|
||||
* @v netdev Network device
|
||||
*/
|
||||
static void ipoib_poll ( struct net_device *netdev ) {
|
||||
struct ipoib_device *ipoib = netdev->priv;
|
||||
struct ib_device *ibdev = ipoib->ibdev;
|
||||
|
||||
ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
|
||||
ipoib_meta_complete_recv );
|
||||
ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send,
|
||||
ipoib_data_complete_recv );
|
||||
ipoib_refill_recv ( ipoib, &ipoib->meta );
|
||||
ipoib_refill_recv ( ipoib, &ipoib->data );
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable/disable interrupts on IPoIB network device
|
||||
*
|
||||
* @v netdev Network device
|
||||
* @v enable Interrupts should be enabled
|
||||
*/
|
||||
static void ipoib_irq ( struct net_device *netdev __unused,
|
||||
int enable __unused ) {
|
||||
/* No implementation */
|
||||
}
|
||||
|
||||
/**
|
||||
* Open IPoIB network device
|
||||
*
|
||||
* @v netdev Network device
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
static int ipoib_open ( struct net_device *netdev ) {
|
||||
struct ipoib_device *ipoib = netdev->priv;
|
||||
struct ib_device *ibdev = ipoib->ibdev;
|
||||
int rc;
|
||||
|
||||
/* Attach to broadcast multicast GID */
|
||||
if ( ( rc = ib_mcast_attach ( ibdev, ipoib->data.qp,
|
||||
&ipoib->broadcast_gid ) ) != 0 ) {
|
||||
DBG ( "Could not attach to broadcast GID: %s\n",
|
||||
strerror ( rc ) );
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Fill receive rings */
|
||||
ipoib_refill_recv ( ipoib, &ipoib->meta );
|
||||
ipoib_refill_recv ( ipoib, &ipoib->data );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close IPoIB network device
|
||||
*
|
||||
* @v netdev Network device
|
||||
*/
|
||||
static void ipoib_close ( struct net_device *netdev ) {
|
||||
struct ipoib_device *ipoib = netdev->priv;
|
||||
struct ib_device *ibdev = ipoib->ibdev;
|
||||
|
||||
/* Detach from broadcast multicast GID */
|
||||
ib_mcast_detach ( ibdev, ipoib->data.qp, &ipoib->broadcast_gid );
|
||||
|
||||
/* FIXME: should probably flush the receive ring */
|
||||
}
|
||||
|
||||
/** IPoIB network device operations */
|
||||
static struct net_device_operations ipoib_operations = {
|
||||
.open = ipoib_open,
|
||||
.close = ipoib_close,
|
||||
.transmit = ipoib_transmit,
|
||||
.poll = ipoib_poll,
|
||||
.irq = ipoib_irq,
|
||||
};
|
||||
|
||||
/**
|
||||
* Join IPoIB broadcast group
|
||||
*
|
||||
* @v ipoib IPoIB device
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
|
||||
struct ib_device *ibdev = ipoib->ibdev;
|
||||
unsigned int delay_ms;
|
||||
int rc;
|
||||
|
||||
/* Make sure we have some receive descriptors */
|
||||
ipoib_refill_recv ( ipoib, &ipoib->meta );
|
||||
|
||||
/* Send join request */
|
||||
if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
|
||||
1 ) ) != 0 ) {
|
||||
DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
|
||||
ipoib, strerror ( rc ) );
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Wait for join to complete. Ideally we wouldn't delay for
|
||||
* this long, but we need the queue key before we can set up
|
||||
* the data queue pair, which we need before we can know the
|
||||
* MAC address.
|
||||
*/
|
||||
for ( delay_ms = IPOIB_JOIN_MAX_DELAY_MS ; delay_ms ; delay_ms-- ) {
|
||||
mdelay ( 1 );
|
||||
ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
|
||||
ipoib_meta_complete_recv );
|
||||
ipoib_refill_recv ( ipoib, &ipoib->meta );
|
||||
if ( ipoib->broadcast_joined )
|
||||
return 0;
|
||||
}
|
||||
DBGC ( ipoib, "IPoIB %p timed out waiting for broadcast join\n",
|
||||
ipoib );
|
||||
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Probe IPoIB device
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
int ipoib_probe ( struct ib_device *ibdev ) {
|
||||
struct net_device *netdev;
|
||||
struct ipoib_device *ipoib;
|
||||
struct ipoib_mac *mac;
|
||||
int rc;
|
||||
|
||||
/* Allocate network device */
|
||||
netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
|
||||
if ( ! netdev )
|
||||
return -ENOMEM;
|
||||
netdev_init ( netdev, &ipoib_operations );
|
||||
ipoib = netdev->priv;
|
||||
ib_set_ownerdata ( ibdev, netdev );
|
||||
netdev->dev = ibdev->dev;
|
||||
memset ( ipoib, 0, sizeof ( *ipoib ) );
|
||||
ipoib->netdev = netdev;
|
||||
ipoib->ibdev = ibdev;
|
||||
|
||||
/* Calculate broadcast GID */
|
||||
memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
|
||||
sizeof ( ipoib->broadcast_gid ) );
|
||||
ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
|
||||
|
||||
/* Allocate metadata queue set */
|
||||
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
|
||||
IPOIB_META_NUM_CQES,
|
||||
IPOIB_META_NUM_SEND_WQES,
|
||||
IPOIB_META_NUM_RECV_WQES,
|
||||
IB_GLOBAL_QKEY ) ) != 0 ) {
|
||||
DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
|
||||
ipoib, strerror ( rc ) );
|
||||
goto err_create_meta_qset;
|
||||
}
|
||||
|
||||
/* Join broadcast group */
|
||||
if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
|
||||
DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
|
||||
ipoib, strerror ( rc ) );
|
||||
goto err_join_broadcast_group;
|
||||
}
|
||||
|
||||
/* Allocate data queue set */
|
||||
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
|
||||
IPOIB_DATA_NUM_CQES,
|
||||
IPOIB_DATA_NUM_SEND_WQES,
|
||||
IPOIB_DATA_NUM_RECV_WQES,
|
||||
ipoib->data_qkey ) ) != 0 ) {
|
||||
DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
|
||||
ipoib, strerror ( rc ) );
|
||||
goto err_create_data_qset;
|
||||
}
|
||||
|
||||
/* Construct MAC address */
|
||||
mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
|
||||
mac->qpn = htonl ( ipoib->data.qp->qpn );
|
||||
memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
|
||||
|
||||
/* Register network device */
|
||||
if ( ( rc = register_netdev ( netdev ) ) != 0 )
|
||||
goto err_register_netdev;
|
||||
|
||||
return 0;
|
||||
|
||||
err_register_netdev:
|
||||
ipoib_destroy_qset ( ipoib, &ipoib->data );
|
||||
err_join_broadcast_group:
|
||||
err_create_data_qset:
|
||||
ipoib_destroy_qset ( ipoib, &ipoib->meta );
|
||||
err_create_meta_qset:
|
||||
netdev_nullify ( netdev );
|
||||
netdev_put ( netdev );
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove IPoIB device
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
*/
|
||||
void ipoib_remove ( struct ib_device *ibdev ) {
|
||||
struct net_device *netdev = ib_get_ownerdata ( ibdev );
|
||||
struct ipoib_device *ipoib = netdev->priv;
|
||||
|
||||
unregister_netdev ( netdev );
|
||||
ipoib_destroy_qset ( ipoib, &ipoib->data );
|
||||
ipoib_destroy_qset ( ipoib, &ipoib->meta );
|
||||
netdev_nullify ( netdev );
|
||||
netdev_put ( netdev );
|
||||
}
|
578
src/include/gpxe/infiniband.h
Normal file
578
src/include/gpxe/infiniband.h
Normal file
@ -0,0 +1,578 @@
|
||||
#ifndef _GPXE_INFINIBAND_H
|
||||
#define _GPXE_INFINIBAND_H
|
||||
|
||||
/** @file
|
||||
*
|
||||
* Infiniband protocol
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <gpxe/device.h>
|
||||
|
||||
/** Queue pair number of the subnet administrator */
#define IB_SA_QPN 1

/** Queue pair number used for broadcast traffic */
#define IB_BROADCAST_QPN 0xffffffUL

/** Queue key used when talking to the subnet administrator */
#define IB_GLOBAL_QKEY 0x80010000UL
|
||||
|
||||
/** An Infiniband Global Identifier
 *
 * A 16-byte value, accessible as bytes, 16-bit words or 32-bit
 * dwords through overlapping views of the same storage.
 */
struct ib_gid {
	union {
		/** Byte view */
		uint8_t bytes[16];
		/** 16-bit word view */
		uint16_t words[8];
		/** 32-bit dword view */
		uint32_t dwords[4];
	} u;
};
|
||||
|
||||
/** An Infiniband Global Route Header */
|
||||
struct ib_global_route_header {
|
||||
/** IP version, traffic class, and flow label
|
||||
*
|
||||
* 4 bits : Version of the GRH
|
||||
* 8 bits : Traffic class
|
||||
* 20 bits : Flow label
|
||||
*/
|
||||
uint32_t ipver_tclass_flowlabel;
|
||||
/** Payload length */
|
||||
uint16_t paylen;
|
||||
/** Next header */
|
||||
uint8_t nxthdr;
|
||||
/** Hop limit */
|
||||
uint8_t hoplmt;
|
||||
/** Source GID */
|
||||
struct ib_gid sgid;
|
||||
/** Destiniation GID */
|
||||
struct ib_gid dgid;
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
struct ib_device;
|
||||
struct ib_queue_pair;
|
||||
struct ib_completion_queue;
|
||||
|
||||
/** An Infiniband Work Queue */
|
||||
struct ib_work_queue {
|
||||
/** Containing queue pair */
|
||||
struct ib_queue_pair *qp;
|
||||
/** "Is a send queue" flag */
|
||||
int is_send;
|
||||
/** Associated completion queue */
|
||||
struct ib_completion_queue *cq;
|
||||
/** List of work queues on this completion queue */
|
||||
struct list_head list;
|
||||
/** Number of work queue entries */
|
||||
unsigned int num_wqes;
|
||||
/** Next work queue entry index
|
||||
*
|
||||
* This is the index of the next entry to be filled (i.e. the
|
||||
* first empty entry). This value is not bounded by num_wqes;
|
||||
* users must logical-AND with (num_wqes-1) to generate an
|
||||
* array index.
|
||||
*/
|
||||
unsigned long next_idx;
|
||||
/** I/O buffers assigned to work queue */
|
||||
struct io_buffer **iobufs;
|
||||
/** Device private data */
|
||||
void *dev_priv;
|
||||
};
|
||||
|
||||
/** An Infiniband Queue Pair */
|
||||
struct ib_queue_pair {
|
||||
/** Queue Pair Number */
|
||||
unsigned long qpn;
|
||||
/** Queue key */
|
||||
unsigned long qkey;
|
||||
/** Send queue */
|
||||
struct ib_work_queue send;
|
||||
/** Receive queue */
|
||||
struct ib_work_queue recv;
|
||||
/** Device private data */
|
||||
void *dev_priv;
|
||||
/** Queue owner private data */
|
||||
void *owner_priv;
|
||||
};
|
||||
|
||||
/** An Infiniband Completion Queue */
|
||||
struct ib_completion_queue {
|
||||
/** Completion queue number */
|
||||
unsigned long cqn;
|
||||
/** Number of completion queue entries */
|
||||
unsigned int num_cqes;
|
||||
/** Next completion queue entry index
|
||||
*
|
||||
* This is the index of the next entry to be filled (i.e. the
|
||||
* first empty entry). This value is not bounded by num_wqes;
|
||||
* users must logical-AND with (num_wqes-1) to generate an
|
||||
* array index.
|
||||
*/
|
||||
unsigned long next_idx;
|
||||
/** List of work queues completing to this queue */
|
||||
struct list_head work_queues;
|
||||
/** Device private data */
|
||||
void *dev_priv;
|
||||
};
|
||||
|
||||
/** An Infiniband completion
 *
 * Passed to completion handlers to describe one finished work queue
 * entry.
 */
struct ib_completion {
	/** Syndrome
	 *
	 * Zero on success; any non-zero value means the completion
	 * is in error.
	 */
	unsigned int syndrome;
	/** Length */
	size_t len;
};
|
||||
|
||||
/** An Infiniband completion handler
 *
 * Callback type invoked for each completed work queue entry.
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair on which the work completed
 * @v completion	Completion details (syndrome and length)
 * @v iobuf		I/O buffer associated with the work queue entry
 */
typedef void ( * ib_completer_t ) ( struct ib_device *ibdev,
				    struct ib_queue_pair *qp,
				    struct ib_completion *completion,
				    struct io_buffer *iobuf );
|
||||
|
||||
/** An Infiniband Address Vector */
|
||||
struct ib_address_vector {
|
||||
/** Destination Queue Pair */
|
||||
unsigned int dest_qp;
|
||||
/** Queue key */
|
||||
unsigned long qkey;
|
||||
/** Destination Local ID */
|
||||
unsigned int dlid;
|
||||
/** Rate */
|
||||
unsigned int rate;
|
||||
/** Service level */
|
||||
unsigned int sl;
|
||||
/** GID is present */
|
||||
unsigned int gid_present;
|
||||
/** GID */
|
||||
struct ib_gid gid;
|
||||
};
|
||||
|
||||
/**
|
||||
* Infiniband device operations
|
||||
*
|
||||
* These represent a subset of the Infiniband Verbs.
|
||||
*/
|
||||
struct ib_device_operations {
|
||||
/** Create completion queue
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v cq Completion queue
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
int ( * create_cq ) ( struct ib_device *ibdev,
|
||||
struct ib_completion_queue *cq );
|
||||
/** Destroy completion queue
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v cq Completion queue
|
||||
*/
|
||||
void ( * destroy_cq ) ( struct ib_device *ibdev,
|
||||
struct ib_completion_queue *cq );
|
||||
/** Create queue pair
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
int ( * create_qp ) ( struct ib_device *ibdev,
|
||||
struct ib_queue_pair *qp );
|
||||
/** Destroy queue pair
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
*/
|
||||
void ( * destroy_qp ) ( struct ib_device *ibdev,
|
||||
struct ib_queue_pair *qp );
|
||||
/** Post send work queue entry
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v av Address vector
|
||||
* @v iobuf I/O buffer
|
||||
* @ret rc Return status code
|
||||
*
|
||||
* If this method returns success, the I/O buffer remains
|
||||
* owned by the queue pair. If this method returns failure,
|
||||
* the I/O buffer is immediately released; the failure is
|
||||
* interpreted as "failure to enqueue buffer".
|
||||
*/
|
||||
int ( * post_send ) ( struct ib_device *ibdev,
|
||||
struct ib_queue_pair *qp,
|
||||
struct ib_address_vector *av,
|
||||
struct io_buffer *iobuf );
|
||||
/** Post receive work queue entry
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v iobuf I/O buffer
|
||||
* @ret rc Return status code
|
||||
*
|
||||
* If this method returns success, the I/O buffer remains
|
||||
* owned by the queue pair. If this method returns failure,
|
||||
* the I/O buffer is immediately released; the failure is
|
||||
* interpreted as "failure to enqueue buffer".
|
||||
*/
|
||||
int ( * post_recv ) ( struct ib_device *ibdev,
|
||||
struct ib_queue_pair *qp,
|
||||
struct io_buffer *iobuf );
|
||||
/** Poll completion queue
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v cq Completion queue
|
||||
* @v complete_send Send completion handler
|
||||
* @v complete_recv Receive completion handler
|
||||
*
|
||||
* The completion handler takes ownership of the I/O buffer.
|
||||
*/
|
||||
void ( * poll_cq ) ( struct ib_device *ibdev,
|
||||
struct ib_completion_queue *cq,
|
||||
ib_completer_t complete_send,
|
||||
ib_completer_t complete_recv );
|
||||
/** Attach to multicast group
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v gid Multicast GID
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
int ( * mcast_attach ) ( struct ib_device *ibdev,
|
||||
struct ib_queue_pair *qp,
|
||||
struct ib_gid *gid );
|
||||
/** Detach from multicast group
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v gid Multicast GID
|
||||
*/
|
||||
void ( * mcast_detach ) ( struct ib_device *ibdev,
|
||||
struct ib_queue_pair *qp,
|
||||
struct ib_gid *gid );
|
||||
};
|
||||
|
||||
/** An Infiniband device */
|
||||
struct ib_device {
|
||||
/** Port GID */
|
||||
struct ib_gid port_gid;
|
||||
/** Subnet manager LID */
|
||||
unsigned long sm_lid;
|
||||
/** Partition key */
|
||||
unsigned int pkey;
|
||||
/** Underlying device */
|
||||
struct device *dev;
|
||||
/** Infiniband operations */
|
||||
struct ib_device_operations *op;
|
||||
/** Device private data */
|
||||
void *dev_priv;
|
||||
/** Owner private data */
|
||||
void *owner_priv;
|
||||
};
|
||||
|
||||
extern struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev,
|
||||
unsigned int num_cqes );
|
||||
extern void ib_destroy_cq ( struct ib_device *ibdev,
|
||||
struct ib_completion_queue *cq );
|
||||
extern struct ib_queue_pair *
|
||||
ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes,
|
||||
struct ib_completion_queue *send_cq, unsigned int num_recv_wqes,
|
||||
struct ib_completion_queue *recv_cq, unsigned long qkey );
|
||||
extern void ib_destroy_qp ( struct ib_device *ibdev,
|
||||
struct ib_queue_pair *qp );
|
||||
extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
|
||||
unsigned long qpn, int is_send );
|
||||
extern struct ib_device * alloc_ibdev ( size_t priv_size );
|
||||
extern void free_ibdev ( struct ib_device *ibdev );
|
||||
|
||||
/**
|
||||
* Post send work queue entry
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v av Address vector
|
||||
* @v iobuf I/O buffer
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
static inline __attribute__ (( always_inline )) int
|
||||
ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
|
||||
struct ib_address_vector *av, struct io_buffer *iobuf ) {
|
||||
return ibdev->op->post_send ( ibdev, qp, av, iobuf );
|
||||
}
|
||||
|
||||
/**
|
||||
* Post receive work queue entry
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v iobuf I/O buffer
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
static inline __attribute__ (( always_inline )) int
|
||||
ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
|
||||
struct io_buffer *iobuf ) {
|
||||
return ibdev->op->post_recv ( ibdev, qp, iobuf );
|
||||
}
|
||||
|
||||
/**
|
||||
* Poll completion queue
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v cq Completion queue
|
||||
* @v complete_send Send completion handler
|
||||
* @v complete_recv Receive completion handler
|
||||
*/
|
||||
static inline __attribute__ (( always_inline )) void
|
||||
ib_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq,
|
||||
ib_completer_t complete_send, ib_completer_t complete_recv ) {
|
||||
ibdev->op->poll_cq ( ibdev, cq, complete_send, complete_recv );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Attach to multicast group
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v gid Multicast GID
|
||||
* @ret rc Return status code
|
||||
*/
|
||||
static inline __attribute__ (( always_inline )) int
|
||||
ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
|
||||
struct ib_gid *gid ) {
|
||||
return ibdev->op->mcast_attach ( ibdev, qp, gid );
|
||||
}
|
||||
|
||||
/**
|
||||
* Detach from multicast group
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
* @v gid Multicast GID
|
||||
*/
|
||||
static inline __attribute__ (( always_inline )) void
|
||||
ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
|
||||
struct ib_gid *gid ) {
|
||||
ibdev->op->mcast_detach ( ibdev, qp, gid );
|
||||
}
|
||||
|
||||
/**
|
||||
* Set Infiniband owner-private data
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v owner_priv Private data
|
||||
*/
|
||||
static inline void ib_set_ownerdata ( struct ib_device *ibdev,
|
||||
void *owner_priv ) {
|
||||
ibdev->owner_priv = owner_priv;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get Infiniband owner-private data
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @ret owner_priv Private data
|
||||
*/
|
||||
static inline void * ib_get_ownerdata ( struct ib_device *ibdev ) {
|
||||
return ibdev->owner_priv;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* Management datagrams
|
||||
*
|
||||
* Portions Copyright (c) 2004 Mellanox Technologies Ltd. All rights
|
||||
* reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Management base version */
|
||||
#define IB_MGMT_BASE_VERSION 1
|
||||
|
||||
/* Management classes */
|
||||
#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01
|
||||
#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81
|
||||
#define IB_MGMT_CLASS_SUBN_ADM 0x03
|
||||
#define IB_MGMT_CLASS_PERF_MGMT 0x04
|
||||
#define IB_MGMT_CLASS_BM 0x05
|
||||
#define IB_MGMT_CLASS_DEVICE_MGMT 0x06
|
||||
#define IB_MGMT_CLASS_CM 0x07
|
||||
#define IB_MGMT_CLASS_SNMP 0x08
|
||||
#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30
|
||||
#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F
|
||||
|
||||
/* Management methods */
|
||||
#define IB_MGMT_METHOD_GET 0x01
|
||||
#define IB_MGMT_METHOD_SET 0x02
|
||||
#define IB_MGMT_METHOD_GET_RESP 0x81
|
||||
#define IB_MGMT_METHOD_SEND 0x03
|
||||
#define IB_MGMT_METHOD_TRAP 0x05
|
||||
#define IB_MGMT_METHOD_REPORT 0x06
|
||||
#define IB_MGMT_METHOD_REPORT_RESP 0x86
|
||||
#define IB_MGMT_METHOD_TRAP_REPRESS 0x07
|
||||
#define IB_MGMT_METHOD_DELETE 0x15
|
||||
#define IB_MGMT_METHOD_RESP 0x80
|
||||
|
||||
/* Subnet management attributes */
|
||||
#define IB_SMP_ATTR_NOTICE 0x0002
|
||||
#define IB_SMP_ATTR_NODE_DESC 0x0010
|
||||
#define IB_SMP_ATTR_NODE_INFO 0x0011
|
||||
#define IB_SMP_ATTR_SWITCH_INFO 0x0012
|
||||
#define IB_SMP_ATTR_GUID_INFO 0x0014
|
||||
#define IB_SMP_ATTR_PORT_INFO 0x0015
|
||||
#define IB_SMP_ATTR_PKEY_TABLE 0x0016
|
||||
#define IB_SMP_ATTR_SL_TO_VL_TABLE 0x0017
|
||||
#define IB_SMP_ATTR_VL_ARB_TABLE 0x0018
|
||||
#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE 0x0019
|
||||
#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE 0x001A
|
||||
#define IB_SMP_ATTR_MCAST_FORWARD_TABLE 0x001B
|
||||
#define IB_SMP_ATTR_SM_INFO 0x0020
|
||||
#define IB_SMP_ATTR_VENDOR_DIAG 0x0030
|
||||
#define IB_SMP_ATTR_LED_INFO 0x0031
|
||||
#define IB_SMP_ATTR_VENDOR_MASK 0xFF00
|
||||
|
||||
#define IB_SA_ATTR_MC_MEMBER_REC 0x38
|
||||
#define IB_SA_ATTR_PATH_REC 0x35
|
||||
|
||||
#define IB_SA_MCMEMBER_REC_MGID (1<<0)
|
||||
#define IB_SA_MCMEMBER_REC_PORT_GID (1<<1)
|
||||
#define IB_SA_MCMEMBER_REC_QKEY (1<<2)
|
||||
#define IB_SA_MCMEMBER_REC_MLID (1<<3)
|
||||
#define IB_SA_MCMEMBER_REC_MTU_SELECTOR (1<<4)
|
||||
#define IB_SA_MCMEMBER_REC_MTU (1<<5)
|
||||
#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS (1<<6)
|
||||
#define IB_SA_MCMEMBER_REC_PKEY (1<<7)
|
||||
#define IB_SA_MCMEMBER_REC_RATE_SELECTOR (1<<8)
|
||||
#define IB_SA_MCMEMBER_REC_RATE (1<<9)
|
||||
#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR (1<<10)
|
||||
#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME (1<<11)
|
||||
#define IB_SA_MCMEMBER_REC_SL (1<<12)
|
||||
#define IB_SA_MCMEMBER_REC_FLOW_LABEL (1<<13)
|
||||
#define IB_SA_MCMEMBER_REC_HOP_LIMIT (1<<14)
|
||||
#define IB_SA_MCMEMBER_REC_SCOPE (1<<15)
|
||||
#define IB_SA_MCMEMBER_REC_JOIN_STATE (1<<16)
|
||||
#define IB_SA_MCMEMBER_REC_PROXY_JOIN (1<<17)
|
||||
|
||||
#define IB_SA_PATH_REC_DGID (1<<2)
|
||||
#define IB_SA_PATH_REC_SGID (1<<3)
|
||||
|
||||
struct ib_mad_hdr {
|
||||
uint8_t base_version;
|
||||
uint8_t mgmt_class;
|
||||
uint8_t class_version;
|
||||
uint8_t method;
|
||||
uint16_t status;
|
||||
uint16_t class_specific;
|
||||
uint32_t tid[2];
|
||||
uint16_t attr_id;
|
||||
uint16_t resv;
|
||||
uint32_t attr_mod;
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
struct ib_sa_hdr {
|
||||
uint32_t sm_key[2];
|
||||
uint16_t reserved;
|
||||
uint16_t attrib_offset;
|
||||
uint32_t comp_mask[2];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
struct ib_rmpp_hdr {
|
||||
uint32_t raw[3];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
struct ib_mad_data {
|
||||
struct ib_mad_hdr mad_hdr;
|
||||
uint8_t data[232];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
struct ib_mad_guid_info {
|
||||
struct ib_mad_hdr mad_hdr;
|
||||
uint32_t mkey[2];
|
||||
uint32_t reserved[8];
|
||||
uint8_t gid_local[8];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
struct ib_mad_port_info {
|
||||
struct ib_mad_hdr mad_hdr;
|
||||
uint32_t mkey[2];
|
||||
uint32_t reserved[8];
|
||||
uint32_t mkey2[2];
|
||||
uint8_t gid_prefix[8];
|
||||
uint16_t lid;
|
||||
uint16_t mastersm_lid;
|
||||
uint32_t cap_mask;
|
||||
uint16_t diag_code;
|
||||
uint16_t mkey_lease_period;
|
||||
uint8_t local_port_num;
|
||||
uint8_t link_width_enabled;
|
||||
uint8_t link_width_supported;
|
||||
uint8_t link_width_active;
|
||||
uint8_t port_state__link_speed_supported;
|
||||
uint8_t link_down_def_state__port_phys_state;
|
||||
uint8_t lmc__r1__mkey_prot_bits;
|
||||
uint8_t link_speed_enabled__link_speed_active;
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
struct ib_mad_pkey_table {
|
||||
struct ib_mad_hdr mad_hdr;
|
||||
uint32_t mkey[2];
|
||||
uint32_t reserved[8];
|
||||
uint16_t pkey[16][2];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
struct ib_mad_path_record {
|
||||
struct ib_mad_hdr mad_hdr;
|
||||
struct ib_rmpp_hdr rmpp_hdr;
|
||||
struct ib_sa_hdr sa_hdr;
|
||||
uint32_t reserved0[2];
|
||||
struct ib_gid dgid;
|
||||
struct ib_gid sgid;
|
||||
uint16_t dlid;
|
||||
uint16_t slid;
|
||||
uint32_t hop_limit__flow_label__raw_traffic;
|
||||
uint32_t pkey__numb_path__reversible__tclass;
|
||||
uint8_t reserved1;
|
||||
uint8_t reserved__sl;
|
||||
uint8_t mtu_selector__mtu;
|
||||
uint8_t rate_selector__rate;
|
||||
uint32_t preference__packet_lifetime__packet_lifetime_selector;
|
||||
uint32_t reserved2[35];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
struct ib_mad_mc_member_record {
|
||||
struct ib_mad_hdr mad_hdr;
|
||||
struct ib_rmpp_hdr rmpp_hdr;
|
||||
struct ib_sa_hdr sa_hdr;
|
||||
struct ib_gid mgid;
|
||||
struct ib_gid port_gid;
|
||||
uint32_t qkey;
|
||||
uint16_t mlid;
|
||||
uint8_t mtu_selector__mtu;
|
||||
uint8_t tclass;
|
||||
uint16_t pkey;
|
||||
uint8_t rate_selector__rate;
|
||||
uint8_t packet_lifetime_selector__packet_lifetime;
|
||||
uint32_t sl__flow_label__hop_limit;
|
||||
uint8_t scope__join_state;
|
||||
uint8_t proxy_join__reserved;
|
||||
uint16_t reserved0;
|
||||
uint32_t reserved1[37];
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
union ib_mad {
|
||||
struct ib_mad_hdr mad_hdr;
|
||||
struct ib_mad_data data;
|
||||
struct ib_mad_guid_info guid_info;
|
||||
struct ib_mad_port_info port_info;
|
||||
struct ib_mad_pkey_table pkey_table;
|
||||
struct ib_mad_path_record path_record;
|
||||
struct ib_mad_mc_member_record mc_member_record;
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
#endif /* _GPXE_INFINIBAND_H */
|
78
src/include/gpxe/ipoib.h
Normal file
78
src/include/gpxe/ipoib.h
Normal file
@ -0,0 +1,78 @@
|
||||
#ifndef _GPXE_IPOIB_H
|
||||
#define _GPXE_IPOIB_H
|
||||
|
||||
/** @file
|
||||
*
|
||||
* IP over Infiniband
|
||||
*/
|
||||
|
||||
#include <gpxe/infiniband.h>
|
||||
|
||||
/** IPoIB MAC address length */
|
||||
#define IPOIB_ALEN 20
|
||||
|
||||
/** An IPoIB MAC address */
|
||||
struct ipoib_mac {
|
||||
/** Queue pair number
|
||||
*
|
||||
* MSB must be zero; QPNs are only 24-bit.
|
||||
*/
|
||||
uint32_t qpn;
|
||||
/** Port GID */
|
||||
struct ib_gid gid;
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
/** IPoIB link-layer header length */
|
||||
#define IPOIB_HLEN 24
|
||||
|
||||
/**
|
||||
* IPoIB link-layer header pseudo portion
|
||||
*
|
||||
* This part doesn't actually exist on the wire, but it provides a
|
||||
* convenient way to fit into the typical network device model.
|
||||
*/
|
||||
struct ipoib_pseudo_hdr {
|
||||
/** Peer address */
|
||||
struct ipoib_mac peer;
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
/** IPoIB link-layer header real portion */
|
||||
struct ipoib_real_hdr {
|
||||
/** Network-layer protocol */
|
||||
uint16_t proto;
|
||||
/** Reserved, must be zero */
|
||||
uint16_t reserved;
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
/** An IPoIB link-layer header */
|
||||
struct ipoib_hdr {
|
||||
/** Pseudo portion */
|
||||
struct ipoib_pseudo_hdr pseudo;
|
||||
/** Real portion */
|
||||
struct ipoib_real_hdr real;
|
||||
} __attribute__ (( packed ));
|
||||
|
||||
extern struct ll_protocol ipoib_protocol;
|
||||
|
||||
extern const char * ipoib_ntoa ( const void *ll_addr );
|
||||
|
||||
/**
|
||||
* Allocate IPoIB device
|
||||
*
|
||||
* @v priv_size Size of driver private data
|
||||
* @ret netdev Network device, or NULL
|
||||
*/
|
||||
static inline struct net_device * alloc_ipoibdev ( size_t priv_size ) {
|
||||
struct net_device *netdev;
|
||||
|
||||
netdev = alloc_netdev ( priv_size );
|
||||
if ( netdev ) {
|
||||
netdev->ll_protocol = &ipoib_protocol;
|
||||
}
|
||||
return netdev;
|
||||
}
|
||||
|
||||
extern int ipoib_probe ( struct ib_device *ibdev );
|
||||
extern void ipoib_remove ( struct ib_device *ibdev );
|
||||
|
||||
#endif /* _GPXE_IPOIB_H */
|
@ -275,7 +275,8 @@ struct tcp_options {
|
||||
* actually use 65536, we use a window size of (65536-4) to ensure
|
||||
* that payloads remain dword-aligned.
|
||||
*/
|
||||
#define TCP_MAX_WINDOW_SIZE ( 65536 - 4 )
|
||||
//#define TCP_MAX_WINDOW_SIZE ( 65536 - 4 )
|
||||
#define TCP_MAX_WINDOW_SIZE 4096
|
||||
|
||||
/**
|
||||
* Path MTU
|
||||
|
210
src/net/infiniband.c
Normal file
210
src/net/infiniband.c
Normal file
@ -0,0 +1,210 @@
|
||||
/*
|
||||
* Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <byteswap.h>
|
||||
#include <errno.h>
|
||||
#include <assert.h>
|
||||
#include <gpxe/list.h>
|
||||
#include <gpxe/if_arp.h>
|
||||
#include <gpxe/netdevice.h>
|
||||
#include <gpxe/iobuf.h>
|
||||
#include <gpxe/infiniband.h>
|
||||
|
||||
/** @file
|
||||
*
|
||||
* Infiniband protocol
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* Create completion queue
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v num_cqes Number of completion queue entries
|
||||
* @ret cq New completion queue
|
||||
*/
|
||||
struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev,
|
||||
unsigned int num_cqes ) {
|
||||
struct ib_completion_queue *cq;
|
||||
int rc;
|
||||
|
||||
DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
|
||||
|
||||
/* Allocate and initialise data structure */
|
||||
cq = zalloc ( sizeof ( *cq ) );
|
||||
if ( ! cq )
|
||||
return NULL;
|
||||
cq->num_cqes = num_cqes;
|
||||
INIT_LIST_HEAD ( &cq->work_queues );
|
||||
|
||||
/* Perform device-specific initialisation and get CQN */
|
||||
if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
|
||||
DBGC ( ibdev, "IBDEV %p could not initialise completion "
|
||||
"queue: %s\n", ibdev, strerror ( rc ) );
|
||||
free ( cq );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
|
||||
"with CQN %#lx\n", ibdev, num_cqes, cq, cq->dev_priv, cq->cqn );
|
||||
return cq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy completion queue
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v cq Completion queue
|
||||
*/
|
||||
void ib_destroy_cq ( struct ib_device *ibdev,
|
||||
struct ib_completion_queue *cq ) {
|
||||
DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
|
||||
ibdev, cq->cqn );
|
||||
assert ( list_empty ( &cq->work_queues ) );
|
||||
ibdev->op->destroy_cq ( ibdev, cq );
|
||||
free ( cq );
|
||||
}
|
||||
|
||||
/**
|
||||
* Create queue pair
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v num_send_wqes Number of send work queue entries
|
||||
* @v send_cq Send completion queue
|
||||
* @v num_recv_wqes Number of receive work queue entries
|
||||
* @v recv_cq Receive completion queue
|
||||
* @v qkey Queue key
|
||||
* @ret qp Queue pair
|
||||
*/
|
||||
struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
|
||||
unsigned int num_send_wqes,
|
||||
struct ib_completion_queue *send_cq,
|
||||
unsigned int num_recv_wqes,
|
||||
struct ib_completion_queue *recv_cq,
|
||||
unsigned long qkey ) {
|
||||
struct ib_queue_pair *qp;
|
||||
size_t total_size;
|
||||
int rc;
|
||||
|
||||
DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
|
||||
|
||||
/* Allocate and initialise data structure */
|
||||
total_size = ( sizeof ( *qp ) +
|
||||
( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
|
||||
( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
|
||||
qp = zalloc ( total_size );
|
||||
if ( ! qp )
|
||||
return NULL;
|
||||
qp->qkey = qkey;
|
||||
qp->send.qp = qp;
|
||||
qp->send.is_send = 1;
|
||||
qp->send.cq = send_cq;
|
||||
list_add ( &qp->send.list, &send_cq->work_queues );
|
||||
qp->send.num_wqes = num_send_wqes;
|
||||
qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
|
||||
qp->recv.qp = qp;
|
||||
qp->recv.cq = recv_cq;
|
||||
list_add ( &qp->recv.list, &recv_cq->work_queues );
|
||||
qp->recv.num_wqes = num_recv_wqes;
|
||||
qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
|
||||
( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
|
||||
|
||||
/* Perform device-specific initialisation and get QPN */
|
||||
if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
|
||||
DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
|
||||
"%s\n", ibdev, strerror ( rc ) );
|
||||
free ( qp );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
|
||||
ibdev, qp, qp->dev_priv, qp->qpn );
|
||||
DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
|
||||
ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
|
||||
qp->recv.iobufs );
|
||||
DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
|
||||
ibdev, qp->qpn, num_send_wqes, qp->recv.iobufs,
|
||||
( ( ( void * ) qp ) + total_size ) );
|
||||
return qp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy queue pair
|
||||
*
|
||||
* @v ibdev Infiniband device
|
||||
* @v qp Queue pair
|
||||
*/
|
||||
void ib_destroy_qp ( struct ib_device *ibdev,
|
||||
struct ib_queue_pair *qp ) {
|
||||
DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n",
|
||||
ibdev, qp->qpn );
|
||||
ibdev->op->destroy_qp ( ibdev, qp );
|
||||
list_del ( &qp->send.list );
|
||||
list_del ( &qp->recv.list );
|
||||
free ( qp );
|
||||
}
|
||||
|
||||
/**
|
||||
* Find work queue belonging to completion queue
|
||||
*
|
||||
* @v cq Completion queue
|
||||
* @v qpn Queue pair number
|
||||
* @v is_send Find send work queue (rather than receive)
|
||||
* @ret wq Work queue, or NULL if not found
|
||||
*/
|
||||
struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
|
||||
unsigned long qpn, int is_send ) {
|
||||
struct ib_work_queue *wq;
|
||||
|
||||
list_for_each_entry ( wq, &cq->work_queues, list ) {
|
||||
if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
|
||||
return wq;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate Infiniband device
|
||||
*
|
||||
* @v priv_size Size of private data area
|
||||
* @ret ibdev Infiniband device, or NULL
|
||||
*/
|
||||
struct ib_device * alloc_ibdev ( size_t priv_size ) {
|
||||
struct ib_device *ibdev;
|
||||
size_t total_len;
|
||||
|
||||
total_len = ( sizeof ( *ibdev ) + priv_size );
|
||||
ibdev = zalloc ( total_len );
|
||||
if ( ibdev ) {
|
||||
ibdev->dev_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
|
||||
}
|
||||
return ibdev;
|
||||
}
|
||||
|
||||
/**
 * Free Infiniband device
 *
 * @v ibdev		Infiniband device
 *
 * Releases the device structure with a single free() call.
 */
void free_ibdev ( struct ib_device *ibdev ) {
	free ( ibdev );
}
|
Loading…
Reference in New Issue
Block a user