diff --git a/src/Makefile b/src/Makefile index 0682dc5a..5327a520 100644 --- a/src/Makefile +++ b/src/Makefile @@ -152,6 +152,7 @@ SRCDIRS += drivers/scsi SRCDIRS += drivers/ata SRCDIRS += drivers/nvs SRCDIRS += drivers/bitbash +SRCDIRS += drivers/infiniband SRCDIRS += interface/pxe SRCDIRS += tests SRCDIRS += crypto crypto/axtls crypto/matrixssl diff --git a/src/drivers/infiniband/MT25218_PRM.h b/src/drivers/infiniband/MT25218_PRM.h new file mode 100644 index 00000000..19ca92cd --- /dev/null +++ b/src/drivers/infiniband/MT25218_PRM.h @@ -0,0 +1,3460 @@ +/* + This software is available to you under a choice of one of two + licenses. You may choose to be licensed under the terms of the GNU + General Public License (GPL) Version 2, available at + , or the OpenIB.org BSD + license, available in the LICENSE.TXT file accompanying this + software. These details are also available at + . + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + + Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. 
+*/ + +/*** + *** This file was generated at "Tue Nov 22 15:21:23 2005" + *** by: + *** % csp_bf -copyright=/mswg/misc/license-header.txt -prefix arbelprm_ -bits -fixnames MT25218_PRM.csp + ***/ + +#ifndef H_prefix_arbelprm_bits_fixnames_MT25218_PRM_csp_H +#define H_prefix_arbelprm_bits_fixnames_MT25218_PRM_csp_H + +/* UD Address Vector */ + +struct arbelprm_ud_address_vector_st { /* Little Endian */ + pseudo_bit_t pd[0x00018]; /* Protection Domain */ + pseudo_bit_t port_number[0x00002]; /* Port number + 1 - Port 1 + 2 - Port 2 + other - reserved */ + pseudo_bit_t reserved0[0x00006]; +/* -------------- */ + pseudo_bit_t rlid[0x00010]; /* Remote (Destination) LID */ + pseudo_bit_t my_lid_path_bits[0x00007];/* Source LID - the lower 7 bits (upper bits are taken from PortInfo) */ + pseudo_bit_t g[0x00001]; /* Global address enable - if set, GRH will be formed for packet header */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t hop_limit[0x00008]; /* IPv6 hop limit */ + pseudo_bit_t max_stat_rate[0x00003];/* Maximum static rate control. + 0 - 4X injection rate + 1 - 1X injection rate + other - reserved + */ + pseudo_bit_t reserved2[0x00001]; + pseudo_bit_t msg[0x00002]; /* Max Message size, size is 256*2^MSG bytes */ + pseudo_bit_t reserved3[0x00002]; + pseudo_bit_t mgid_index[0x00006]; /* Index to port GID table + mgid_index = (port_number-1) * 2^log_max_gid + gid_index + Where: + 1. log_max_gid is taken from QUERY_DEV_LIM command + 2. 
gid_index is the index to the GID table */ + pseudo_bit_t reserved4[0x0000a]; +/* -------------- */ + pseudo_bit_t flow_label[0x00014]; /* IPv6 flow label */ + pseudo_bit_t tclass[0x00008]; /* IPv6 TClass */ + pseudo_bit_t sl[0x00004]; /* InfiniBand Service Level (SL) */ +/* -------------- */ + pseudo_bit_t rgid_127_96[0x00020]; /* Remote GID[127:96] */ +/* -------------- */ + pseudo_bit_t rgid_95_64[0x00020]; /* Remote GID[95:64] */ +/* -------------- */ + pseudo_bit_t rgid_63_32[0x00020]; /* Remote GID[63:32] */ +/* -------------- */ + pseudo_bit_t rgid_31_0[0x00020]; /* Remote GID[31:0] if G bit is set. Must be set to 0x2 if G bit is cleared. */ +/* -------------- */ +}; + +/* Send doorbell */ + +struct arbelprm_send_doorbell_st { /* Little Endian */ + pseudo_bit_t nopcode[0x00005]; /* Opcode of descriptor to be executed */ + pseudo_bit_t f[0x00001]; /* Fence bit. If set, descriptor is fenced */ + pseudo_bit_t reserved0[0x00002]; + pseudo_bit_t wqe_counter[0x00010]; /* Modulo-64K counter of WQEs posted to the QP since its creation excluding the newly posted WQEs in this doorbell. Should be zero for the first doorbell on the QP */ + pseudo_bit_t wqe_cnt[0x00008]; /* Number of WQEs posted with this doorbell. Must be greater than zero. 
*/ +/* -------------- */ + pseudo_bit_t nds[0x00006]; /* Next descriptor size (in 16-byte chunks) */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t qpn[0x00018]; /* QP number this doorbell is rung on */ +/* -------------- */ +}; + +/* ACCESS_LAM_inject_errors_input_modifier */ + +struct arbelprm_access_lam_inject_errors_input_modifier_st { /* Little Endian */ + pseudo_bit_t index3[0x00007]; + pseudo_bit_t q3[0x00001]; + pseudo_bit_t index2[0x00007]; + pseudo_bit_t q2[0x00001]; + pseudo_bit_t index1[0x00007]; + pseudo_bit_t q1[0x00001]; + pseudo_bit_t index0[0x00007]; + pseudo_bit_t q0[0x00001]; +/* -------------- */ +}; + +/* ACCESS_LAM_inject_errors_input_parameter */ + +struct arbelprm_access_lam_inject_errors_input_parameter_st { /* Little Endian */ + pseudo_bit_t ba[0x00002]; /* Bank Address */ + pseudo_bit_t da[0x00002]; /* Dimm Address */ + pseudo_bit_t reserved0[0x0001c]; +/* -------------- */ + pseudo_bit_t ra[0x00010]; /* Row Address */ + pseudo_bit_t ca[0x00010]; /* Column Address */ +/* -------------- */ +}; + +/* Receive wqe segment next */ + +struct arbelprm_recv_wqe_segment_next_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00006]; + pseudo_bit_t nda_31_6[0x0001a]; /* Next WQE address, low 32 bit. WQE address must be aligned to 64-byte boundary (6 LSB are forced ZERO). */ +/* -------------- */ + pseudo_bit_t nds[0x00006]; /* Next WQE size in OctoWords (16 bytes). + Zero value in NDS field signals end of WQEs' chain. 
+ */ + pseudo_bit_t reserved1[0x0001a]; +/* -------------- */ +}; + +/* Send wqe segment data inline */ + +struct arbelprm_wqe_segment_data_inline_st { /* Little Endian */ + pseudo_bit_t byte_count[0x0000a]; /* Not including padding for 16Byte chunks */ + pseudo_bit_t reserved0[0x00015]; + pseudo_bit_t always1[0x00001]; +/* -------------- */ + pseudo_bit_t data[0x00018]; /* Data may be more than this segment size - in 16Byte chunks */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t reserved2[0x00040]; +/* -------------- */ +}; + +/* Send wqe segment data ptr */ + +struct arbelprm_wqe_segment_data_ptr_st { /* Little Endian */ + pseudo_bit_t byte_count[0x0001f]; + pseudo_bit_t always0[0x00001]; +/* -------------- */ + pseudo_bit_t l_key[0x00020]; +/* -------------- */ + pseudo_bit_t local_address_h[0x00020]; +/* -------------- */ + pseudo_bit_t local_address_l[0x00020]; +/* -------------- */ +}; + +/* Local invalidate segment */ + +struct arbelprm_local_invalidate_segment_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ + pseudo_bit_t mem_key[0x00018]; + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t reserved2[0x000a0]; +/* -------------- */ +}; + +/* Fast_Registration_Segment */ + +struct arbelprm_fast_registration_segment_st { /* Little Endian */ + pseudo_bit_t reserved0[0x0001b]; + pseudo_bit_t lr[0x00001]; /* If set - Local Read access will be enabled */ + pseudo_bit_t lw[0x00001]; /* If set - Local Write access will be enabled */ + pseudo_bit_t rr[0x00001]; /* If set - Remote Read access will be enabled */ + pseudo_bit_t rw[0x00001]; /* If set - Remote Write access will be enabled */ + pseudo_bit_t a[0x00001]; /* If set - Remote Atomic access will be enabled */ +/* -------------- */ + pseudo_bit_t pbl_ptr_63_32[0x00020];/* Physical address pointer [63:32] to the physical buffer list */ +/* -------------- */ + pseudo_bit_t mem_key[0x00020]; /* Memory Key on which the fast registration is 
executed on. */ +/* -------------- */ + pseudo_bit_t page_size[0x00005]; /* Page size used for the region. Actual size is [4K]*2^Page_size bytes. + page_size should be less than 20. */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t zb[0x00001]; /* Zero Based Region */ + pseudo_bit_t pbl_ptr_31_8[0x00018]; /* Physical address pointer [31:8] to the physical buffer list */ +/* -------------- */ + pseudo_bit_t start_address_h[0x00020];/* Start Address[63:32] - Virtual Address where this region starts */ +/* -------------- */ + pseudo_bit_t start_address_l[0x00020];/* Start Address[31:0] - Virtual Address where this region starts */ +/* -------------- */ + pseudo_bit_t reg_len_h[0x00020]; /* Region Length[63:32] */ +/* -------------- */ + pseudo_bit_t reg_len_l[0x00020]; /* Region Length[31:0] */ +/* -------------- */ +}; + +/* Send wqe segment atomic */ + +struct arbelprm_wqe_segment_atomic_st { /* Little Endian */ + pseudo_bit_t swap_add_h[0x00020]; +/* -------------- */ + pseudo_bit_t swap_add_l[0x00020]; +/* -------------- */ + pseudo_bit_t compare_h[0x00020]; +/* -------------- */ + pseudo_bit_t compare_l[0x00020]; +/* -------------- */ +}; + +/* Send wqe segment remote address */ + +struct arbelprm_wqe_segment_remote_address_st { /* Little Endian */ + pseudo_bit_t remote_virt_addr_h[0x00020]; +/* -------------- */ + pseudo_bit_t remote_virt_addr_l[0x00020]; +/* -------------- */ + pseudo_bit_t rkey[0x00020]; +/* -------------- */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ +}; + +/* Send wqe segment bind */ + +struct arbelprm_wqe_segment_bind_st { /* Little Endian */ + pseudo_bit_t reserved0[0x0001d]; + pseudo_bit_t rr[0x00001]; /* If set, Remote Read Enable for bound window. */ + pseudo_bit_t rw[0x00001]; /* If set, Remote Write Enable for bound window. + */ + pseudo_bit_t a[0x00001]; /* If set, Atomic Enable for bound window. */ +/* -------------- */ + pseudo_bit_t reserved1[0x0001e]; + pseudo_bit_t zb[0x00001]; /* If set, Window is Zero Based. 
*/ + pseudo_bit_t type[0x00001]; /* Window type. + 0 - Type one window + 1 - Type two window + */ +/* -------------- */ + pseudo_bit_t new_rkey[0x00020]; /* The new RKey of window to bind */ +/* -------------- */ + pseudo_bit_t region_lkey[0x00020]; /* Local key of region, which window will be bound to */ +/* -------------- */ + pseudo_bit_t start_address_h[0x00020]; +/* -------------- */ + pseudo_bit_t start_address_l[0x00020]; +/* -------------- */ + pseudo_bit_t length_h[0x00020]; +/* -------------- */ + pseudo_bit_t length_l[0x00020]; +/* -------------- */ +}; + +/* Send wqe segment ud */ + +struct arbelprm_wqe_segment_ud_st { /* Little Endian */ + struct arbelprm_ud_address_vector_st ud_address_vector;/* UD Address Vector */ +/* -------------- */ + pseudo_bit_t destination_qp[0x00018]; + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t q_key[0x00020]; +/* -------------- */ + pseudo_bit_t reserved1[0x00040]; +/* -------------- */ +}; + +/* Send wqe segment rd */ + +struct arbelprm_wqe_segment_rd_st { /* Little Endian */ + pseudo_bit_t destination_qp[0x00018]; + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t q_key[0x00020]; +/* -------------- */ + pseudo_bit_t reserved1[0x00040]; +/* -------------- */ +}; + +/* Send wqe segment ctrl */ + +struct arbelprm_wqe_segment_ctrl_send_st { /* Little Endian */ + pseudo_bit_t always1[0x00001]; + pseudo_bit_t s[0x00001]; /* Solicited Event bit. If set, SE (Solicited Event) bit is set in the (last packet of) message. */ + pseudo_bit_t e[0x00001]; /* Event bit. If set, event is generated upon WQE's completion, if QP is allowed to generate an event. Every WQE with E-bit set generates an event. The C bit must be set on unsignalled QPs if the E bit is set. */ + pseudo_bit_t c[0x00001]; /* Completion Queue bit. Valid for unsignalled QPs only. 
If set, the CQ is updated upon WQE's completion */ + pseudo_bit_t ip[0x00001]; /* When set, InfiniHost III Ex will calculate the IP checksum of the IP header that is present immediately after the IPoverIB encapsulation header. In the case of multiple headers (encapsulation), InfiniHost III Ex will calculate the checksum only for the first IP header following the IPoverIB encapsulation header. Not Valid for IPv6 packets */ + pseudo_bit_t tcp_udp[0x00001]; /* When set, InfiniHost III Ex will calculate the TCP/UDP checksum of the packet that is present immediately after the IP header. In the case of multiple headers (encapsulation), InfiniHost III Ex will calculate the checksum only for the first TCP header following the IP header. This bit may be set only if the entire TCP/UDP segment is present in one IB packet */ + pseudo_bit_t reserved0[0x00001]; + pseudo_bit_t so[0x00001]; /* Strong Ordering - when set, the WQE will be executed only after all previous WQEs have been executed. Can be set for RC WQEs only. This bit must be set in type two BIND, Fast Registration and Local invalidate operations. */ + pseudo_bit_t reserved1[0x00018]; +/* -------------- */ + pseudo_bit_t immediate[0x00020]; /* If the OpCode encodes an operation with Immediate (RDMA-write/SEND), This field will hold the Immediate data to be sent. If the OpCode encodes send and invalidate operations, this field holds the Invalidation key to be inserted into the packet; otherwise, this field is reserved. */ +/* -------------- */ +}; + +/* Send wqe segment next */ + +struct arbelprm_wqe_segment_next_st { /* Little Endian */ + pseudo_bit_t nopcode[0x00005]; /* Next Opcode: OpCode to be used in the next WQE. Encodes the type of operation to be executed on the QP: + '00000' - NOP. 
WQE with this opcode creates a completion, but does nothing else + '01000' - RDMA-write + '01001' - RDMA-Write with Immediate + '10000' - RDMA-read + '10001' - Atomic Compare & swap + '10010' - Atomic Fetch & Add + '11000' 
- Bind memory window + + The encoding for the following operations depends on the QP type: + For RC, UC and RD QP: + '01010' - SEND + '01011' - SEND with Immediate + + For UD QP: + the encoding depends on the values of bit[31] of the Q_key field in the Datagram Segment (see Table 39, "Unreliable Datagram Segment Format - Pointers," on page 101) of + both the current WQE and the next WQE, as follows: + + If the last WQE Q_Key bit[31] is clear and the next WQE Q_key bit[31] is set : + '01000' - SEND + '01001' - SEND with Immediate + + otherwise (if the next WQE Q_key bit[31] is cleared, or the last WQE Q_Key bit[31] is set): + '01010' - SEND + '01011' - SEND with Immediate + + All other opcode values are RESERVED, and will result in invalid operation execution. */ + pseudo_bit_t reserved0[0x00001]; + pseudo_bit_t nda_31_6[0x0001a]; /* Next WQE address, low 32 bit. WQE address must be aligned to 64-byte boundary (6 LSB are forced ZERO). */ +/* -------------- */ + pseudo_bit_t nds[0x00006]; /* Next WQE size in OctoWords (16 bytes). + Zero value in NDS field signals end of WQEs' chain. + */ + pseudo_bit_t f[0x00001]; /* Fence bit. If set, next WQE will start execution only after all previous Read/Atomic WQEs complete. */ + pseudo_bit_t always1[0x00001]; + pseudo_bit_t reserved1[0x00018]; +/* -------------- */ +}; + +/* Address Path */ + +struct arbelprm_address_path_st { /* Little Endian */ + pseudo_bit_t pkey_index[0x00007]; /* PKey table index */ + pseudo_bit_t reserved0[0x00011]; + pseudo_bit_t port_number[0x00002]; /* Specific port associated with this QP/EE. 
+ 1 - Port 1 + 2 - Port 2 + other - reserved */ + pseudo_bit_t reserved1[0x00006]; +/* -------------- */ + pseudo_bit_t rlid[0x00010]; /* Remote (Destination) LID */ + pseudo_bit_t my_lid_path_bits[0x00007];/* Source LID - the lower 7 bits (upper bits are taken from PortInfo) */ + pseudo_bit_t g[0x00001]; /* Global address enable - if set, GRH will be formed for packet header */ + pseudo_bit_t reserved2[0x00005]; + pseudo_bit_t rnr_retry[0x00003]; /* RNR retry count (see C9-132 in IB spec Vol 1) + 0-6 - number of retries + 7 - infinite */ +/* -------------- */ + pseudo_bit_t hop_limit[0x00008]; /* IPv6 hop limit */ + pseudo_bit_t max_stat_rate[0x00003];/* Maximum static rate control. + 0 - 100% injection rate + 1 - 25% injection rate + 2 - 12.5% injection rate + 3 - 50% injection rate + other - reserved */ + pseudo_bit_t reserved3[0x00005]; + pseudo_bit_t mgid_index[0x00006]; /* Index to port GID table */ + pseudo_bit_t reserved4[0x00005]; + pseudo_bit_t ack_timeout[0x00005]; /* Local ACK timeout - Transport timer for activation of retransmission mechanism. Refer to IB spec Vol1 9.7.6.1.3 for further details. + The transport timer is set to 4.096us*2^ack_timeout, if ack_timeout is 0 then transport timer is disabled. 
*/ +/* -------------- */ + pseudo_bit_t flow_label[0x00014]; /* IPv6 flow label */ + pseudo_bit_t tclass[0x00008]; /* IPv6 TClass */ + pseudo_bit_t sl[0x00004]; /* InfiniBand Service Level (SL) */ +/* -------------- */ + pseudo_bit_t rgid_127_96[0x00020]; /* Remote GID[127:96] */ +/* -------------- */ + pseudo_bit_t rgid_95_64[0x00020]; /* Remote GID[95:64] */ +/* -------------- */ + pseudo_bit_t rgid_63_32[0x00020]; /* Remote GID[63:32] */ +/* -------------- */ + pseudo_bit_t rgid_31_0[0x00020]; /* Remote GID[31:0] */ +/* -------------- */ +}; + +/* HCA Command Register (HCR) */ + +struct arbelprm_hca_command_register_st { /* Little Endian */ + pseudo_bit_t in_param_h[0x00020]; /* Input Parameter: parameter[63:32] or pointer[63:32] to input mailbox (see command description) */ +/* -------------- */ + pseudo_bit_t in_param_l[0x00020]; /* Input Parameter: parameter[31:0] or pointer[31:0] to input mailbox (see command description) */ +/* -------------- */ + pseudo_bit_t input_modifier[0x00020];/* Input Parameter Modifier */ +/* -------------- */ + pseudo_bit_t out_param_h[0x00020]; /* Output Parameter: parameter[63:32] or pointer[63:32] to output mailbox (see command description) */ +/* -------------- */ + pseudo_bit_t out_param_l[0x00020]; /* Output Parameter: parameter[31:0] or pointer[31:0] to output mailbox (see command description) */ +/* -------------- */ + pseudo_bit_t reserved0[0x00010]; + pseudo_bit_t token[0x00010]; /* Software assigned token to the command, to uniquely identify it. The token is returned to the software in the EQE reported. */ +/* -------------- */ + pseudo_bit_t opcode[0x0000c]; /* Command opcode */ + pseudo_bit_t opcode_modifier[0x00004];/* Opcode Modifier, see specific description for each command. 
*/ + pseudo_bit_t reserved1[0x00006]; + pseudo_bit_t e[0x00001]; /* Event Request + 0 - Don't report event (software will poll the GO bit) + 1 - Report event to EQ when the command completes */ + pseudo_bit_t go[0x00001]; /* Go (0=Software ownership for the HCR, 1=Hardware ownership for the HCR) + Software can write to the HCR only if Go bit is cleared. + Software must set the Go bit to trigger the HW to execute the command. Software must not write to this register value other than 1 for the Go bit. */ + pseudo_bit_t status[0x00008]; /* Command execution status report. Valid only if command interface is under SW ownership (Go bit is cleared) + 0 - command completed without error. If different than zero, command execution completed with error. Syndrome encoding depends on the command executed and is defined for each command */ +/* -------------- */ +}; + +/* CQ Doorbell */ + +struct arbelprm_cq_cmd_doorbell_st { /* Little Endian */ + pseudo_bit_t cqn[0x00018]; /* CQ number accessed */ + pseudo_bit_t cmd[0x00003]; /* Command to be executed on CQ + 0x0 - Reserved + 0x1 - Request notification for next Solicited completion event. CQ_param specifies the current CQ Consumer Counter. + 0x2 - Request notification for next Solicited or Unsolicited completion event. CQ_param specifies the current CQ Consumer Counter. + 0x3 - Request notification for multiple completions (Arm-N). CQ_param specifies the value of the CQ Counter that when reached by HW (i.e. HW generates a CQE into this Counter) Event will be generated + Other - Reserved */ + pseudo_bit_t reserved0[0x00001]; + pseudo_bit_t cmd_sn[0x00002]; /* Command Sequence Number - This field should be incremented upon receiving completion notification of the respective CQ. + This transition is done by ringing Request notification for next Solicited, Request notification for next Solicited or Unsolicited + completion or Request notification for multiple completions doorbells after receiving completion notification. 
+ This field is initialized to Zero */ + pseudo_bit_t reserved1[0x00002]; +/* -------------- */ + pseudo_bit_t cq_param[0x00020]; /* parameter to be used by CQ command */ +/* -------------- */ +}; + +/* RD-send doorbell */ + +struct arbelprm_rd_send_doorbell_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t een[0x00018]; /* End-to-end context number (reliable datagram) + Must be zero for Nop and Bind operations */ +/* -------------- */ + pseudo_bit_t reserved1[0x00008]; + pseudo_bit_t qpn[0x00018]; /* QP number this doorbell is rung on */ +/* -------------- */ + struct arbelprm_send_doorbell_st send_doorbell;/* Send Parameters */ +/* -------------- */ +}; + +/* Multicast Group Member QP */ + +struct arbelprm_mgmqp_st { /* Little Endian */ + pseudo_bit_t qpn_i[0x00018]; /* QPN_i: QP number which is a member in this multicast group. Valid only if Qi bit is set. Length of the QPN_i list is set in INIT_HCA */ + pseudo_bit_t reserved0[0x00007]; + pseudo_bit_t qi[0x00001]; /* Qi: QPN_i is valid */ +/* -------------- */ +}; + +/* vsd */ + +struct arbelprm_vsd_st { /* Little Endian */ + pseudo_bit_t vsd_dw0[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw1[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw2[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw3[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw4[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw5[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw6[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw7[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw8[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw9[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw10[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw11[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw12[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw13[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw14[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw15[0x00020]; +/* 
-------------- */ + pseudo_bit_t vsd_dw16[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw17[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw18[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw19[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw20[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw21[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw22[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw23[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw24[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw25[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw26[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw27[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw28[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw29[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw30[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw31[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw32[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw33[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw34[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw35[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw36[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw37[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw38[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw39[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw40[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw41[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw42[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw43[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw44[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw45[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw46[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw47[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw48[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw49[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw50[0x00020]; +/* -------------- */ + pseudo_bit_t 
vsd_dw51[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw52[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw53[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw54[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw55[0x00020]; +/* -------------- */ +}; + +/* ACCESS_LAM_inject_errors */ + +struct arbelprm_access_lam_inject_errors_st { /* Little Endian */ + struct arbelprm_access_lam_inject_errors_input_parameter_st access_lam_inject_errors_input_parameter; +/* -------------- */ + struct arbelprm_access_lam_inject_errors_input_modifier_st access_lam_inject_errors_input_modifier; +/* -------------- */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ +}; + +/* Logical DIMM Information */ + +struct arbelprm_dimminfo_st { /* Little Endian */ + pseudo_bit_t dimmsize[0x00010]; /* Size of DIMM in units of 2^20 Bytes. This value is valid only when DIMMStatus is 0. */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t dimmstatus[0x00001]; /* DIMM Status + 0 - Enabled + 1 - Disabled + */ + pseudo_bit_t dh[0x00001]; /* When set, the DIMM is Hidden and can not be accessed from the PCI bus. */ + pseudo_bit_t wo[0x00001]; /* When set, the DIMM is write only. + If data integrity is configured (other than none), the DIMM must be + only targeted by write transactions where the address and size are multiples of 16 bytes. */ + pseudo_bit_t reserved1[0x00005]; +/* -------------- */ + pseudo_bit_t spd[0x00001]; /* 0 - DIMM SPD was read from DIMM + 1 - DIMM SPD was read from InfiniHost-III-EX NVMEM */ + pseudo_bit_t sladr[0x00003]; /* SPD Slave Address 3 LSBits. + Valid only if spd bit is 0. */ + pseudo_bit_t sock_num[0x00002]; /* DIMM socket number (for double sided DIMM one of the two numbers will be reported) */ + pseudo_bit_t syn[0x00004]; /* Error syndrome (valid regardless of status value) + 0 - DIMM has no error + 1 - SPD error (e.g. checksum error, no response, error while reading) + 2 - DIMM out of bounds (e.g. 
DIMM rows number is not between 7 and 14, DIMM type is not 2) + 3 - DIMM conflict (e.g. mix of registered and unbuffered DIMMs, CAS latency conflict) + 5 - DIMM size trimmed due to configuration (size exceeds) + other - Error, reserved + */ + pseudo_bit_t reserved2[0x00016]; +/* -------------- */ + pseudo_bit_t reserved3[0x00040]; +/* -------------- */ + pseudo_bit_t dimm_start_adr_h[0x00020];/* DIMM memory start address [63:32]. This value is valid only when DIMMStatus is 0. */ +/* -------------- */ + pseudo_bit_t dimm_start_adr_l[0x00020];/* DIMM memory start address [31:0]. This value is valid only when DIMMStatus is 0. */ +/* -------------- */ + pseudo_bit_t reserved4[0x00040]; +/* -------------- */ +}; + +/* UAR Parameters */ + +struct arbelprm_uar_params_st { /* Little Endian */ + pseudo_bit_t uar_base_addr_h[0x00020];/* UAR Base (physical) Address [63:32] (QUERY_HCA only) */ +/* -------------- */ + pseudo_bit_t reserved0[0x00014]; + pseudo_bit_t uar_base_addr_l[0x0000c];/* UAR Base (physical) Address [31:20] (QUERY_HCA only) */ +/* -------------- */ + pseudo_bit_t uar_page_sz[0x00008]; /* This field defines the size of each UAR page. + Size of UAR Page is 4KB*2^UAR_Page_Size */ + pseudo_bit_t log_max_uars[0x00004]; /* Number of UARs supported is 2^log_max_UARs */ + pseudo_bit_t reserved1[0x00004]; + pseudo_bit_t log_uar_entry_sz[0x00006];/* Size of UAR Context entry is 2^log_uar_sz in 4KByte pages */ + pseudo_bit_t reserved2[0x0000a]; +/* -------------- */ + pseudo_bit_t reserved3[0x00020]; +/* -------------- */ + pseudo_bit_t uar_scratch_base_addr_h[0x00020];/* Base address of UAR scratchpad [63:32]. + Number of entries in table is 2^log_max_uars. + Table must be aligned to its size */ +/* -------------- */ + pseudo_bit_t uar_scratch_base_addr_l[0x00020];/* Base address of UAR scratchpad [31:0]. + Number of entries in table is 2^log_max_uars. + Table must be aligned to its size. 
*/ +/* -------------- */ + pseudo_bit_t uar_context_base_addr_h[0x00020];/* Base address of UAR Context [63:32]. + Number of entries in table is 2^log_max_uars. + Table must be aligned to its size. */ +/* -------------- */ + pseudo_bit_t uar_context_base_addr_l[0x00020];/* Base address of UAR Context [31:0]. + Number of entries in table is 2^log_max_uars. + Table must be aligned to its size. */ +/* -------------- */ +}; + +/* Translation and Protection Tables Parameters */ + +struct arbelprm_tptparams_st { /* Little Endian */ + pseudo_bit_t mpt_base_adr_h[0x00020];/* MPT - Memory Protection Table base physical address [63:32]. + Entry size is 64 bytes. + Table must be aligned to its size. + Address may be set to 0xFFFFFFFF if address translation and protection is not supported. */ +/* -------------- */ + pseudo_bit_t mpt_base_adr_l[0x00020];/* MPT - Memory Protection Table base physical address [31:0]. + Entry size is 64 bytes. + Table must be aligned to its size. + Address may be set to 0xFFFFFFFF if address translation and protection is not supported. */ +/* -------------- */ + pseudo_bit_t log_mpt_sz[0x00006]; /* Log (base 2) of the number of region/windows entries in the MPT table. */ + pseudo_bit_t reserved0[0x00002]; + pseudo_bit_t pfto[0x00005]; /* Page Fault RNR Timeout - + The field returned in RNR Naks generated when a page fault is detected. + It has no effect when on-demand-paging is not used. */ + pseudo_bit_t reserved1[0x00013]; +/* -------------- */ + pseudo_bit_t reserved2[0x00020]; +/* -------------- */ + pseudo_bit_t mtt_base_addr_h[0x00020];/* MTT - Memory Translation table base physical address [63:32]. + Table must be aligned to its size. + Address may be set to 0xFFFFFFFF if address translation and protection is not supported. */ +/* -------------- */ + pseudo_bit_t mtt_base_addr_l[0x00020];/* MTT - Memory Translation table base physical address [31:0]. + Table must be aligned to its size. 
+ Address may be set to 0xFFFFFFFF if address translation and protection is not supported. */ +/* -------------- */ + pseudo_bit_t reserved3[0x00040]; +/* -------------- */ +}; + +/* Multicast Support Parameters */ + +struct arbelprm_multicastparam_st { /* Little Endian */ + pseudo_bit_t mc_base_addr_h[0x00020];/* Base Address of the Multicast Table [63:32]. + The base address must be aligned to the entry size. + Address may be set to 0xFFFFFFFF if multicast is not supported. */ +/* -------------- */ + pseudo_bit_t mc_base_addr_l[0x00020];/* Base Address of the Multicast Table [31:0]. + The base address must be aligned to the entry size. + Address may be set to 0xFFFFFFFF if multicast is not supported. */ +/* -------------- */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ + pseudo_bit_t log_mc_table_entry_sz[0x00010];/* Log2 of the Size of multicast group member (MGM) entry. + Must be greater than 5 (to allow CTRL and GID sections). + That implies the number of QPs per MC table entry. 
*/ + pseudo_bit_t reserved1[0x00010]; +/* -------------- */ + pseudo_bit_t mc_table_hash_sz[0x00011];/* Number of entries in multicast DGID hash table (must be power of 2) + INIT_HCA - the required number of entries + QUERY_HCA - the actual number of entries assigned by firmware (will be less than or equal to the amount required in INIT_HCA) */ + pseudo_bit_t reserved2[0x0000f]; +/* -------------- */ + pseudo_bit_t log_mc_table_sz[0x00005];/* Log2 of the overall number of MC entries in the MCG table (includes both hash and auxiliary tables) */ + pseudo_bit_t reserved3[0x00013]; + pseudo_bit_t mc_hash_fn[0x00003]; /* Multicast hash function + 0 - Default hash function + other - reserved */ + pseudo_bit_t reserved4[0x00005]; +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ +}; + +/* QPC/EEC/CQC/EQC/RDB Parameters */ + +struct arbelprm_qpcbaseaddr_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00080]; +/* -------------- */ + pseudo_bit_t qpc_base_addr_h[0x00020];/* QPC Base Address [63:32] + Table must be aligned on its size */ +/* -------------- */ + pseudo_bit_t log_num_of_qp[0x00005];/* Log base 2 of number of supported QPs */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t qpc_base_addr_l[0x00019];/* QPC Base Address [31:7] + Table must be aligned on its size */ +/* -------------- */ + pseudo_bit_t reserved2[0x00040]; +/* -------------- */ + pseudo_bit_t eec_base_addr_h[0x00020];/* EEC Base Address [63:32] + Table must be aligned on its size. + Address may be set to 0xFFFFFFFF if RD is not supported. */ +/* -------------- */ + pseudo_bit_t log_num_of_ee[0x00005];/* Log base 2 of number of supported EEs. */ + pseudo_bit_t reserved3[0x00002]; + pseudo_bit_t eec_base_addr_l[0x00019];/* EEC Base Address [31:7] + Table must be aligned on its size + Address may be set to 0xFFFFFFFF if RD is not supported. 
*/ +/* -------------- */ + pseudo_bit_t srqc_base_addr_h[0x00020];/* SRQ Context Base Address [63:32] + Table must be aligned on its size + Address may be set to 0xFFFFFFFF if SRQ is not supported. */ +/* -------------- */ + pseudo_bit_t log_num_of_srq[0x00005];/* Log base 2 of number of supported SRQs. */ + pseudo_bit_t srqc_base_addr_l[0x0001b];/* SRQ Context Base Address [31:5] + Table must be aligned on its size + Address may be set to 0xFFFFFFFF if SRQ is not supported. */ +/* -------------- */ + pseudo_bit_t cqc_base_addr_h[0x00020];/* CQC Base Address [63:32] + Table must be aligned on its size */ +/* -------------- */ + pseudo_bit_t log_num_of_cq[0x00005];/* Log base 2 of number of supported CQs. */ + pseudo_bit_t reserved4[0x00001]; + pseudo_bit_t cqc_base_addr_l[0x0001a];/* CQC Base Address [31:6] + Table must be aligned on its size */ +/* -------------- */ + pseudo_bit_t reserved5[0x00040]; +/* -------------- */ + pseudo_bit_t eqpc_base_addr_h[0x00020];/* Extended QPC Base Address [63:32] + Table has same number of entries as QPC table. + Table must be aligned to entry size. */ +/* -------------- */ + pseudo_bit_t eqpc_base_addr_l[0x00020];/* Extended QPC Base Address [31:0] + Table has same number of entries as QPC table. + Table must be aligned to entry size. */ +/* -------------- */ + pseudo_bit_t reserved6[0x00040]; +/* -------------- */ + pseudo_bit_t eeec_base_addr_h[0x00020];/* Extended EEC Base Address [63:32] + Table has same number of entries as EEC table. + Table must be aligned to entry size. + Address may be set to 0xFFFFFFFF if RD is not supported. */ +/* -------------- */ + pseudo_bit_t eeec_base_addr_l[0x00020];/* Extended EEC Base Address [31:0] + Table has same number of entries as EEC table. + Table must be aligned to entry size. + Address may be set to 0xFFFFFFFF if RD is not supported. 
*/ +/* -------------- */ + pseudo_bit_t reserved7[0x00040]; +/* -------------- */ + pseudo_bit_t eqc_base_addr_h[0x00020];/* EQC Base Address [63:32] + Address may be set to 0xFFFFFFFF if EQs are not supported. + Table must be aligned to entry size. */ +/* -------------- */ + pseudo_bit_t log_num_eq[0x00004]; /* Log base 2 of number of supported EQs. + Must be 6 or less in InfiniHost-III-EX. */ + pseudo_bit_t reserved8[0x00002]; + pseudo_bit_t eqc_base_addr_l[0x0001a];/* EQC Base Address [31:6] + Address may be set to 0xFFFFFFFF if EQs are not supported. + Table must be aligned to entry size. */ +/* -------------- */ + pseudo_bit_t reserved9[0x00040]; +/* -------------- */ + pseudo_bit_t rdb_base_addr_h[0x00020];/* Base address of table that holds remote read and remote atomic requests [63:32]. + Address may be set to 0xFFFFFFFF if remote RDMA reads are not supported. + Please refer to QP and EE chapter for further explanation on RDB allocation. */ +/* -------------- */ + pseudo_bit_t rdb_base_addr_l[0x00020];/* Base address of table that holds remote read and remote atomic requests [31:0]. + Table must be aligned to RDB entry size (32 bytes). + Address may be set to zero if remote RDMA reads are not supported. + Please refer to QP and EE chapter for further explanation on RDB allocation. 
*/ +/* -------------- */ + pseudo_bit_t reserved10[0x00040]; +/* -------------- */ +}; + +/* Header_Log_Register */ + +struct arbelprm_header_log_register_st { /* Little Endian */ + pseudo_bit_t place_holder[0x00020]; +/* -------------- */ + pseudo_bit_t reserved0[0x00060]; +/* -------------- */ +}; + +/* Performance Monitors */ + +struct arbelprm_performance_monitors_st { /* Little Endian */ + pseudo_bit_t e0[0x00001]; /* Enables counting of respective performance counter */ + pseudo_bit_t e1[0x00001]; /* Enables counting of respective performance counter */ + pseudo_bit_t e2[0x00001]; /* Enables counting of respective performance counter */ + pseudo_bit_t reserved0[0x00001]; + pseudo_bit_t r0[0x00001]; /* If written to as '1 - resets respective performance counter, if written to as '0 - no change to matter */ + pseudo_bit_t r1[0x00001]; /* If written to as '1 - resets respective performance counter, if written to as '0 - no change to matter */ + pseudo_bit_t r2[0x00001]; /* If written to as '1 - resets respective performance counter, if written to as '0 - no change to matter */ + pseudo_bit_t reserved1[0x00001]; + pseudo_bit_t i0[0x00001]; /* Interrupt enable on respective counter overflow. '1 - interrupt enabled, '0 - interrupt disabled. */ + pseudo_bit_t i1[0x00001]; /* Interrupt enable on respective counter overflow. '1 - interrupt enabled, '0 - interrupt disabled. */ + pseudo_bit_t i2[0x00001]; /* Interrupt enable on respective counter overflow. '1 - interrupt enabled, '0 - interrupt disabled. */ + pseudo_bit_t reserved2[0x00001]; + pseudo_bit_t f0[0x00001]; /* Overflow flag. If set, overflow occurred on respective counter. Cleared if written to as '1 */ + pseudo_bit_t f1[0x00001]; /* Overflow flag. If set, overflow occurred on respective counter. Cleared if written to as '1 */ + pseudo_bit_t f2[0x00001]; /* Overflow flag. If set, overflow occurred on respective counter.
Cleared if written to as '1 */ + pseudo_bit_t reserved3[0x00001]; + pseudo_bit_t ev_cnt1[0x00005]; /* Specifies event to be counted by Event_counter1 See XXX for events' definition. */ + pseudo_bit_t reserved4[0x00003]; + pseudo_bit_t ev_cnt2[0x00005]; /* Specifies event to be counted by Event_counter2 See XXX for events' definition. */ + pseudo_bit_t reserved5[0x00003]; +/* -------------- */ + pseudo_bit_t clock_counter[0x00020]; +/* -------------- */ + pseudo_bit_t event_counter1[0x00020]; +/* -------------- */ + pseudo_bit_t event_counter2[0x00020];/* Read/write event counter, counting events specified by EvCnt1 and EvCnt2 fields respectively. When the event counter reaches its maximum value of 0xFFFFFF, the next event will cause it to roll over to zero, set F1 or F2 bit respectively and generate interrupt by I1 I2 bit respectively. */ +/* -------------- */ +}; + +/* Receive segment format */ + +struct arbelprm_wqe_segment_ctrl_recv_st { /* Little Endian */ + struct arbelprm_recv_wqe_segment_next_st wqe_segment_next; +/* -------------- */ + pseudo_bit_t reserved0[0x00002]; + pseudo_bit_t reserved1[0x00001]; + pseudo_bit_t reserved2[0x00001]; + pseudo_bit_t reserved3[0x0001c]; +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ +}; + +/* MLX WQE segment format */ + +struct arbelprm_wqe_segment_ctrl_mlx_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00002]; + pseudo_bit_t e[0x00001]; /* WQE event */ + pseudo_bit_t c[0x00001]; /* Create CQE (for "requested signalling" QP) */ + pseudo_bit_t icrc[0x00002]; /* icrc field determines what to do with the last dword of the packet: 0 - Calculate ICRC and put it instead of last dword. Last dword must be 0x0. 1,2 - reserved. 3 - Leave last dword as is. Last dword must not be 0x0. */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t sl[0x00004]; + pseudo_bit_t max_statrate[0x00004]; + pseudo_bit_t slr[0x00001]; /* 0= take slid from port.
1= take slid from given headers */ + pseudo_bit_t v15[0x00001]; /* Send packet over VL15 */ + pseudo_bit_t reserved2[0x0000e]; +/* -------------- */ + pseudo_bit_t vcrc[0x00010]; /* Packet's VCRC (if not 0 - otherwise computed by HW) */ + pseudo_bit_t rlid[0x00010]; /* Destination LID (must match given headers) */ +/* -------------- */ + pseudo_bit_t reserved3[0x00040]; +/* -------------- */ +}; + +/* Send WQE segment format */ + +struct arbelprm_send_wqe_segment_st { /* Little Endian */ + struct arbelprm_wqe_segment_next_st wqe_segment_next;/* Send wqe segment next */ +/* -------------- */ + struct arbelprm_wqe_segment_ctrl_send_st wqe_segment_ctrl_send;/* Send wqe segment ctrl */ +/* -------------- */ + struct arbelprm_wqe_segment_rd_st wqe_segment_rd;/* Send wqe segment rd */ +/* -------------- */ + struct arbelprm_wqe_segment_ud_st wqe_segment_ud;/* Send wqe segment ud */ +/* -------------- */ + struct arbelprm_wqe_segment_bind_st wqe_segment_bind;/* Send wqe segment bind */ +/* -------------- */ + pseudo_bit_t reserved0[0x00180]; +/* -------------- */ + struct arbelprm_wqe_segment_remote_address_st wqe_segment_remote_address;/* Send wqe segment remote address */ +/* -------------- */ + struct arbelprm_wqe_segment_atomic_st wqe_segment_atomic;/* Send wqe segment atomic */ +/* -------------- */ + struct arbelprm_fast_registration_segment_st fast_registration_segment;/* Fast Registration Segment */ +/* -------------- */ + struct arbelprm_local_invalidate_segment_st local_invalidate_segment;/* local invalidate segment */ +/* -------------- */ + struct arbelprm_wqe_segment_data_ptr_st wqe_segment_data_ptr;/* Send wqe segment data ptr */ +/* -------------- */ + struct arbelprm_wqe_segment_data_inline_st wqe_segment_data_inline;/* Send wqe segment data inline */ +/* -------------- */ + pseudo_bit_t reserved1[0x00200]; +/* -------------- */ +}; + +/* QP and EE Context Entry */ + +struct arbelprm_queue_pair_ee_context_entry_st { /* Little Endian */ + pseudo_bit_t 
reserved0[0x00008]; + pseudo_bit_t de[0x00001]; /* Send/Receive Descriptor Event enable - if set, events can be generated upon descriptors' completion on send/receive queue (controlled by E bit in WQE). Invalid in EE context */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t pm_state[0x00002]; /* Path migration state (Migrated, Armed or Rearm) + 11-Migrated + 00-Armed + 01-Rearm + 10-Reserved + Should be set to 11 for UD QPs and for QPs which do not support APM */ + pseudo_bit_t reserved2[0x00003]; + pseudo_bit_t st[0x00003]; /* Service type (invalid in EE context): + 000-Reliable Connection + 001-Unreliable Connection + 010-Reliable Datagram + 011-Unreliable Datagram + 111-MLX transport (raw bits injection). Used for management QPs and RAW */ + pseudo_bit_t reserved3[0x00009]; + pseudo_bit_t state[0x00004]; /* QP/EE state: + 0 - RST + 1 - INIT + 2 - RTR + 3 - RTS + 4 - SQEr + 5 - SQD (Send Queue Drained) + 6 - ERR + 7 - Send Queue Draining + 8 - Reserved + 9 - Suspended + A- F - Reserved + (Valid for QUERY_QPEE and ERR2RST_QPEE commands only) */ +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ + pseudo_bit_t sched_queue[0x00004]; /* Schedule queue to be used for WQE scheduling to execution. Determines QOS for this QP. */ + pseudo_bit_t rlky[0x00001]; /* When set this QP can use the Reserved L_Key */ + pseudo_bit_t reserved5[0x00003]; + pseudo_bit_t log_sq_stride[0x00003];/* Stride on the send queue. WQ entry is 16*(2^log_SQ_stride) bytes. + Stride must be equal to or bigger than 64 bytes (minimum log_SQ_stride value allowed is 2). */ + pseudo_bit_t log_sq_size[0x00004]; /* Log2 of the Number of WQEs in the Send Queue. */ + pseudo_bit_t reserved6[0x00001]; + pseudo_bit_t log_rq_stride[0x00003];/* Stride on the receive queue. WQ entry is 16*(2^log_RQ_stride) bytes. + Stride must be equal to or bigger than 64 bytes (minimum log_RQ_stride value allowed is 2).
*/ + pseudo_bit_t log_rq_size[0x00004]; /* Log2 of the Number of WQEs in the Receive Queue. */ + pseudo_bit_t reserved7[0x00001]; + pseudo_bit_t msg_max[0x00005]; /* Max message size allowed on the QP. Maximum message size is 2^msg_Max. + Must be equal to MTU for UD and MLX QPs. */ + pseudo_bit_t mtu[0x00003]; /* MTU of the QP (Must be the same for both paths: primary and alternative): + 0x1 - 256 bytes + 0x2 - 512 + 0x3 - 1024 + 0x4 - 2048 + other - reserved + + Should be configured to 0x4 for UD and MLX QPs. */ +/* -------------- */ + pseudo_bit_t usr_page[0x00018]; /* Index (offset) of user page allocated for this QP (see "non_privileged Access to the HCA Hardware"). Not valid (reserved) in EE context. */ + pseudo_bit_t reserved8[0x00008]; +/* -------------- */ + pseudo_bit_t local_qpn_een[0x00018];/* Local QP/EE number. Lower bits determine position of this record in QPC table, and - thus - constrained + This field is valid for QUERY and ERR2RST commands only. */ + pseudo_bit_t reserved9[0x00008]; +/* -------------- */ + pseudo_bit_t remote_qpn_een[0x00018];/* Remote QP/EE number */ + pseudo_bit_t reserved10[0x00008]; +/* -------------- */ + pseudo_bit_t reserved11[0x00040]; +/* -------------- */ + struct arbelprm_address_path_st primary_address_path;/* Primary address path for the QP/EE */ +/* -------------- */ + struct arbelprm_address_path_st alternative_address_path;/* Alternate address path for the QP/EE */ +/* -------------- */ + pseudo_bit_t rdd[0x00018]; /* Reliable Datagram Domain */ + pseudo_bit_t reserved12[0x00008]; +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* QP protection domain. Not valid (reserved) in EE context. */ + pseudo_bit_t reserved13[0x00008]; +/* -------------- */ + pseudo_bit_t wqe_base_adr_h[0x00020];/* Bits 63:32 of WQE address for both SQ and RQ. + Reserved for EE context. */ +/* -------------- */ + pseudo_bit_t wqe_lkey[0x00020]; /* memory key (L-Key) to be used to access WQEs. Not valid (reserved) in EE context.
*/ +/* -------------- */ + pseudo_bit_t reserved14[0x00003]; + pseudo_bit_t ssc[0x00001]; /* Send Signaled Completion + 1 - all send WQEs generate CQEs. + 0 - only send WQEs with C bit set generate completion. + Not valid (reserved) in EE context. */ + pseudo_bit_t sic[0x00001]; /* If set - Ignore end to end credits on send queue. Not valid (reserved) in EE context. */ + pseudo_bit_t cur_retry_cnt[0x00003];/* Current transport retry counter (QUERY_QPEE only). + The current transport retry counter can vary from retry_count down to 1, where 1 means that the last retry attempt is currently executing. */ + pseudo_bit_t cur_rnr_retry[0x00003];/* Current RNR retry counter (QUERY_QPEE only). + The current RNR retry counter can vary from rnr_retry to 1, where 1 means that the last retry attempt is currently executing. */ + pseudo_bit_t fre[0x00001]; /* Fast Registration Work Request Enabled. (Reserved for EE) */ + pseudo_bit_t reserved15[0x00001]; + pseudo_bit_t sae[0x00001]; /* If set - Atomic operations enabled on send queue. Not valid (reserved) in EE context. */ + pseudo_bit_t swe[0x00001]; /* If set - RDMA - write enabled on send queue. Not valid (reserved) in EE context. */ + pseudo_bit_t sre[0x00001]; /* If set - RDMA - read enabled on send queue. Not valid (reserved) in EE context. */ + pseudo_bit_t retry_count[0x00003]; /* Transport timeout Retry count */ + pseudo_bit_t reserved16[0x00002]; + pseudo_bit_t sra_max[0x00003]; /* Maximum number of outstanding RDMA-read/Atomic operations allowed in the send queue. Maximum number is 2^SRA_Max. Must be zero in EE context. */ + pseudo_bit_t flight_lim[0x00004]; /* Number of outstanding (in-flight) messages on the wire allowed for this send queue. + Number of outstanding messages is 2^Flight_Lim. + Use 0xF for unlimited number of outstanding messages. */ + pseudo_bit_t ack_req_freq[0x00004]; /* ACK required frequency. ACK required bit will be set in every 2^AckReqFreq packets at least. Not valid for RD QP. 
*/ +/* -------------- */ + pseudo_bit_t reserved17[0x00020]; +/* -------------- */ + pseudo_bit_t next_send_psn[0x00018];/* Next PSN to be sent */ + pseudo_bit_t reserved18[0x00008]; +/* -------------- */ + pseudo_bit_t cqn_snd[0x00018]; /* CQ number completions from the send queue to be reported to. Not valid (reserved) in EE context. */ + pseudo_bit_t reserved19[0x00008]; +/* -------------- */ + pseudo_bit_t reserved20[0x00006]; + pseudo_bit_t snd_wqe_base_adr_l[0x0001a];/* While opening (creating) the WQ, this field should contain the address of first descriptor to be posted. Not valid (reserved) in EE context. */ +/* -------------- */ + pseudo_bit_t snd_db_record_index[0x00020];/* Index in the UAR Context Table Entry. + HW uses this index as an offset from the UAR Context Table Entry in order to read this SQ doorbell record. + The entry is obtained via the usr_page field. + Not valid for EE. */ +/* -------------- */ + pseudo_bit_t last_acked_psn[0x00018];/* The last acknowledged PSN for the requester (QUERY_QPEE only) */ + pseudo_bit_t reserved21[0x00008]; +/* -------------- */ + pseudo_bit_t ssn[0x00018]; /* Requester Send Sequence Number (QUERY_QPEE only) */ + pseudo_bit_t reserved22[0x00008]; +/* -------------- */ + pseudo_bit_t reserved23[0x00003]; + pseudo_bit_t rsc[0x00001]; /* 1 - all receive WQEs generate CQEs. + 0 - only receive WQEs with C bit set generate completion. + Not valid (reserved) in EE context. + */ + pseudo_bit_t ric[0x00001]; /* Invalid Credits. + 1 - place "Invalid Credits" to ACKs sent from this queue. + 0 - ACKs report the actual number of end to end credits on the connection. + Not valid (reserved) in EE context. + Must be set to 1 on QPs which are attached to SRQ. */ + pseudo_bit_t reserved24[0x00008]; + pseudo_bit_t rae[0x00001]; /* If set - Atomic operations enabled on receive queue. Not valid (reserved) in EE context. */ + pseudo_bit_t rwe[0x00001]; /* If set - RDMA - write enabled on receive queue.
Not valid (reserved) in EE context. */ + pseudo_bit_t rre[0x00001]; /* If set - RDMA - read enabled on receive queue. Not valid (reserved) in EE context. */ + pseudo_bit_t reserved25[0x00005]; + pseudo_bit_t rra_max[0x00003]; /* Maximum number of outstanding RDMA-read/Atomic operations allowed on receive queue is 2^RRA_Max. + Must be 0 for EE context. */ + pseudo_bit_t reserved26[0x00008]; +/* -------------- */ + pseudo_bit_t next_rcv_psn[0x00018]; /* Next (expected) PSN on receive */ + pseudo_bit_t min_rnr_nak[0x00005]; /* Minimum RNR NAK timer value (TTTTT field encoding according to the IB spec Vol1 9.7.5.2.8). + Not valid (reserved) in EE context. */ + pseudo_bit_t reserved27[0x00003]; +/* -------------- */ + pseudo_bit_t reserved28[0x00005]; + pseudo_bit_t ra_buff_indx[0x0001b]; /* Index to outstanding read/atomic buffer. + This field constructs the address to the RDB for maintaining the incoming RDMA read and atomic requests. */ +/* -------------- */ + pseudo_bit_t cqn_rcv[0x00018]; /* CQ number completions from receive queue to be reported to. Not valid (reserved) in EE context. */ + pseudo_bit_t reserved29[0x00008]; +/* -------------- */ + pseudo_bit_t reserved30[0x00006]; + pseudo_bit_t rcv_wqe_base_adr_l[0x0001a];/* While opening (creating) the WQ, this field should contain the address of first descriptor to be posted. Not valid (reserved) in EE context. */ +/* -------------- */ + pseudo_bit_t rcv_db_record_index[0x00020];/* Index in the UAR Context Table Entry containing the doorbell record for the receive queue. + HW uses this index as an offset from the UAR Context Table Entry in order to read this RQ doorbell record. + The entry is obtained via the usr_page field. + Not valid for EE. */ +/* -------------- */ + pseudo_bit_t q_key[0x00020]; /* Q_Key to be validated against received datagrams. + On send datagrams, if Q_Key[31] specified in the WQE is set, then this Q_Key will be transmitted in the outgoing message. + Not valid (reserved) in EE context. 
*/ +/* -------------- */ + pseudo_bit_t srqn[0x00018]; /* SRQN - Shared Receive Queue Number - specifies the SRQ number from which the QP dequeues receive descriptors. + SRQN is valid only if SRQ bit is set. Not valid (reserved) in EE context. */ + pseudo_bit_t srq[0x00001]; /* SRQ - Shared Receive Queue. If this bit is set, then the QP is associated with a SRQ. Not valid (reserved) in EE context. */ + pseudo_bit_t reserved31[0x00007]; +/* -------------- */ + pseudo_bit_t rmsn[0x00018]; /* Responder current message sequence number (QUERY_QPEE only) */ + pseudo_bit_t reserved32[0x00008]; +/* -------------- */ + pseudo_bit_t sq_wqe_counter[0x00010];/* A 16bits counter that is incremented for each WQE posted to the SQ. + Must be 0x0 in SQ initialization. + (QUERY_QPEE only). */ + pseudo_bit_t rq_wqe_counter[0x00010];/* A 16bits counter that is incremented for each WQE posted to the RQ. + Must be 0x0 in RQ initialization. + (QUERY_QPEE only). */ +/* -------------- */ + pseudo_bit_t reserved33[0x00040]; +/* -------------- */ +}; + +/* Clear Interrupt [63:0] */ + +struct arbelprm_clr_int_st { /* Little Endian */ + pseudo_bit_t clr_int_h[0x00020]; /* Clear Interrupt [63:32] + Write transactions to this register will clear (de-assert) the virtual interrupt output pins of InfiniHost-III-EX. The value to be written in this register is obtained by executing QUERY_ADAPTER command on command interface after system boot. + This register is write-only. Reading from this register will cause undefined result + */ +/* -------------- */ + pseudo_bit_t clr_int_l[0x00020]; /* Clear Interrupt [31:0] + Write transactions to this register will clear (de-assert) the virtual interrupt output pins of InfiniHost-III-EX. The value to be written in this register is obtained by executing QUERY_ADAPTER command on command interface after system boot. + This register is write-only. 
Reading from this register will cause undefined result */ +/* -------------- */ +}; + +/* EQ_Arm_DB_Region */ + +struct arbelprm_eq_arm_db_region_st { /* Little Endian */ + pseudo_bit_t eq_x_arm_h[0x00020]; /* EQ[63:32] X state. + This register is used to Arm EQs when setting the appropriate bits. */ +/* -------------- */ + pseudo_bit_t eq_x_arm_l[0x00020]; /* EQ[31:0] X state. + This register is used to Arm EQs when setting the appropriate bits. */ +/* -------------- */ +}; + +/* EQ Set CI DBs Table */ + +struct arbelprm_eq_set_ci_table_st { /* Little Endian */ + pseudo_bit_t eq0_set_ci[0x00020]; /* EQ0_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t eq1_set_ci[0x00020]; /* EQ1_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ + pseudo_bit_t eq2_set_ci[0x00020]; /* EQ2_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved2[0x00020]; +/* -------------- */ + pseudo_bit_t eq3_set_ci[0x00020]; /* EQ3_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved3[0x00020]; +/* -------------- */ + pseudo_bit_t eq4_set_ci[0x00020]; /* EQ4_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ + pseudo_bit_t eq5_set_ci[0x00020]; /* EQ5_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ + pseudo_bit_t eq6_set_ci[0x00020]; /* EQ6_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved6[0x00020]; +/* -------------- */ + pseudo_bit_t eq7_set_ci[0x00020]; /* EQ7_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved7[0x00020]; +/* -------------- */ + pseudo_bit_t eq8_set_ci[0x00020]; /* EQ8_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved8[0x00020]; +/* -------------- */ + pseudo_bit_t eq9_set_ci[0x00020]; /* EQ9_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved9[0x00020]; +/* -------------- */ + pseudo_bit_t eq10_set_ci[0x00020]; /* EQ10_Set_CI */ +/* -------------- */ + pseudo_bit_t 
reserved10[0x00020]; +/* -------------- */ + pseudo_bit_t eq11_set_ci[0x00020]; /* EQ11_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved11[0x00020]; +/* -------------- */ + pseudo_bit_t eq12_set_ci[0x00020]; /* EQ12_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved12[0x00020]; +/* -------------- */ + pseudo_bit_t eq13_set_ci[0x00020]; /* EQ13_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved13[0x00020]; +/* -------------- */ + pseudo_bit_t eq14_set_ci[0x00020]; /* EQ14_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved14[0x00020]; +/* -------------- */ + pseudo_bit_t eq15_set_ci[0x00020]; /* EQ15_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved15[0x00020]; +/* -------------- */ + pseudo_bit_t eq16_set_ci[0x00020]; /* EQ16_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved16[0x00020]; +/* -------------- */ + pseudo_bit_t eq17_set_ci[0x00020]; /* EQ17_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved17[0x00020]; +/* -------------- */ + pseudo_bit_t eq18_set_ci[0x00020]; /* EQ18_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved18[0x00020]; +/* -------------- */ + pseudo_bit_t eq19_set_ci[0x00020]; /* EQ19_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved19[0x00020]; +/* -------------- */ + pseudo_bit_t eq20_set_ci[0x00020]; /* EQ20_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved20[0x00020]; +/* -------------- */ + pseudo_bit_t eq21_set_ci[0x00020]; /* EQ21_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved21[0x00020]; +/* -------------- */ + pseudo_bit_t eq22_set_ci[0x00020]; /* EQ22_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved22[0x00020]; +/* -------------- */ + pseudo_bit_t eq23_set_ci[0x00020]; /* EQ23_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved23[0x00020]; +/* -------------- */ + pseudo_bit_t eq24_set_ci[0x00020]; /* EQ24_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved24[0x00020]; +/* -------------- */ + pseudo_bit_t eq25_set_ci[0x00020]; /* EQ25_Set_CI */ +/* 
-------------- */ + pseudo_bit_t reserved25[0x00020]; +/* -------------- */ + pseudo_bit_t eq26_set_ci[0x00020]; /* EQ26_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved26[0x00020]; +/* -------------- */ + pseudo_bit_t eq27_set_ci[0x00020]; /* EQ27_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved27[0x00020]; +/* -------------- */ + pseudo_bit_t eq28_set_ci[0x00020]; /* EQ28_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved28[0x00020]; +/* -------------- */ + pseudo_bit_t eq29_set_ci[0x00020]; /* EQ29_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved29[0x00020]; +/* -------------- */ + pseudo_bit_t eq30_set_ci[0x00020]; /* EQ30_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved30[0x00020]; +/* -------------- */ + pseudo_bit_t eq31_set_ci[0x00020]; /* EQ31_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved31[0x00020]; +/* -------------- */ + pseudo_bit_t eq32_set_ci[0x00020]; /* EQ32_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved32[0x00020]; +/* -------------- */ + pseudo_bit_t eq33_set_ci[0x00020]; /* EQ33_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved33[0x00020]; +/* -------------- */ + pseudo_bit_t eq34_set_ci[0x00020]; /* EQ34_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved34[0x00020]; +/* -------------- */ + pseudo_bit_t eq35_set_ci[0x00020]; /* EQ35_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved35[0x00020]; +/* -------------- */ + pseudo_bit_t eq36_set_ci[0x00020]; /* EQ36_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved36[0x00020]; +/* -------------- */ + pseudo_bit_t eq37_set_ci[0x00020]; /* EQ37_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved37[0x00020]; +/* -------------- */ + pseudo_bit_t eq38_set_ci[0x00020]; /* EQ38_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved38[0x00020]; +/* -------------- */ + pseudo_bit_t eq39_set_ci[0x00020]; /* EQ39_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved39[0x00020]; +/* -------------- */ + pseudo_bit_t 
eq40_set_ci[0x00020]; /* EQ40_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved40[0x00020]; +/* -------------- */ + pseudo_bit_t eq41_set_ci[0x00020]; /* EQ41_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved41[0x00020]; +/* -------------- */ + pseudo_bit_t eq42_set_ci[0x00020]; /* EQ42_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved42[0x00020]; +/* -------------- */ + pseudo_bit_t eq43_set_ci[0x00020]; /* EQ43_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved43[0x00020]; +/* -------------- */ + pseudo_bit_t eq44_set_ci[0x00020]; /* EQ44_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved44[0x00020]; +/* -------------- */ + pseudo_bit_t eq45_set_ci[0x00020]; /* EQ45_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved45[0x00020]; +/* -------------- */ + pseudo_bit_t eq46_set_ci[0x00020]; /* EQ46_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved46[0x00020]; +/* -------------- */ + pseudo_bit_t eq47_set_ci[0x00020]; /* EQ47_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved47[0x00020]; +/* -------------- */ + pseudo_bit_t eq48_set_ci[0x00020]; /* EQ48_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved48[0x00020]; +/* -------------- */ + pseudo_bit_t eq49_set_ci[0x00020]; /* EQ49_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved49[0x00020]; +/* -------------- */ + pseudo_bit_t eq50_set_ci[0x00020]; /* EQ50_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved50[0x00020]; +/* -------------- */ + pseudo_bit_t eq51_set_ci[0x00020]; /* EQ51_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved51[0x00020]; +/* -------------- */ + pseudo_bit_t eq52_set_ci[0x00020]; /* EQ52_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved52[0x00020]; +/* -------------- */ + pseudo_bit_t eq53_set_ci[0x00020]; /* EQ53_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved53[0x00020]; +/* -------------- */ + pseudo_bit_t eq54_set_ci[0x00020]; /* EQ54_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved54[0x00020]; +/* 
-------------- */ + pseudo_bit_t eq55_set_ci[0x00020]; /* EQ55_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved55[0x00020]; +/* -------------- */ + pseudo_bit_t eq56_set_ci[0x00020]; /* EQ56_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved56[0x00020]; +/* -------------- */ + pseudo_bit_t eq57_set_ci[0x00020]; /* EQ57_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved57[0x00020]; +/* -------------- */ + pseudo_bit_t eq58_set_ci[0x00020]; /* EQ58_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved58[0x00020]; +/* -------------- */ + pseudo_bit_t eq59_set_ci[0x00020]; /* EQ59_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved59[0x00020]; +/* -------------- */ + pseudo_bit_t eq60_set_ci[0x00020]; /* EQ60_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved60[0x00020]; +/* -------------- */ + pseudo_bit_t eq61_set_ci[0x00020]; /* EQ61_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved61[0x00020]; +/* -------------- */ + pseudo_bit_t eq62_set_ci[0x00020]; /* EQ62_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved62[0x00020]; +/* -------------- */ + pseudo_bit_t eq63_set_ci[0x00020]; /* EQ63_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved63[0x00020]; +/* -------------- */ +}; + +/* InfiniHost-III-EX Configuration Registers */ + +struct arbelprm_configuration_registers_st { /* Little Endian */ + pseudo_bit_t reserved0[0x403400]; +/* -------------- */ + struct arbelprm_hca_command_register_st hca_command_interface_register;/* HCA Command Register */ +/* -------------- */ + pseudo_bit_t reserved1[0x3fcb20]; +/* -------------- */ +}; + +/* QP_DB_Record */ + +struct arbelprm_qp_db_record_st { /* Little Endian */ + pseudo_bit_t counter[0x00010]; /* Modulo-64K counter of WQEs posted to the QP since its creation. Should be initialized to zero. 
*/ + pseudo_bit_t reserved0[0x00010]; +/* -------------- */ + pseudo_bit_t reserved1[0x00005]; + pseudo_bit_t res[0x00003]; /* 0x3 for SQ + 0x4 for RQ + 0x5 for SRQ */ + pseudo_bit_t qp_number[0x00018]; /* QP number */ +/* -------------- */ +}; + +/* CQ_ARM_DB_Record */ + +struct arbelprm_cq_arm_db_record_st { /* Little Endian */ + pseudo_bit_t counter[0x00020]; /* CQ counter for the arming request */ +/* -------------- */ + pseudo_bit_t cmd[0x00003]; /* 0x0 - No command + 0x1 - Request notification for next Solicited completion event. Counter field specifies the current CQ Consumer Counter. + 0x2 - Request notification for next Solicited or Unsolicited completion event. Counter field specifies the current CQ Consumer counter. + 0x3 - Request notification for multiple completions (Arm-N). Counter field specifies the value of the CQ Index that when reached by HW (i.e. HW generates a CQE into this Index) Event will be generated + Other - Reserved */ + pseudo_bit_t cmd_sn[0x00002]; /* Command Sequence Number - See Table 35, "CQ Doorbell Layout" for definition of this field */ + pseudo_bit_t res[0x00003]; /* Must be 0x2 */ + pseudo_bit_t cq_number[0x00018]; /* CQ number */ +/* -------------- */ +}; + +/* CQ_CI_DB_Record */ + +struct arbelprm_cq_ci_db_record_st { /* Little Endian */ + pseudo_bit_t counter[0x00020]; /* CQ counter */ +/* -------------- */ + pseudo_bit_t reserved0[0x00005]; + pseudo_bit_t res[0x00003]; /* Must be 0x1 */ + pseudo_bit_t cq_number[0x00018]; /* CQ number */ +/* -------------- */ +}; + +/* Virtual_Physical_Mapping */ + +struct arbelprm_virtual_physical_mapping_st { /* Little Endian */ + pseudo_bit_t va_h[0x00020]; /* Virtual Address[63:32]. Valid only for MAP_ICM command. */ +/* -------------- */ + pseudo_bit_t reserved0[0x0000c]; + pseudo_bit_t va_l[0x00014]; /* Virtual Address[31:12]. Valid only for MAP_ICM command.
*/ +/* -------------- */ + pseudo_bit_t pa_h[0x00020]; /* Physical Address[63:32] */ +/* -------------- */ + pseudo_bit_t log2size[0x00006]; /* Log2 of the size in 4KB pages of the physical and virtual contiguous memory that starts at PA_L/H and VA_L/H */ + pseudo_bit_t reserved1[0x00006]; + pseudo_bit_t pa_l[0x00014]; /* Physical Address[31:12] */ +/* -------------- */ +}; + +/* MOD_STAT_CFG */ + +struct arbelprm_mod_stat_cfg_st { /* Little Endian */ + pseudo_bit_t log_max_srqs[0x00005]; /* Log (base 2) of the number of SRQs to allocate (0 if no SRQs are required), valid only if srq bit is set. */ + pseudo_bit_t reserved0[0x00001]; + pseudo_bit_t srq[0x00001]; /* When set SRQs are supported */ + pseudo_bit_t srq_m[0x00001]; /* Modify SRQ parameters */ + pseudo_bit_t reserved1[0x00018]; +/* -------------- */ + pseudo_bit_t reserved2[0x007e0]; +/* -------------- */ +}; + +/* SRQ Context */ + +struct arbelprm_srq_context_st { /* Little Endian */ + pseudo_bit_t srqn[0x00018]; /* SRQ number */ + pseudo_bit_t log_srq_size[0x00004]; /* Log2 of the Number of WQEs in the Receive Queue. + Maximum value is 0x10, i.e. 16M WQEs. */ + pseudo_bit_t state[0x00004]; /* SRQ State: + 1111 - SW Ownership + 0000 - HW Ownership + 0001 - Error + Valid only on QUERY_SRQ and HW2SW_SRQ commands. */ +/* -------------- */ + pseudo_bit_t l_key[0x00020]; /* memory key (L-Key) to be used to access WQEs. */ +/* -------------- */ + pseudo_bit_t srq_db_record_index[0x00020];/* Index in the UAR Context Table Entry containing the doorbell record for the receive queue. + HW uses this index as an offset from the UAR Context Table Entry in order to read this SRQ doorbell record. + The entry is obtained via the usr_page field. */ +/* -------------- */ + pseudo_bit_t usr_page[0x00018]; /* Index (offset) of user page allocated for this SRQ (see "non_privileged Access to the HCA Hardware"). Not valid (reserved) in EE context. 
*/ + pseudo_bit_t reserved0[0x00005]; + pseudo_bit_t log_rq_stride[0x00003];/* Stride (max WQE size) on the receive queue. WQ entry is 16*(2^log_RQ_stride) bytes. */ +/* -------------- */ + pseudo_bit_t wqe_addr_h[0x00020]; /* Bits 63:32 of WQE address (WQE base address) */ +/* -------------- */ + pseudo_bit_t reserved1[0x00006]; + pseudo_bit_t srq_wqe_base_adr_l[0x0001a];/* While opening (creating) the SRQ, this field should contain the address of first descriptor to be posted. */ +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* SRQ protection domain. */ + pseudo_bit_t reserved2[0x00008]; +/* -------------- */ + pseudo_bit_t wqe_cnt[0x00010]; /* WQE count on the SRQ. + Valid only on QUERY_SRQ and HW2SW_SRQ commands. */ + pseudo_bit_t lwm[0x00010]; /* Limit Water Mark - if the LWM is not zero, and the wqe_cnt drops below LWM when a WQE is dequeued from the SRQ, then a SRQ limit event is fired and the LWM is set to zero. */ +/* -------------- */ + pseudo_bit_t srq_wqe_counter[0x00010];/* A 16bits counter that is incremented for each WQE posted to the SQ. + Must be 0x0 in SRQ initialization. + (QUERY_SRQ only). 
*/ + pseudo_bit_t reserved3[0x00010]; +/* -------------- */ + pseudo_bit_t reserved4[0x00060]; +/* -------------- */ +}; + +/* PBL */ + +struct arbelprm_pbl_st { /* Little Endian */ + pseudo_bit_t mtt_0_h[0x00020]; /* First MTT[63:32] */ +/* -------------- */ + pseudo_bit_t mtt_0_l[0x00020]; /* First MTT[31:0] */ +/* -------------- */ + pseudo_bit_t mtt_1_h[0x00020]; /* Second MTT[63:32] */ +/* -------------- */ + pseudo_bit_t mtt_1_l[0x00020]; /* Second MTT[31:0] */ +/* -------------- */ + pseudo_bit_t mtt_2_h[0x00020]; /* Third MTT[63:32] */ +/* -------------- */ + pseudo_bit_t mtt_2_l[0x00020]; /* Third MTT[31:0] */ +/* -------------- */ + pseudo_bit_t mtt_3_h[0x00020]; /* Fourth MTT[63:32] */ +/* -------------- */ + pseudo_bit_t mtt_3_l[0x00020]; /* Fourth MTT[31:0] */ +/* -------------- */ +}; + +/* Performance Counters */ + +struct arbelprm_performance_counters_st { /* Little Endian */ + pseudo_bit_t sqpc_access_cnt[0x00020];/* SQPC cache access count */ +/* -------------- */ + pseudo_bit_t sqpc_miss_cnt[0x00020];/* SQPC cache miss count */ +/* -------------- */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ + pseudo_bit_t rqpc_access_cnt[0x00020];/* RQPC cache access count */ +/* -------------- */ + pseudo_bit_t rqpc_miss_cnt[0x00020];/* RQPC cache miss count */ +/* -------------- */ + pseudo_bit_t reserved1[0x00040]; +/* -------------- */ + pseudo_bit_t cqc_access_cnt[0x00020];/* CQC cache access count */ +/* -------------- */ + pseudo_bit_t cqc_miss_cnt[0x00020]; /* CQC cache miss count */ +/* -------------- */ + pseudo_bit_t reserved2[0x00040]; +/* -------------- */ + pseudo_bit_t tpt_access_cnt[0x00020];/* TPT cache access count */ +/* -------------- */ + pseudo_bit_t mpt_miss_cnt[0x00020]; /* MPT cache miss count */ +/* -------------- */ + pseudo_bit_t mtt_miss_cnt[0x00020]; /* MTT cache miss count */ +/* -------------- */ + pseudo_bit_t reserved3[0x00620]; +/* -------------- */ +}; + +/* Transport and CI Error Counters */ + +struct 
arbelprm_transport_and_ci_error_counters_st { /* Little Endian */ + pseudo_bit_t rq_num_lle[0x00020]; /* Responder - number of local length errors */ +/* -------------- */ + pseudo_bit_t sq_num_lle[0x00020]; /* Requester - number of local length errors */ +/* -------------- */ + pseudo_bit_t rq_num_lqpoe[0x00020]; /* Responder - number local QP operation error */ +/* -------------- */ + pseudo_bit_t sq_num_lqpoe[0x00020]; /* Requester - number local QP operation error */ +/* -------------- */ + pseudo_bit_t rq_num_leeoe[0x00020]; /* Responder - number local EE operation error */ +/* -------------- */ + pseudo_bit_t sq_num_leeoe[0x00020]; /* Requester - number local EE operation error */ +/* -------------- */ + pseudo_bit_t rq_num_lpe[0x00020]; /* Responder - number of local protection errors */ +/* -------------- */ + pseudo_bit_t sq_num_lpe[0x00020]; /* Requester - number of local protection errors */ +/* -------------- */ + pseudo_bit_t rq_num_wrfe[0x00020]; /* Responder - number of CQEs with error. + Incremented each time a CQE with error is generated */ +/* -------------- */ + pseudo_bit_t sq_num_wrfe[0x00020]; /* Requester - number of CQEs with error. + Incremented each time a CQE with error is generated */ +/* -------------- */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_mwbe[0x00020]; /* Requester - number of memory window bind errors */ +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_bre[0x00020]; /* Requester - number of bad response errors */ +/* -------------- */ + pseudo_bit_t rq_num_lae[0x00020]; /* Responder - number of local access errors */ +/* -------------- */ + pseudo_bit_t reserved2[0x00040]; +/* -------------- */ + pseudo_bit_t sq_num_rire[0x00020]; /* Requester - number of remote invalid request errors + NAK-Invalid Request on: + 1. Unsupported OpCode: Responder detected an unsupported OpCode. + 2. 
Unexpected OpCode: Responder detected an error in the sequence of OpCodes, such + as a missing "Last" packet. + Note: there is no PSN error, thus this does not indicate a dropped packet. */ +/* -------------- */ + pseudo_bit_t rq_num_rire[0x00020]; /* Responder - number of remote invalid request errors. + NAK may or may not be sent. + 1. QP Async Affiliated Error: Unsupported or Reserved OpCode (RC,RD only): + Inbound request OpCode was either reserved, or was for a function not supported by this + QP. (E.g. RDMA or ATOMIC on QP not set up for this). + 2. Misaligned ATOMIC: VA does not point to an aligned address on an atomic operation. + 3. Too many RDMA READ or ATOMIC Requests: There were more requests received + and not ACKed than allowed for the connection. + 4. Out of Sequence OpCode, current packet is "First" or "Only": The Responder + detected an error in the sequence of OpCodes; a missing "Last" packet + 5. Out of Sequence OpCode, current packet is not "First" or "Only": The Responder + detected an error in the sequence of OpCodes; a missing "First" packet + 6. Local Length Error: Inbound "Send" request message exceeded the responder's available + buffer space. + 7. Length error: RDMA WRITE request message contained too much or too little payload + data compared to the DMA length advertised in the first or only packet. + 8. Length error: Payload length was not consistent with the opcode: + a: 0 byte <= "only" <= PMTU bytes + b: ("first" or "middle") == PMTU bytes + c: 1 byte <= "last" <= PMTU bytes + 9. Length error: Inbound message exceeded the size supported by the CA port. */ +/* -------------- */ + pseudo_bit_t sq_num_rae[0x00020]; /* Requester - number of remote access errors. + NAK-Remote Access Error on: + R_Key Violation: Responder detected an invalid R_Key while executing an RDMA + Request. */ +/* -------------- */ + pseudo_bit_t rq_num_rae[0x00020]; /* Responder - number of remote access errors. 
+ R_Key Violation Responder detected an R_Key violation while executing an RDMA + request. + NAK may or may not be sent. */ +/* -------------- */ + pseudo_bit_t sq_num_roe[0x00020]; /* Requester - number of remote operation errors. + NAK-Remote Operation Error on: + Remote Operation Error: Responder encountered an error, (local to the responder), + which prevented it from completing the request. */ +/* -------------- */ + pseudo_bit_t rq_num_roe[0x00020]; /* Responder - number of remote operation errors. + NAK-Remote Operation Error on: + 1. Malformed WQE: Responder detected a malformed Receive Queue WQE while processing + the packet. + 2. Remote Operation Error: Responder encountered an error, (local to the responder), + which prevented it from completing the request. */ +/* -------------- */ + pseudo_bit_t sq_num_tree[0x00020]; /* Requester - number of transport retries exceeded errors */ +/* -------------- */ + pseudo_bit_t reserved3[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_rree[0x00020]; /* Requester - number of RNR nak retries exceeded errors */ +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_lrdve[0x00020]; /* Requester - number of local RDD violation errors */ +/* -------------- */ + pseudo_bit_t rq_num_rirdre[0x00020];/* Responder - number of remote invalid RD request errors */ +/* -------------- */ + pseudo_bit_t reserved5[0x00040]; +/* -------------- */ + pseudo_bit_t sq_num_rabrte[0x00020];/* Requester - number of remote aborted errors */ +/* -------------- */ + pseudo_bit_t reserved6[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_ieecne[0x00020];/* Requester - number of invalid EE context number errors */ +/* -------------- */ + pseudo_bit_t reserved7[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_ieecse[0x00020];/* Requester - invalid EE context state errors */ +/* -------------- */ + pseudo_bit_t reserved8[0x00380]; +/* -------------- */ + pseudo_bit_t 
rq_num_oos[0x00020]; /* Responder - number of out of sequence requests received */ +/* -------------- */ + pseudo_bit_t sq_num_oos[0x00020]; /* Requester - number of out of sequence Naks received */ +/* -------------- */ + pseudo_bit_t rq_num_mce[0x00020]; /* Responder - number of bad multicast packets received */ +/* -------------- */ + pseudo_bit_t reserved9[0x00020]; +/* -------------- */ + pseudo_bit_t rq_num_rsync[0x00020]; /* Responder - number of RESYNC operations */ +/* -------------- */ + pseudo_bit_t sq_num_rsync[0x00020]; /* Requester - number of RESYNC operations */ +/* -------------- */ + pseudo_bit_t rq_num_udsdprd[0x00020];/* The number of UD packets silently discarded on the receive queue due to lack of receive descriptor. */ +/* -------------- */ + pseudo_bit_t reserved10[0x00020]; +/* -------------- */ + pseudo_bit_t rq_num_ucsdprd[0x00020];/* The number of UC packets silently discarded on the receive queue due to lack of receive descriptor. */ +/* -------------- */ + pseudo_bit_t reserved11[0x003e0]; +/* -------------- */ + pseudo_bit_t num_cqovf[0x00020]; /* Number of CQ overflows */ +/* -------------- */ + pseudo_bit_t num_eqovf[0x00020]; /* Number of EQ overflows */ +/* -------------- */ + pseudo_bit_t num_baddb[0x00020]; /* Number of bad doorbells */ +/* -------------- */ + pseudo_bit_t reserved12[0x002a0]; +/* -------------- */ +}; + +/* Event_data Field - HCR Completion Event */ + +struct arbelprm_hcr_completion_event_st { /* Little Endian */ + pseudo_bit_t token[0x00010]; /* HCR Token */ + pseudo_bit_t reserved0[0x00010]; +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ + pseudo_bit_t status[0x00008]; /* HCR Status */ + pseudo_bit_t reserved2[0x00018]; +/* -------------- */ + pseudo_bit_t out_param_h[0x00020]; /* HCR Output Parameter [63:32] */ +/* -------------- */ + pseudo_bit_t out_param_l[0x00020]; /* HCR Output Parameter [31:0] */ +/* -------------- */ + pseudo_bit_t reserved3[0x00020]; +/* -------------- 
*/ +}; + +/* Completion with Error CQE */ + +struct arbelprm_completion_with_error_st { /* Little Endian */ + pseudo_bit_t myqpn[0x00018]; /* Indicates the QP for which completion is being reported */ + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t reserved1[0x00060]; +/* -------------- */ + pseudo_bit_t reserved2[0x00010]; + pseudo_bit_t vendor_code[0x00008]; + pseudo_bit_t syndrome[0x00008]; /* Completion with error syndrome: + 0x01 - Local Length Error + 0x02 - Local QP Operation Error + 0x03 - Local EE Context Operation Error + 0x04 - Local Protection Error + 0x05 - Work Request Flushed Error + 0x06 - Memory Window Bind Error + 0x10 - Bad Response Error + 0x11 - Local Access Error + 0x12 - Remote Invalid Request Error + 0x13 - Remote Access Error + 0x14 - Remote Operation Error + 0x15 - Transport Retry Counter Exceeded + 0x16 - RNR Retry Counter Exceeded + 0x20 - Local RDD Violation Error + 0x21 - Remote Invalid RD Request + 0x22 - Remote Aborted Error + 0x23 - Invalid EE Context Number + 0x24 - Invalid EE Context State + other - Reserved + Syndrome is defined according to the IB specification volume 1. For detailed explanation of the syndromes, refer to chapters 10-11 of the IB specification rev 1.1. */ +/* -------------- */ + pseudo_bit_t reserved3[0x00020]; +/* -------------- */ + pseudo_bit_t reserved4[0x00006]; + pseudo_bit_t wqe_addr[0x0001a]; /* Bits 31:6 of WQE virtual address completion is reported for. The 6 least significant bits are zero. */ +/* -------------- */ + pseudo_bit_t reserved5[0x00007]; + pseudo_bit_t owner[0x00001]; /* Owner field. Zero value of this field means SW ownership of CQE. */ + pseudo_bit_t reserved6[0x00010]; + pseudo_bit_t opcode[0x00008]; /* The opcode of WQE completion is reported for. 
+ + The following values are reported in case of completion with error: + 0xFE - For completion with error on Receive Queues + 0xFF - For completion with error on Send Queues */ +/* -------------- */ +}; + +/* Resize CQ Input Mailbox */ + +struct arbelprm_resize_cq_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t start_addr_h[0x00020]; /* Start address of CQ[63:32]. + Must be aligned on CQE size (32 bytes) */ +/* -------------- */ + pseudo_bit_t start_addr_l[0x00020]; /* Start address of CQ[31:0]. + Must be aligned on CQE size (32 bytes) */ +/* -------------- */ + pseudo_bit_t reserved1[0x00018]; + pseudo_bit_t log_cq_size[0x00005]; /* Log (base 2) of the CQ size (in entries) */ + pseudo_bit_t reserved2[0x00003]; +/* -------------- */ + pseudo_bit_t reserved3[0x00060]; +/* -------------- */ + pseudo_bit_t l_key[0x00020]; /* Memory key (L_Key) to be used to access CQ */ +/* -------------- */ + pseudo_bit_t reserved4[0x00100]; +/* -------------- */ +}; + +/* MAD_IFC Input Modifier */ + +struct arbelprm_mad_ifc_input_modifier_st { /* Little Endian */ + pseudo_bit_t port_number[0x00008]; /* The packet reception port number (1 or 2). */ + pseudo_bit_t mad_extended_info[0x00001];/* Mad_Extended_Info valid bit (MAD_IFC Input Mailbox data from offset 00100h and down). MAD_Extended_Info is read only if this bit is set. + Required for trap generation when BKey check is enabled and for global routed packets. */ + pseudo_bit_t reserved0[0x00007]; + pseudo_bit_t rlid[0x00010]; /* Remote (source) LID from the received MAD. + This field is required for trap generation upon MKey/BKey validation. */ +/* -------------- */ +}; + +/* MAD_IFC Input Mailbox */ + +struct arbelprm_mad_ifc_st { /* Little Endian */ + pseudo_bit_t request_mad_packet[64][0x00020];/* Request MAD Packet (256bytes) */ +/* -------------- */ + pseudo_bit_t my_qpn[0x00018]; /* Destination QP number from the received MAD. 
+ This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t rqpn[0x00018]; /* Remote (source) QP number from the received MAD. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t rlid[0x00010]; /* Remote (source) LID from the received MAD. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t ml_path[0x00007]; /* My (destination) LID path bits from the received MAD. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t g[0x00001]; /* If set, the GRH field is valid. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t reserved2[0x00004]; + pseudo_bit_t sl[0x00004]; /* Service Level of the received MAD. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ +/* -------------- */ + pseudo_bit_t pkey_indx[0x00010]; /* Index in PKey table that matches PKey of the received MAD. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t reserved3[0x00010]; +/* -------------- */ + pseudo_bit_t reserved4[0x00180]; +/* -------------- */ + pseudo_bit_t grh[10][0x00020]; /* The GRH field of the MAD packet that was scattered to the first 40 bytes pointed to by the scatter list. + Valid if Mad_extended_info bit (in the input modifier) and g bit are set. + Otherwise this field is reserved. */ +/* -------------- */ + pseudo_bit_t reserved5[0x004c0]; +/* -------------- */ +}; + +/* Query Debug Message */ + +struct arbelprm_query_debug_msg_st { /* Little Endian */ + pseudo_bit_t phy_addr_h[0x00020]; /* Translation of the address in firmware area. High 32 bits. 
*/ +/* -------------- */ + pseudo_bit_t v[0x00001]; /* Physical translation is valid */ + pseudo_bit_t reserved0[0x0000b]; + pseudo_bit_t phy_addr_l[0x00014]; /* Translation of the address in firmware area. Low 32 bits. */ +/* -------------- */ + pseudo_bit_t fw_area_base[0x00020]; /* Firmware area base address. The format strings and the trace buffers may be located starting from this address. */ +/* -------------- */ + pseudo_bit_t fw_area_size[0x00020]; /* Firmware area size */ +/* -------------- */ + pseudo_bit_t trc_hdr_sz[0x00020]; /* Trace message header size in dwords. */ +/* -------------- */ + pseudo_bit_t trc_arg_num[0x00020]; /* The number of arguments per trace message. */ +/* -------------- */ + pseudo_bit_t reserved1[0x000c0]; +/* -------------- */ + pseudo_bit_t dbg_msk_h[0x00020]; /* Debug messages mask [63:32] */ +/* -------------- */ + pseudo_bit_t dbg_msk_l[0x00020]; /* Debug messages mask [31:0] */ +/* -------------- */ + pseudo_bit_t reserved2[0x00040]; +/* -------------- */ + pseudo_bit_t buff0_addr[0x00020]; /* Address in firmware area of Trace Buffer 0 */ +/* -------------- */ + pseudo_bit_t buff0_size[0x00020]; /* Size of Trace Buffer 0 */ +/* -------------- */ + pseudo_bit_t buff1_addr[0x00020]; /* Address in firmware area of Trace Buffer 1 */ +/* -------------- */ + pseudo_bit_t buff1_size[0x00020]; /* Size of Trace Buffer 1 */ +/* -------------- */ + pseudo_bit_t buff2_addr[0x00020]; /* Address in firmware area of Trace Buffer 2 */ +/* -------------- */ + pseudo_bit_t buff2_size[0x00020]; /* Size of Trace Buffer 2 */ +/* -------------- */ + pseudo_bit_t buff3_addr[0x00020]; /* Address in firmware area of Trace Buffer 3 */ +/* -------------- */ + pseudo_bit_t buff3_size[0x00020]; /* Size of Trace Buffer 3 */ +/* -------------- */ + pseudo_bit_t buff4_addr[0x00020]; /* Address in firmware area of Trace Buffer 4 */ +/* -------------- */ + pseudo_bit_t buff4_size[0x00020]; /* Size of Trace Buffer 4 */ +/* -------------- */ + pseudo_bit_t 
buff5_addr[0x00020]; /* Address in firmware area of Trace Buffer 5 */ +/* -------------- */ + pseudo_bit_t buff5_size[0x00020]; /* Size of Trace Buffer 5 */ +/* -------------- */ + pseudo_bit_t buff6_addr[0x00020]; /* Address in firmware area of Trace Buffer 6 */ +/* -------------- */ + pseudo_bit_t buff6_size[0x00020]; /* Size of Trace Buffer 6 */ +/* -------------- */ + pseudo_bit_t buff7_addr[0x00020]; /* Address in firmware area of Trace Buffer 7 */ +/* -------------- */ + pseudo_bit_t buff7_size[0x00020]; /* Size of Trace Buffer 7 */ +/* -------------- */ + pseudo_bit_t reserved3[0x00400]; +/* -------------- */ +}; + +/* User Access Region */ + +struct arbelprm_uar_st { /* Little Endian */ + struct arbelprm_rd_send_doorbell_st rd_send_doorbell;/* Reliable Datagram send doorbell */ +/* -------------- */ + struct arbelprm_send_doorbell_st send_doorbell;/* Send doorbell */ +/* -------------- */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ + struct arbelprm_cq_cmd_doorbell_st cq_command_doorbell;/* CQ Doorbell */ +/* -------------- */ + pseudo_bit_t reserved1[0x03ec0]; +/* -------------- */ +}; + +/* Receive doorbell */ + +struct arbelprm_receive_doorbell_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t wqe_counter[0x00010]; /* Modulo-64K counter of WQEs posted on this queue since its creation. Should be zero for the first doorbell on the QP */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t reserved2[0x00005]; + pseudo_bit_t srq[0x00001]; /* If set, this is a Shared Receive Queue */ + pseudo_bit_t reserved3[0x00002]; + pseudo_bit_t qpn[0x00018]; /* QP number or SRQ number this doorbell is rung on */ +/* -------------- */ +}; + +/* SET_IB Parameters */ + +struct arbelprm_set_ib_st { /* Little Endian */ + pseudo_bit_t rqk[0x00001]; /* Reset QKey Violation Counter */ + pseudo_bit_t reserved0[0x00011]; + pseudo_bit_t sig[0x00001]; /* Set System Image GUID to system_image_guid specified. 
+ system_image_guid and sig must be the same for all ports. */ + pseudo_bit_t reserved1[0x0000d]; +/* -------------- */ + pseudo_bit_t capability_mask[0x00020];/* PortInfo Capability Mask */ +/* -------------- */ + pseudo_bit_t system_image_guid_h[0x00020];/* System Image GUID[63:32], takes effect only if the SIG bit is set + Must be the same for both ports. */ +/* -------------- */ + pseudo_bit_t system_image_guid_l[0x00020];/* System Image GUID[31:0], takes effect only if the SIG bit is set + Must be the same for both ports. */ +/* -------------- */ + pseudo_bit_t reserved2[0x00180]; +/* -------------- */ +}; + +/* Multicast Group Member */ + +struct arbelprm_mgm_entry_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00006]; + pseudo_bit_t next_gid_index[0x0001a];/* Index of next Multicast Group Member whose GID maps to same MGID_HASH number. + The index is into the Multicast Group Table, which is comprised of the MGHT and AMGM tables. + next_gid_index=0 means end of the chain. */ +/* -------------- */ + pseudo_bit_t reserved1[0x00060]; +/* -------------- */ + pseudo_bit_t mgid_128_96[0x00020]; /* Multicast group GID[128:96] in big endian format. + Use the Reserved GID 0:0:0:0:0:0:0:0 for an invalid entry. */ +/* -------------- */ + pseudo_bit_t mgid_95_64[0x00020]; /* Multicast group GID[95:64] in big endian format. + Use the Reserved GID 0:0:0:0:0:0:0:0 for an invalid entry. */ +/* -------------- */ + pseudo_bit_t mgid_63_32[0x00020]; /* Multicast group GID[63:32] in big endian format. + Use the Reserved GID 0:0:0:0:0:0:0:0 for an invalid entry. */ +/* -------------- */ + pseudo_bit_t mgid_31_0[0x00020]; /* Multicast group GID[31:0] in big endian format. + Use the Reserved GID 0:0:0:0:0:0:0:0 for an invalid entry. 
*/ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_0; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_1; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_2; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_3; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_4; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_5; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_6; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_7; /* Multicast Group Member QP */ +/* -------------- */ +}; + +/* INIT_IB Parameters */ + +struct arbelprm_init_ib_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00004]; + pseudo_bit_t vl_cap[0x00004]; /* Maximum VLs supported on the port, excluding VL15. + Legal values are 1,2,4 and 8. */ + pseudo_bit_t port_width_cap[0x00004];/* IB Port Width + 1 - 1x + 3 - 1x, 4x + 11 - 1x, 4x or 12x (must not be used in InfiniHost-III-EX MT25208) + else - Reserved */ + pseudo_bit_t mtu_cap[0x00004]; /* Maximum MTU Supported + 0x0 - Reserved + 0x1 - 256 + 0x2 - 512 + 0x3 - 1024 + 0x4 - 2048 + 0x5 - 0xF Reserved */ + pseudo_bit_t g0[0x00001]; /* Set port GUID0 to GUID0 specified */ + pseudo_bit_t ng[0x00001]; /* Set node GUID to node_guid specified. + node_guid and ng must be the same for all ports. */ + pseudo_bit_t sig[0x00001]; /* Set System Image GUID to system_image_guid specified. + system_image_guid and sig must be the same for all ports. */ + pseudo_bit_t reserved1[0x0000d]; +/* -------------- */ + pseudo_bit_t max_gid[0x00010]; /* Maximum number of GIDs for the port */ + pseudo_bit_t reserved2[0x00010]; +/* -------------- */ + pseudo_bit_t max_pkey[0x00010]; /* Maximum pkeys for the port. + Must be the same for both ports. 
*/ + pseudo_bit_t reserved3[0x00010]; +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ + pseudo_bit_t guid0_h[0x00020]; /* EUI-64 GUID assigned by the manufacturer, takes effect only if the G0 bit is set (bits 63:32) */ +/* -------------- */ + pseudo_bit_t guid0_l[0x00020]; /* EUI-64 GUID assigned by the manufacturer, takes effect only if the G0 bit is set (bits 31:0) */ +/* -------------- */ + pseudo_bit_t node_guid_h[0x00020]; /* Node GUID[63:32], takes effect only if the NG bit is set + Must be the same for both ports. */ +/* -------------- */ + pseudo_bit_t node_guid_l[0x00020]; /* Node GUID[31:0], takes effect only if the NG bit is set + Must be the same for both ports. */ +/* -------------- */ + pseudo_bit_t system_image_guid_h[0x00020];/* System Image GUID[63:32], takes effect only if the SIG bit is set + Must be the same for both ports. */ +/* -------------- */ + pseudo_bit_t system_image_guid_l[0x00020];/* System Image GUID[31:0], takes effect only if the SIG bit is set + Must be the same for both ports. 
*/ +/* -------------- */ + pseudo_bit_t reserved5[0x006c0]; +/* -------------- */ +}; + +/* Query Device Limitations */ + +struct arbelprm_query_dev_lim_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00080]; +/* -------------- */ + pseudo_bit_t log_max_qp[0x00005]; /* Log2 of the Maximum number of QPs supported */ + pseudo_bit_t reserved1[0x00003]; + pseudo_bit_t log2_rsvd_qps[0x00004];/* Log (base 2) of the number of QPs reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_qps-1 */ + pseudo_bit_t reserved2[0x00004]; + pseudo_bit_t log_max_qp_sz[0x00008];/* The maximum number of WQEs allowed on the RQ or the SQ is 2^log_max_qp_sz-1 */ + pseudo_bit_t log_max_srq_sz[0x00008];/* The maximum number of WQEs allowed on the SRQ is 2^log_max_srq_sz-1 */ +/* -------------- */ + pseudo_bit_t log_max_ee[0x00005]; /* Log2 of the Maximum number of EE contexts supported */ + pseudo_bit_t reserved3[0x00003]; + pseudo_bit_t log2_rsvd_ees[0x00004];/* Log (base 2) of the number of EECs reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_ees-1 */ + pseudo_bit_t reserved4[0x00004]; + pseudo_bit_t log_max_srqs[0x00005]; /* Log base 2 of the maximum number of SRQs supported, valid only if SRQ bit is set. + */ + pseudo_bit_t reserved5[0x00007]; + pseudo_bit_t log2_rsvd_srqs[0x00004];/* Log (base 2) of the number of reserved SRQs for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_srqs-1 + This parameter is valid only if the SRQ bit is set. 
*/ +/* -------------- */ + pseudo_bit_t log_max_cq[0x00005]; /* Log2 of the Maximum number of CQs supported */ + pseudo_bit_t reserved6[0x00003]; + pseudo_bit_t log2_rsvd_cqs[0x00004];/* Log (base 2) of the number of CQs reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_cqs-1 */ + pseudo_bit_t reserved7[0x00004]; + pseudo_bit_t log_max_cq_sz[0x00008];/* Log2 of the Maximum CQEs allowed in a CQ */ + pseudo_bit_t reserved8[0x00008]; +/* -------------- */ + pseudo_bit_t log_max_eq[0x00003]; /* Log2 of the Maximum number of EQs */ + pseudo_bit_t reserved9[0x00005]; + pseudo_bit_t num_rsvd_eqs[0x00004]; /* The number of EQs reserved for firmware use + The reserved resources are numbered from 0 to num_rsvd_eqs-1 + If 0 - no resources are reserved. */ + pseudo_bit_t reserved10[0x00004]; + pseudo_bit_t log_max_mpts[0x00006]; /* Log (base 2) of the maximum number of MPT entries (the number of Regions/Windows) */ + pseudo_bit_t reserved11[0x00002]; + pseudo_bit_t log_max_eq_sz[0x00008];/* Log2 of the Maximum EQEs allowed in an EQ */ +/* -------------- */ + pseudo_bit_t log_max_mtts[0x00006]; /* Log2 of the Maximum number of MTT entries */ + pseudo_bit_t reserved12[0x00002]; + pseudo_bit_t log2_rsvd_mrws[0x00004];/* Log (base 2) of the number of MPTs reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_mrws-1 */ + pseudo_bit_t reserved13[0x00004]; + pseudo_bit_t log_max_mrw_sz[0x00008];/* Log2 of the Maximum Size of Memory Region/Window */ + pseudo_bit_t reserved14[0x00004]; + pseudo_bit_t log2_rsvd_mtts[0x00004];/* Log (base 2) of the number of MTT entries reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_mtts-1 + */ +/* -------------- */ + pseudo_bit_t reserved15[0x00020]; +/* -------------- */ + pseudo_bit_t log_max_ra_res_qp[0x00006];/* Log2 of the Maximum number of outstanding RDMA read/Atomic per QP as a responder */ + pseudo_bit_t reserved16[0x0000a]; + pseudo_bit_t 
log_max_ra_req_qp[0x00006];/* Log2 of the maximum number of outstanding RDMA read/Atomic per QP as a requester */ + pseudo_bit_t reserved17[0x0000a]; +/* -------------- */ + pseudo_bit_t log_max_ra_res_global[0x00006];/* Log2 of the maximum number of RDMA read/atomic operations the HCA responder can support globally. That implies the RDB table size. */ + pseudo_bit_t reserved18[0x00016]; + pseudo_bit_t log2_rsvd_rdbs[0x00004];/* Log (base 2) of the number of RDB entries reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_rdbs-1 */ +/* -------------- */ + pseudo_bit_t rsz_srq[0x00001]; /* Ability to modify the maximum number of WRs per SRQ. */ + pseudo_bit_t reserved19[0x0001f]; +/* -------------- */ + pseudo_bit_t num_ports[0x00004]; /* Number of IB ports. */ + pseudo_bit_t max_vl[0x00004]; /* Maximum VLs supported on each port, excluding VL15 */ + pseudo_bit_t max_port_width[0x00004];/* IB Port Width + 1 - 1x + 3 - 1x, 4x + 11 - 1x, 4x or 12x + else - Reserved */ + pseudo_bit_t max_mtu[0x00004]; /* Maximum MTU Supported + 0x0 - Reserved + 0x1 - 256 + 0x2 - 512 + 0x3 - 1024 + 0x4 - 2048 + 0x5 - 0xF Reserved */ + pseudo_bit_t local_ca_ack_delay[0x00005];/* The Local CA ACK Delay. This is the value recommended to be returned in Query HCA verb. + The delay value in microseconds is computed using 4.096us * 2^(local_ca_ack_delay). 
*/ + pseudo_bit_t reserved20[0x0000b]; +/* -------------- */ + pseudo_bit_t log_max_gid[0x00004]; /* Log2 of the maximum number of GIDs per port */ + pseudo_bit_t reserved21[0x0001c]; +/* -------------- */ + pseudo_bit_t log_max_pkey[0x00004]; /* Log2 of the max PKey Table Size (per IB port) */ + pseudo_bit_t reserved22[0x0000c]; + pseudo_bit_t stat_rate_support[0x00010];/* bit mask of stat rate supported + bit 0 - full bw + bit 1 - 1/4 bw + bit 2 - 1/8 bw + bit 3 - 1/2 bw; */ +/* -------------- */ + pseudo_bit_t reserved23[0x00020]; +/* -------------- */ + pseudo_bit_t rc[0x00001]; /* RC Transport supported */ + pseudo_bit_t uc[0x00001]; /* UC Transport Supported */ + pseudo_bit_t ud[0x00001]; /* UD Transport Supported */ + pseudo_bit_t rd[0x00001]; /* RD Transport Supported */ + pseudo_bit_t raw_ipv6[0x00001]; /* Raw IPv6 Transport Supported */ + pseudo_bit_t raw_ether[0x00001]; /* Raw Ethertype Transport Supported */ + pseudo_bit_t srq[0x00001]; /* SRQ is supported + */ + pseudo_bit_t ipo_ib_checksum[0x00001];/* IP over IB checksum is supported */ + pseudo_bit_t pkv[0x00001]; /* PKey Violation Counter Supported */ + pseudo_bit_t qkv[0x00001]; /* QKey Violation Coutner Supported */ + pseudo_bit_t reserved24[0x00006]; + pseudo_bit_t mw[0x00001]; /* Memory windows supported */ + pseudo_bit_t apm[0x00001]; /* Automatic Path Migration Supported */ + pseudo_bit_t atm[0x00001]; /* Atomic operations supported (atomicity is guaranteed between QPs on this HCA) */ + pseudo_bit_t rm[0x00001]; /* Raw Multicast Supported */ + pseudo_bit_t avp[0x00001]; /* Address Vector Port checking supported */ + pseudo_bit_t udm[0x00001]; /* UD Multicast Supported */ + pseudo_bit_t reserved25[0x00002]; + pseudo_bit_t pg[0x00001]; /* Paging on demand supported */ + pseudo_bit_t r[0x00001]; /* Router mode supported */ + pseudo_bit_t reserved26[0x00006]; +/* -------------- */ + pseudo_bit_t log_pg_sz[0x00008]; /* Minimum system page size supported (log2). 
+ For proper operation it must be less than or equal the hosting platform (CPU) minimum page size. */ + pseudo_bit_t reserved27[0x00008]; + pseudo_bit_t uar_sz[0x00006]; /* UAR Area Size = 1MB * 2^uar_sz */ + pseudo_bit_t reserved28[0x00006]; + pseudo_bit_t num_rsvd_uars[0x00004];/* The number of UARs reserved for firmware use + The reserved resources are numbered from 0 to num_reserved_uars-1 + Note that UAR number num_reserved_uars is always for the kernel. */ +/* -------------- */ + pseudo_bit_t reserved29[0x00020]; +/* -------------- */ + pseudo_bit_t max_desc_sz_sq[0x00010];/* Max descriptor size in bytes for the send queue */ + pseudo_bit_t max_sg_sq[0x00008]; /* The maximum S/G list elements in a SQ WQE (max_desc_sz/16 - 3) */ + pseudo_bit_t reserved30[0x00008]; +/* -------------- */ + pseudo_bit_t max_desc_sz_rq[0x00010];/* Max descriptor size in bytes for the receive queue */ + pseudo_bit_t max_sg_rq[0x00008]; /* The maximum S/G list elements in a RQ WQE (max_desc_sz/16 - 3) */ + pseudo_bit_t reserved31[0x00008]; +/* -------------- */ + pseudo_bit_t reserved32[0x00040]; +/* -------------- */ + pseudo_bit_t log_max_mcg[0x00008]; /* Log2 of the maximum number of multicast groups */ + pseudo_bit_t num_rsvd_mcgs[0x00004];/* The number of MGMs reserved for firmware use in the MGHT. + The reserved resources are numbered from 0 to num_reserved_mcgs-1 + If 0 - no resources are reserved. */ + pseudo_bit_t reserved33[0x00004]; + pseudo_bit_t log_max_qp_mcg[0x00008];/* Log2 of the maximum number of QPs per multicast group */ + pseudo_bit_t reserved34[0x00008]; +/* -------------- */ + pseudo_bit_t log_max_rdds[0x00006]; /* Log2 of the maximum number of RDDs */ + pseudo_bit_t reserved35[0x00006]; + pseudo_bit_t num_rsvd_rdds[0x00004];/* The number of RDDs reserved for firmware use + The reserved resources are numbered from 0 to num_reserved_rdds-1. + If 0 - no resources are reserved. 
*/ + pseudo_bit_t log_max_pd[0x00006]; /* Log2 of the maximum number of PDs */ + pseudo_bit_t reserved36[0x00006]; + pseudo_bit_t num_rsvd_pds[0x00004]; /* The number of PDs reserved for firmware use + The reserved resources are numbered from 0 to num_reserved_pds-1 + If 0 - no resources are reserved. */ +/* -------------- */ + pseudo_bit_t reserved37[0x000c0]; +/* -------------- */ + pseudo_bit_t qpc_entry_sz[0x00010]; /* QPC Entry Size for the device + For the InfiniHost-III-EX MT25208 entry size is 256 bytes */ + pseudo_bit_t eec_entry_sz[0x00010]; /* EEC Entry Size for the device + For the InfiniHost-III-EX MT25208 entry size is 256 bytes */ +/* -------------- */ + pseudo_bit_t eqpc_entry_sz[0x00010];/* Extended QPC entry size for the device + For the InfiniHost-III-EX MT25208 entry size is 32 bytes */ + pseudo_bit_t eeec_entry_sz[0x00010];/* Extended EEC entry size for the device + For the InfiniHost-III-EX MT25208 entry size is 32 bytes */ +/* -------------- */ + pseudo_bit_t cqc_entry_sz[0x00010]; /* CQC entry size for the device + For the InfiniHost-III-EX MT25208 entry size is 64 bytes */ + pseudo_bit_t eqc_entry_sz[0x00010]; /* EQ context entry size for the device + For the InfiniHost-III-EX MT25208 entry size is 64 bytes */ +/* -------------- */ + pseudo_bit_t uar_scratch_entry_sz[0x00010];/* UAR Scratchpad Entry Size + For the InfiniHost-III-EX MT25208 entry size is 32 bytes */ + pseudo_bit_t srq_entry_sz[0x00010]; /* SRQ context entry size for the device + For the InfiniHost-III-EX MT25208 entry size is 32 bytes */ +/* -------------- */ + pseudo_bit_t mpt_entry_sz[0x00010]; /* MPT entry size in Bytes for the device. + For the InfiniHost-III-EX MT25208 entry size is 64 bytes */ + pseudo_bit_t mtt_entry_sz[0x00010]; /* MTT entry size in Bytes for the device. 
+ For the InfiniHost-III-EX MT25208 entry size is 8 bytes */ +/* -------------- */ + pseudo_bit_t bmme[0x00001]; /* Base Memory Management Extension Support */ + pseudo_bit_t win_type[0x00001]; /* Bound Type 2 Memory Window Association mechanism: + 0 - Type 2A - QP Number Association; or + 1 - Type 2B - QP Number and PD Association. */ + pseudo_bit_t mps[0x00001]; /* Ability of this HCA to support multiple page sizes per Memory Region. */ + pseudo_bit_t bl[0x00001]; /* Ability of this HCA to support Block List Physical Buffer Lists. (The device does not supports Block List) */ + pseudo_bit_t zb[0x00001]; /* Zero Based region/windows supported */ + pseudo_bit_t lif[0x00001]; /* Ability of this HCA to support Local Invalidate Fencing. */ + pseudo_bit_t reserved38[0x00002]; + pseudo_bit_t log_pbl_sz[0x00006]; /* Log2 of the Maximum Physical Buffer List size in Bytes supported by this HCA when invoking the Allocate L_Key verb. + */ + pseudo_bit_t reserved39[0x00012]; +/* -------------- */ + pseudo_bit_t resd_lkey[0x00020]; /* The value of the reserved Lkey for Base Memory Management Extension */ +/* -------------- */ + pseudo_bit_t lamr[0x00001]; /* When set the device requires local attached memory in order to operate. + When set, ICM pages, Firmware Area and ICM auxiliary pages must be allocated in the local attached memory. */ + pseudo_bit_t reserved40[0x0001f]; +/* -------------- */ + pseudo_bit_t max_icm_size_h[0x00020];/* Bits [63:32] of maximum ICM size InfiniHost III Ex support in bytes. */ +/* -------------- */ + pseudo_bit_t max_icm_size_l[0x00020];/* Bits [31:0] of maximum ICM size InfiniHost III Ex support in bytes. 
*/ +/* -------------- */ + pseudo_bit_t reserved41[0x002c0]; +/* -------------- */ +}; + +/* QUERY_ADAPTER Parameters Block */ + +struct arbelprm_query_adapter_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00080]; +/* -------------- */ + pseudo_bit_t reserved1[0x00018]; + pseudo_bit_t intapin[0x00008]; /* Driver should set the INTR field in the event queue context to this value in order to get Express interrupt messages. */ +/* -------------- */ + pseudo_bit_t reserved2[0x00060]; +/* -------------- */ + struct arbelprm_vsd_st vsd; +/* -------------- */ +}; + +/* QUERY_FW Parameters Block */ + +struct arbelprm_query_fw_st { /* Little Endian */ + pseudo_bit_t fw_rev_major[0x00010]; /* Firmware Revision - Major */ + pseudo_bit_t fw_pages[0x00010]; /* Amount of physical memory to be allocated for FW usage, in 4KByte pages. */ +/* -------------- */ + pseudo_bit_t fw_rev_minor[0x00010]; /* Firmware Revision - Minor */ + pseudo_bit_t fw_rev_subminor[0x00010];/* Firmware Sub-minor version (Patch level). */ +/* -------------- */ + pseudo_bit_t cmd_interface_rev[0x00010];/* Command Interface Interpreter Revision ID */ + pseudo_bit_t reserved0[0x0000e]; + pseudo_bit_t wqe_h_mode[0x00001]; /* Hermon mode. If '1', then WQE and AV format is the advanced format */ + pseudo_bit_t zb_wq_cq[0x00001]; /* If '1', then ZB mode of WQ and CQ is enabled (i.e. 
real Memfree PRM is supported) */ +/* -------------- */ + pseudo_bit_t log_max_outstanding_cmd[0x00008];/* Log2 of the maximum number of commands the HCR can support simultaneously */ + pseudo_bit_t reserved1[0x00017]; + pseudo_bit_t dt[0x00001]; /* Debug Trace Support + 0 - Debug trace is not supported + 1 - Debug trace is supported */ +/* -------------- */ + pseudo_bit_t cmd_interface_db[0x00001];/* Set if the device accepts commands by means of special doorbells */ + pseudo_bit_t reserved2[0x0001f]; +/* -------------- */ + pseudo_bit_t reserved3[0x00060]; +/* -------------- */ + pseudo_bit_t clr_int_base_addr_h[0x00020];/* Bits [63:32] of Clear interrupt register physical address. + Points to a 64-bit register. */ +/* -------------- */ + pseudo_bit_t clr_int_base_addr_l[0x00020];/* Bits [31:0] of Clear interrupt register physical address. + Points to a 64-bit register. */ +/* -------------- */ + pseudo_bit_t reserved4[0x00040]; +/* -------------- */ + pseudo_bit_t error_buf_start_h[0x00020];/* Read Only buffer for catastrophic error reports (physical address) */ +/* -------------- */ + pseudo_bit_t error_buf_start_l[0x00020];/* Read Only buffer for catastrophic error reports (physical address) */ +/* -------------- */ + pseudo_bit_t error_buf_size[0x00020];/* Size in words */ +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ + pseudo_bit_t eq_arm_base_addr_h[0x00020];/* Bits [63:32] of EQ Arm DBs physical address. + Points to a 64-bit register. + Setting bit x in the offset arms EQ number x. + */ +/* -------------- */ + pseudo_bit_t eq_arm_base_addr_l[0x00020];/* Bits [31:0] of EQ Arm DBs physical address. + Points to a 64-bit register. + Setting bit x in the offset arms EQ number x. */ +/* -------------- */ + pseudo_bit_t eq_set_ci_base_addr_h[0x00020];/* Bits [63:32] of EQ Set CI DBs Table physical address. + Points to the EQ Set CI DBs Table base address. 
*/ +/* -------------- */ + pseudo_bit_t eq_set_ci_base_addr_l[0x00020];/* Bits [31:0] of EQ Set CI DBs Table physical address. + Points to the EQ Set CI DBs Table base address. */ +/* -------------- */ + pseudo_bit_t cmd_db_dw1[0x00010]; /* Offset in bytes from cmd_db_addr_base where DWord 1 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ + pseudo_bit_t cmd_db_dw0[0x00010]; /* Offset in bytes from cmd_db_addr_base where DWord 0 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t cmd_db_dw3[0x00010]; /* Offset in bytes from cmd_db_addr_base where DWord 3 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ + pseudo_bit_t cmd_db_dw2[0x00010]; /* Offset in bytes from cmd_db_addr_base where DWord 2 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t cmd_db_dw5[0x00010]; /* Offset in bytes from cmd_db_addr_base where DWord 5 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ + pseudo_bit_t cmd_db_dw4[0x00010]; /* Offset in bytes from cmd_db_addr_base where DWord 4 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t cmd_db_dw7[0x00010]; /* Offset in bytes from cmd_db_addr_base where DWord 7 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ + pseudo_bit_t cmd_db_dw6[0x00010]; /* Offset in bytes from cmd_db_addr_base where DWord 6 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t cmd_db_addr_base_h[0x00020];/* High bits of cmd_db_addr_base, which cmd_db_dw offsets refer to. 
Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t cmd_db_addr_base_l[0x00020];/* Low bits of cmd_db_addr_base, which cmd_db_dw offsets refer to. Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t reserved6[0x004c0]; +/* -------------- */ +}; + +/* ACCESS_LAM */ + +struct arbelprm_access_lam_st { /* Little Endian */ + struct arbelprm_access_lam_inject_errors_st access_lam_inject_errors; +/* -------------- */ + pseudo_bit_t reserved0[0x00080]; +/* -------------- */ +}; + +/* ENABLE_LAM Parameters Block */ + +struct arbelprm_enable_lam_st { /* Little Endian */ + pseudo_bit_t lam_start_adr_h[0x00020];/* LAM start address [63:32] */ +/* -------------- */ + pseudo_bit_t lam_start_adr_l[0x00020];/* LAM start address [31:0] */ +/* -------------- */ + pseudo_bit_t lam_end_adr_h[0x00020];/* LAM end address [63:32] */ +/* -------------- */ + pseudo_bit_t lam_end_adr_l[0x00020];/* LAM end address [31:0] */ +/* -------------- */ + pseudo_bit_t di[0x00002]; /* Data Integrity Configuration: + 00 - none + 01 - Parity + 10 - ECC Detection Only + 11 - ECC With Correction */ + pseudo_bit_t ap[0x00002]; /* Auto Precharge Mode + 00 - No auto precharge + 01 - Auto precharge per transaction + 10 - Auto precharge per 64 bytes + 11 - reserved */ + pseudo_bit_t dh[0x00001]; /* When set, LAM is hidden and cannot be accessed directly from the PCI bus. 
*/ + pseudo_bit_t reserved0[0x0001b]; +/* -------------- */ + pseudo_bit_t reserved1[0x00160]; +/* -------------- */ + struct arbelprm_dimminfo_st dimm0; /* Logical DIMM 0 Parameters */ +/* -------------- */ + struct arbelprm_dimminfo_st dimm1; /* Logical DIMM 1 Parameters */ +/* -------------- */ + pseudo_bit_t reserved2[0x00400]; +/* -------------- */ +}; + +/* Memory Access Parameters for UD Address Vector Table */ + +struct arbelprm_udavtable_memory_parameters_st { /* Little Endian */ + pseudo_bit_t l_key[0x00020]; /* L_Key used to access TPT */ +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* PD used by TPT for matching against PD of region entry being accessed. */ + pseudo_bit_t reserved0[0x00005]; + pseudo_bit_t xlation_en[0x00001]; /* When cleared, address is physical address and no translation will be done. When set, address is virtual. */ + pseudo_bit_t reserved1[0x00002]; +/* -------------- */ +}; + +/* INIT_HCA & QUERY_HCA Parameters Block */ + +struct arbelprm_init_hca_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00060]; +/* -------------- */ + pseudo_bit_t reserved1[0x00010]; + pseudo_bit_t time_stamp_granularity[0x00008];/* This field controls the granularity at which the CQE Timestamp counter is incremented. + The TimeStampGranularity unit is 1/4 of a microsecond (e.g. if TimeStampGranularity is configured to 0x2, CQE Timestamp will be incremented every one microsecond; NOTE(review): 0x2 units of 1/4 microsecond would be 0.5 microseconds - confirm the example against the PRM). + When set to zero, timestamp reporting in the CQE is disabled. + This feature is currently not supported. + */ + pseudo_bit_t hca_core_clock[0x00008];/* Internal Clock Period (in units of 1/16 ns) (QUERY_HCA only) */ +/* -------------- */ + pseudo_bit_t reserved2[0x00008]; + pseudo_bit_t router_qp[0x00010]; /* Upper 16 bits to be used as a QP number for router mode. Low order 8 bits are taken from the TClass field of the incoming packet. 
+ Valid only if RE bit is set */ + pseudo_bit_t reserved3[0x00007]; + pseudo_bit_t re[0x00001]; /* Router Mode Enable + If this bit is set, entire packet (including all headers and ICRC) will be considered as a data payload and will be scattered to memory as specified in the descriptor that is posted on the QP matching the TClass field of packet. */ +/* -------------- */ + pseudo_bit_t udp[0x00001]; /* UD Port Check Enable + 0 - Port field in Address Vector is ignored + 1 - HCA will check the port field in AV entry (fetched for UD descriptor) against the Port of the UD QP executing the descriptor. */ + pseudo_bit_t he[0x00001]; /* Host Endianness - Used for Atomic Operations + 0 - Host is Little Endian + 1 - Host is Big Endian + */ + pseudo_bit_t reserved4[0x00001]; + pseudo_bit_t ce[0x00001]; /* Checksum Enabled - when set, IPoverIB checksum generation & checking is enabled */ + pseudo_bit_t sph[0x00001]; /* 0 - SW calculates TCP/UDP Pseudo-Header checksum and inserts it into the TCP/UDP checksum field when sending a packet + 1 - HW calculates TCP/UDP Pseudo-Header checksum when sending a packet + */ + pseudo_bit_t rph[0x00001]; /* 0 - No HW calculation of TCP/UDP Pseudo-Header checksum is done when receiving a packet + 1 - HW calculates TCP/UDP Pseudo-Header checksum when receiving a packet + */ + pseudo_bit_t reserved5[0x00002]; + pseudo_bit_t responder_exu[0x00004];/* Indicates the relation between the execution engines allocation dedicated for responder versus the engines dedicated for requester. + responder_exu/16 = (number of responder exu engines)/(total number of engines) + Legal values are 0x0-0xF. 0 is "auto". + + */ + pseudo_bit_t reserved6[0x00004]; + pseudo_bit_t wqe_quota[0x0000f]; /* Maximum number of WQEs that are executed prior to preemption of execution unit. 0 - reserved. */ + pseudo_bit_t wqe_quota_en[0x00001]; /* If set - wqe_quota field is used. 
If cleared - WQE quota is set to "auto" value */ +/* -------------- */ + pseudo_bit_t reserved7[0x00040]; +/* -------------- */ + struct arbelprm_qpcbaseaddr_st qpc_eec_cqc_eqc_rdb_parameters; +/* -------------- */ + pseudo_bit_t reserved8[0x00100]; +/* -------------- */ + struct arbelprm_multicastparam_st multicast_parameters; +/* -------------- */ + pseudo_bit_t reserved9[0x00080]; +/* -------------- */ + struct arbelprm_tptparams_st tpt_parameters; +/* -------------- */ + pseudo_bit_t reserved10[0x00080]; +/* -------------- */ + struct arbelprm_uar_params_st uar_parameters;/* UAR Parameters */ +/* -------------- */ + pseudo_bit_t reserved11[0x00600]; +/* -------------- */ +}; + +/* Event Queue Context Table Entry */ + +struct arbelprm_eqc_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t st[0x00004]; /* Event delivery state machine + 0x9 - Armed + 0xA - Fired + 0xB - Always_Armed (auto-rearm) + other - reserved */ + pseudo_bit_t reserved1[0x00005]; + pseudo_bit_t oi[0x00001]; /* Overrun ignore. + If set, HW will not check EQ full condition when writing new EQEs. */ + pseudo_bit_t tr[0x00001]; /* Translation Required. If set - EQ access undergoes address translation. */ + pseudo_bit_t reserved2[0x00005]; + pseudo_bit_t owner[0x00004]; /* 0 - SW ownership + 1 - HW ownership + Valid for the QUERY_EQ and HW2SW_EQ commands only */ + pseudo_bit_t status[0x00004]; /* EQ status: + 0000 - OK + 1010 - EQ write failure + Valid for the QUERY_EQ and HW2SW_EQ commands only */ +/* -------------- */ + pseudo_bit_t start_address_h[0x00020];/* Start Address of Event Queue[63:32]. */ +/* -------------- */ + pseudo_bit_t start_address_l[0x00020];/* Start Address of Event Queue[31:0]. + Must be aligned on 32-byte boundary */ +/* -------------- */ + pseudo_bit_t reserved3[0x00018]; + pseudo_bit_t log_eq_size[0x00005]; /* Number of entries in this EQ is 2^log_eq_size. + Log_eq_size must be bigger than 1. + Maximum EQ size is 2^17 EQEs (max Log_eq_size is 17). 
*/ + pseudo_bit_t reserved4[0x00003]; +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ + pseudo_bit_t intr[0x00008]; /* Interrupt (message) to be generated to report event to INT layer. + 00iiiiii - set to INTA given in QUERY_ADAPTER in order to generate INTA messages on Express. + 10jjjjjj - specifies type of interrupt message to be generated (total 64 different messages supported). + All other values are reserved and should not be used. + + If interrupt generation is not required, ST field must be set upon creation to Fired state. No EQ arming doorbell should be performed. In this case hardware will not generate any interrupt. */ + pseudo_bit_t reserved6[0x00018]; +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* PD to be used to access EQ */ + pseudo_bit_t reserved7[0x00008]; +/* -------------- */ + pseudo_bit_t lkey[0x00020]; /* Memory key (L-Key) to be used to access EQ */ +/* -------------- */ + pseudo_bit_t reserved8[0x00040]; +/* -------------- */ + pseudo_bit_t consumer_indx[0x00020];/* Contains next entry to be read upon polling the event queue. + Must be initialized to zero while opening EQ */ +/* -------------- */ + pseudo_bit_t producer_indx[0x00020];/* Contains next entry in EQ to be written by the HCA. + Must be initialized to zero while opening EQ. */ +/* -------------- */ + pseudo_bit_t reserved9[0x00080]; +/* -------------- */ +}; + +/* Memory Translation Table (MTT) Entry */ + +struct arbelprm_mtt_st { /* Little Endian */ + pseudo_bit_t ptag_h[0x00020]; /* High-order bits of physical tag. The size of the field depends on the page size of the region. Maximum PTAG size is 52 bits. */ +/* -------------- */ + pseudo_bit_t p[0x00001]; /* Present bit. If set, page entry is valid. If cleared, access to this page will generate non-present page access fault. */ + pseudo_bit_t reserved0[0x0000b]; + pseudo_bit_t ptag_l[0x00014]; /* Low-order bits of Physical tag. The size of the field depends on the page size of the region. 
Maximum PTAG size is 52 bits. */ +/* -------------- */ +}; + +/* Memory Protection Table (MPT) Entry */ + +struct arbelprm_mpt_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t r_w[0x00001]; /* Defines whether this entry is Region (1) or Window (0) */ + pseudo_bit_t pa[0x00001]; /* Physical address. If set, no virtual-to-physical address translation will be performed for this region */ + pseudo_bit_t lr[0x00001]; /* If set - local read access enabled */ + pseudo_bit_t lw[0x00001]; /* If set - local write access enabled */ + pseudo_bit_t rr[0x00001]; /* If set - remote read access enabled. */ + pseudo_bit_t rw[0x00001]; /* If set - remote write access enabled */ + pseudo_bit_t a[0x00001]; /* If set - remote Atomic access is enabled */ + pseudo_bit_t eb[0x00001]; /* If set - Bind is enabled. Valid for region entry only. */ + pseudo_bit_t reserved1[0x0000c]; + pseudo_bit_t status[0x00004]; /* Region/Window Status + 0xF - not valid (SW ownership) + 0x3 - FREE state + else - HW ownership + Unbound Type I windows are denoted by the reg_wnd_len field being equal to zero. + Unbound Type II windows are denoted by Status=FREE. */ +/* -------------- */ + pseudo_bit_t page_size[0x00005]; /* Page size used for the region. Actual size is [4K]*2^Page_size bytes. + page_size should be less than 20. */ + pseudo_bit_t reserved2[0x00002]; + pseudo_bit_t type[0x00001]; /* Applicable for windows only, must be zero for regions + 0 - Type one window + 1 - Type two window */ + pseudo_bit_t qpn[0x00018]; /* QP number this MW is attached to. Valid for type2 memory windows and on QUERY_MPT only */ +/* -------------- */ + pseudo_bit_t mem_key[0x00020]; /* The memory Key. The field holds the mem_key field in the following semantics: {key[7:0],key[31:8]}. 
+ */ +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* Protection Domain */ + pseudo_bit_t reserved3[0x00001]; + pseudo_bit_t ei[0x00001]; /* Enable Invalidation - When set, Local/Remote invalidation can be executed on this window/region. + Must be set for type2 windows and non-shared physical memory regions. + Must be cleared for regions that are used to access Work Queues, Completion Queues and Event Queues */ + pseudo_bit_t zb[0x00001]; /* When set, this region is a Zero Based Region */ + pseudo_bit_t fre[0x00001]; /* When set, Fast Registration Operations can be executed on this region */ + pseudo_bit_t rae[0x00001]; /* When set, remote access can be enabled on this region. + Used when executing a Fast Registration Work Request to validate that remote access rights can be granted to this MPT. + If the bit is cleared, a Fast Registration Work Request requesting remote access rights will fail. + */ + pseudo_bit_t reserved4[0x00003]; +/* -------------- */ + pseudo_bit_t start_address_h[0x00020];/* Start Address[63:32] - Virtual Address where this region/window starts */ +/* -------------- */ + pseudo_bit_t start_address_l[0x00020];/* Start Address[31:0] - Virtual Address where this region/window starts */ +/* -------------- */ + pseudo_bit_t reg_wnd_len_h[0x00020];/* Region/Window Length[63:32] */ +/* -------------- */ + pseudo_bit_t reg_wnd_len_l[0x00020];/* Region/Window Length[31:0] */ +/* -------------- */ + pseudo_bit_t lkey[0x00020]; /* Must be 0 for SW2HW_MPT. + On QUERY_MPT and HW2SW_MPT commands for Memory Window it reflects the LKey of the Region that the Window is bound to. + The field holds the lkey field in the following semantics: {key[7:0],key[31:8]}. */ +/* -------------- */ + pseudo_bit_t win_cnt[0x00020]; /* Number of windows bound to this region. Valid for regions only. + The field is valid only for the QUERY_MPT and HW2SW_MPT commands. 
*/ +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ + pseudo_bit_t mtt_adr_h[0x00006]; /* Base (first) address of the MTT relative to MTT base in the ICM */ + pseudo_bit_t reserved6[0x0001a]; +/* -------------- */ + pseudo_bit_t reserved7[0x00003]; + pseudo_bit_t mtt_adr_l[0x0001d]; /* Base (first) address of the MTT relative to MTT base address in the ICM. Must be aligned on 8 bytes. */ +/* -------------- */ + pseudo_bit_t mtt_sz[0x00020]; /* Number of MTT entries allocated for this MR. + When Fast Registration Operations cannot be executed on this region (FRE bit is zero) this field is reserved. + When Fast Registration Operation is enabled (FRE bit is set) this field indicates the number of MTTs allocated for this MR. If mtt_sz value is zero, there is no limit for the number of MTTs and the HCA does not check this field when executing fast register WQE. */ +/* -------------- */ + pseudo_bit_t reserved8[0x00040]; +/* -------------- */ +}; + +/* Completion Queue Context Table Entry */ + +struct arbelprm_completion_queue_context_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t st[0x00004]; /* Event delivery state machine + 0x0 - reserved + 0x9 - ARMED (Request for Notification) + 0x6 - ARMED SOLICITED (Request Solicited Notification) + 0xA - FIRED + other - reserved + + Must be 0x0 in CQ initialization. + Valid for the QUERY_CQ and HW2SW_CQ commands only. */ + pseudo_bit_t reserved1[0x00005]; + pseudo_bit_t oi[0x00001]; /* When set, overrun ignore is enabled. + When set, Updates of CQ consumer counter (poll for completion) or Request completion notifications (Arm CQ) doorbells should not be rung on that CQ. */ + pseudo_bit_t reserved2[0x0000a]; + pseudo_bit_t status[0x00004]; /* CQ status + 0000 - OK + 1001 - CQ overflow + 1010 - CQ write failure + Valid for the QUERY_CQ and HW2SW_CQ commands only */ +/* -------------- */ + pseudo_bit_t start_address_h[0x00020];/* Start address of CQ[63:32]. 
+ Must be aligned on CQE size (32 bytes) */ +/* -------------- */ + pseudo_bit_t start_address_l[0x00020];/* Start address of CQ[31:0]. + Must be aligned on CQE size (32 bytes) */ +/* -------------- */ + pseudo_bit_t usr_page[0x00018]; /* UAR page this CQ can be accessed through (ringing CQ doorbells) */ + pseudo_bit_t log_cq_size[0x00005]; /* Log (base 2) of the CQ size (in entries). + Maximum CQ size is 2^17 CQEs (max log_cq_size is 17) */ + pseudo_bit_t reserved3[0x00003]; +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ + pseudo_bit_t c_eqn[0x00008]; /* Event Queue this CQ reports completion events to. + Valid values are 0 to 63 + If configured to value other than 0-63, completion events will not be reported on the CQ. */ + pseudo_bit_t reserved5[0x00018]; +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* Protection Domain to be used to access CQ. + Must be the same PD as that of the CQ L_Key. */ + pseudo_bit_t reserved6[0x00008]; +/* -------------- */ + pseudo_bit_t l_key[0x00020]; /* Memory key (L_Key) to be used to access CQ */ +/* -------------- */ + pseudo_bit_t last_notified_indx[0x00020];/* Maintained by HW. + Valid for QUERY_CQ and HW2SW_CQ commands only. */ +/* -------------- */ + pseudo_bit_t solicit_producer_indx[0x00020];/* Maintained by HW. + Valid for QUERY_CQ and HW2SW_CQ commands only. + */ +/* -------------- */ + pseudo_bit_t consumer_counter[0x00020];/* Consumer counter is a 32-bit counter that is incremented for each CQE polled from the CQ. + Must be 0x0 in CQ initialization. + Valid for the QUERY_CQ and HW2SW_CQ commands only. */ +/* -------------- */ + pseudo_bit_t producer_counter[0x00020];/* Producer counter is a 32-bit counter that is incremented for each CQE that is written by the HW to the CQ. + CQ overrun is reported if Producer_counter + 1 equals Consumer_counter and a CQE needs to be added. 
+ Maintained by HW (valid for the QUERY_CQ and HW2SW_CQ commands only) */ +/* -------------- */ + pseudo_bit_t cqn[0x00018]; /* CQ number. Least significant bits are constrained by the position of this CQ in CQC table + Valid for the QUERY_CQ and HW2SW_CQ commands only */ + pseudo_bit_t reserved7[0x00008]; +/* -------------- */ + pseudo_bit_t cq_ci_db_record[0x00020];/* Index in the UAR Context Table Entry. + HW uses this index as an offset from the UAR Context Table Entry in order to read this CQ Consumer Counter doorbell record. + This value can be retrieved from the HW in the QUERY_CQ command. */ +/* -------------- */ + pseudo_bit_t cq_state_db_record[0x00020];/* Index in the UAR Context Table Entry. + HW uses this index as an offset from the UAR Context Table Entry in order to read this CQ state doorbell record. + This value can be retrieved from the HW in the QUERY_CQ command. */ +/* -------------- */ + pseudo_bit_t reserved8[0x00020]; +/* -------------- */ +}; + +/* GPIO_event_data */ + +struct arbelprm_gpio_event_data_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00060]; +/* -------------- */ + pseudo_bit_t gpio_event_hi[0x00020];/* If any bit is set to 1, then a rising/falling event has occurred on the corresponding GPIO pin. */ +/* -------------- */ + pseudo_bit_t gpio_event_lo[0x00020];/* If any bit is set to 1, then a rising/falling event has occurred on the corresponding GPIO pin. 
*/ +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ +}; + +/* Event_data Field - QP/EE Events */ + +struct arbelprm_qp_ee_event_st { /* Little Endian */ + pseudo_bit_t qpn_een[0x00018]; /* QP/EE/SRQ number the event is reported for */ + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ + pseudo_bit_t reserved2[0x0001c]; + pseudo_bit_t e_q[0x00001]; /* If set - EEN; if cleared - QP in the QPN/EEN field + Not valid on SRQ events */ + pseudo_bit_t reserved3[0x00003]; +/* -------------- */ + pseudo_bit_t reserved4[0x00060]; +/* -------------- */ +}; + +/* InfiniHost-III-EX Type0 Configuration Header */ + +struct arbelprm_mt25208_type0_st { /* Little Endian */ + pseudo_bit_t vendor_id[0x00010]; /* Hardwired to 0x15B3 */ + pseudo_bit_t device_id[0x00010]; /* 25208 (decimal) - InfiniHost-III compatible mode + 25218 (decimal) - InfiniHost-III EX mode (the mode described in this manual) + 25209 (decimal) - Flash burner mode - see Flash burning application note for further details on this mode + */ +/* -------------- */ + pseudo_bit_t command[0x00010]; /* PCI Command Register */ + pseudo_bit_t status[0x00010]; /* PCI Status Register */ +/* -------------- */ + pseudo_bit_t revision_id[0x00008]; + pseudo_bit_t class_code_hca_class_code[0x00018]; +/* -------------- */ + pseudo_bit_t cache_line_size[0x00008];/* Cache Line Size */ + pseudo_bit_t latency_timer[0x00008]; + pseudo_bit_t header_type[0x00008]; /* hardwired to zero */ + pseudo_bit_t bist[0x00008]; +/* -------------- */ + pseudo_bit_t bar0_ctrl[0x00004]; /* hard-wired to 0100 */ + pseudo_bit_t reserved0[0x00010]; + pseudo_bit_t bar0_l[0x0000c]; /* Lower bits of BAR0 (Device Configuration Space) */ +/* -------------- */ + pseudo_bit_t bar0_h[0x00020]; /* Upper 32 bits of BAR0 (Device Configuration Space) */ +/* -------------- */ + pseudo_bit_t bar1_ctrl[0x00004]; /* Hardwired to 1100 */ + pseudo_bit_t reserved1[0x00010]; + pseudo_bit_t 
bar1_l[0x0000c]; /* Lower bits of BAR1 (User Access Region - UAR - space) */ +/* -------------- */ + pseudo_bit_t bar1_h[0x00020]; /* upper 32 bits of BAR1 (User Access Region - UAR - space) */ +/* -------------- */ + pseudo_bit_t bar2_ctrl[0x00004]; /* Hardwired to 1100 */ + pseudo_bit_t reserved2[0x00010]; + pseudo_bit_t bar2_l[0x0000c]; /* Lower bits of BAR2 - Local Attached Memory if present and enabled. Else zeroed. */ +/* -------------- */ + pseudo_bit_t bar2_h[0x00020]; /* Upper 32 bits of BAR2 - Local Attached Memory if present and enabled. Else zeroed. */ +/* -------------- */ + pseudo_bit_t cardbus_cis_pointer[0x00020]; +/* -------------- */ + pseudo_bit_t subsystem_vendor_id[0x00010];/* Specified by the device NVMEM configuration */ + pseudo_bit_t subsystem_id[0x00010]; /* Specified by the device NVMEM configuration */ +/* -------------- */ + pseudo_bit_t expansion_rom_enable[0x00001];/* Expansion ROM Enable. Hardwired to 0 if expansion ROM is disabled in the device NVMEM configuration. */ + pseudo_bit_t reserved3[0x0000a]; + pseudo_bit_t expansion_rom_base_address[0x00015];/* Expansion ROM Base Address (upper 21 bit). Hardwired to 0 if expansion ROM is disabled in the device NVMEM configuration. 
*/ +/* -------------- */ + pseudo_bit_t capabilities_pointer[0x00008];/* Specified by the device NVMEM configuration */ + pseudo_bit_t reserved4[0x00018]; +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ + pseudo_bit_t interrupt_line[0x00008]; + pseudo_bit_t interrupt_pin[0x00008]; + pseudo_bit_t min_gnt[0x00008]; + pseudo_bit_t max_latency[0x00008]; +/* -------------- */ + pseudo_bit_t reserved6[0x00100]; +/* -------------- */ + pseudo_bit_t msi_cap_id[0x00008]; + pseudo_bit_t msi_next_cap_ptr[0x00008]; + pseudo_bit_t msi_en[0x00001]; + pseudo_bit_t multiple_msg_cap[0x00003]; + pseudo_bit_t multiple_msg_en[0x00003]; + pseudo_bit_t cap_64_bit_addr[0x00001]; + pseudo_bit_t reserved7[0x00008]; +/* -------------- */ + pseudo_bit_t msg_addr_l[0x00020]; +/* -------------- */ + pseudo_bit_t msg_addr_h[0x00020]; +/* -------------- */ + pseudo_bit_t msg_data[0x00010]; + pseudo_bit_t reserved8[0x00010]; +/* -------------- */ + pseudo_bit_t reserved9[0x00080]; +/* -------------- */ + pseudo_bit_t pm_cap_id[0x00008]; /* Power management capability ID - 01h */ + pseudo_bit_t pm_next_cap_ptr[0x00008]; + pseudo_bit_t pm_cap[0x00010]; /* [2:0] Version - 02h + [3] PME clock - 0h + [4] RsvP + [5] Device specific initialization - 0h + [8:6] AUX current - 0h + [9] D1 support - 0h + [10] D2 support - 0h + [15:11] PME support - 0h */ +/* -------------- */ + pseudo_bit_t pm_status_control[0x00010];/* [14:13] - Data scale - 0h */ + pseudo_bit_t pm_control_status_brdg_ext[0x00008]; + pseudo_bit_t data[0x00008]; +/* -------------- */ + pseudo_bit_t reserved10[0x00040]; +/* -------------- */ + pseudo_bit_t vpd_cap_id[0x00008]; /* 03h */ + pseudo_bit_t vpd_next_cap_id[0x00008]; + pseudo_bit_t vpd_address[0x0000f]; + pseudo_bit_t f[0x00001]; +/* -------------- */ + pseudo_bit_t vpd_data[0x00020]; +/* -------------- */ + pseudo_bit_t reserved11[0x00040]; +/* -------------- */ + pseudo_bit_t pciex_cap_id[0x00008]; /* PCI-Express capability ID - 10h */ + 
pseudo_bit_t pciex_next_cap_ptr[0x00008]; + pseudo_bit_t pciex_cap[0x00010]; /* [3:0] Capability version - 1h + [7:4] Device/Port Type - 0h + [8] Slot implemented - 0h + [13:9] Interrupt message number + */ +/* -------------- */ + pseudo_bit_t device_cap[0x00020]; /* [2:0] Max_Payload_Size supported - 2h + [4:3] Phantom Function supported - 0h + [5] Extended Tag Filed supported - 0h + [8:6] Endpoint L0s Acceptable Latency - TBD + [11:9] Endpoint L1 Acceptable Latency - TBD + [12] Attention Button Present - configured through InfiniBurn + [13] Attention Indicator Present - configured through InfiniBurn + [14] Power Indicator Present - configured through InfiniBurn + [25:18] Captured Slot Power Limit Value + [27:26] Captured Slot Power Limit Scale */ +/* -------------- */ + pseudo_bit_t device_control[0x00010]; + pseudo_bit_t device_status[0x00010]; +/* -------------- */ + pseudo_bit_t link_cap[0x00020]; /* [3:0] Maximum Link Speed - 1h + [9:4] Maximum Link Width - 8h + [11:10] Active State Power Management Support - 3h + [14:12] L0s Exit Latency - TBD + [17:15] L1 Exit Latency - TBD + [31:24] Port Number - 0h */ +/* -------------- */ + pseudo_bit_t link_control[0x00010]; + pseudo_bit_t link_status[0x00010]; /* [3:0] Link Speed - 1h + [9:4] Negotiated Link Width + [12] Slot clock configuration - 1h */ +/* -------------- */ + pseudo_bit_t reserved12[0x00260]; +/* -------------- */ + pseudo_bit_t advanced_error_reporting_cap_id[0x00010];/* 0001h. 
*/ + pseudo_bit_t capability_version[0x00004];/* 1h */ + pseudo_bit_t next_capability_offset[0x0000c];/* 0h */ +/* -------------- */ + pseudo_bit_t uncorrectable_error_status_register[0x00020];/* 0 Training Error Status + 4 Data Link Protocol Error Status + 12 Poisoned TLP Status + 13 Flow Control Protocol Error Status + 14 Completion Timeout Status + 15 Completer Abort Status + 16 Unexpected Completion Status + 17 Receiver Overflow Status + 18 Malformed TLP Status + 19 ECRC Error Status + 20 Unsupported Request Error Status */ +/* -------------- */ + pseudo_bit_t uncorrectable_error_mask_register[0x00020];/* 0 Training Error Mask + 4 Data Link Protocol Error Mask + 12 Poisoned TLP Mask + 13 Flow Control Protocol Error Mask + 14 Completion Timeout Mask + 15 Completer Abort Mask + 16 Unexpected Completion Mask + 17 Receiver Overflow Mask + 18 Malformed TLP Mask + 19 ECRC Error Mask + 20 Unsupported Request Error Mask */ +/* -------------- */ + pseudo_bit_t uncorrectable_severity_mask_register[0x00020];/* 0 Training Error Severity + 4 Data Link Protocol Error Severity + 12 Poisoned TLP Severity + 13 Flow Control Protocol Error Severity + 14 Completion Timeout Severity + 15 Completer Abort Severity + 16 Unexpected Completion Severity + 17 Receiver Overflow Severity + 18 Malformed TLP Severity + 19 ECRC Error Severity + 20 Unsupported Request Error Severity */ +/* -------------- */ + pseudo_bit_t correctable_error_status_register[0x00020];/* 0 Receiver Error Status + 6 Bad TLP Status + 7 Bad DLLP Status + 8 REPLAY_NUM Rollover Status + 12 Replay Timer Timeout Status */ +/* -------------- */ + pseudo_bit_t correctable_error_mask_register[0x00020];/* 0 Receiver Error Mask + 6 Bad TLP Mask + 7 Bad DLLP Mask + 8 REPLAY_NUM Rollover Mask + 12 Replay Timer Timeout Mask */ +/* -------------- */ + pseudo_bit_t advance_error_capabilities_and_control_register[0x00020]; +/* -------------- */ + struct arbelprm_header_log_register_st header_log_register; +/* -------------- */ + 
pseudo_bit_t reserved13[0x006a0]; +/* -------------- */ +}; + +/* Event Data Field - Performance Monitor */ + +struct arbelprm_performance_monitor_event_st { /* Little Endian */ + struct arbelprm_performance_monitors_st performance_monitor_snapshot;/* Performance monitor snapshot */ +/* -------------- */ + pseudo_bit_t monitor_number[0x00008];/* 0x01 - SQPC + 0x02 - RQPC + 0x03 - CQC + 0x04 - Rkey + 0x05 - TLB + 0x06 - port0 + 0x07 - port1 */ + pseudo_bit_t reserved0[0x00018]; +/* -------------- */ + pseudo_bit_t reserved1[0x00040]; +/* -------------- */ +}; + +/* Event_data Field - Page Faults */ + +struct arbelprm_page_fault_event_data_st { /* Little Endian */ + pseudo_bit_t va_h[0x00020]; /* Virtual Address[63:32] this page fault is reported on */ +/* -------------- */ + pseudo_bit_t va_l[0x00020]; /* Virtual Address[31:0] this page fault is reported on */ +/* -------------- */ + pseudo_bit_t mem_key[0x00020]; /* Memory Key this page fault is reported on */ +/* -------------- */ + pseudo_bit_t qp[0x00018]; /* QP this page fault is reported on */ + pseudo_bit_t reserved0[0x00003]; + pseudo_bit_t a[0x00001]; /* If set the memory access that caused the page fault was atomic */ + pseudo_bit_t lw[0x00001]; /* If set the memory access that caused the page fault was local write */ + pseudo_bit_t lr[0x00001]; /* If set the memory access that caused the page fault was local read */ + pseudo_bit_t rw[0x00001]; /* If set the memory access that caused the page fault was remote write */ + pseudo_bit_t rr[0x00001]; /* If set the memory access that caused the page fault was remote read */ +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* PD this page fault is reported on */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t prefetch_len[0x00020]; /* Indicates how many subsequent pages in the same memory region/window will be accessed by the following transaction after this page fault is resolved. Measured in bytes. 
SW can use this information in order to page-in the subsequent pages if they are not present. */ +/* -------------- */ +}; + +/* WQE segments format */ + +struct arbelprm_wqe_segment_st { /* Little Endian */ + struct arbelprm_send_wqe_segment_st send_wqe_segment;/* Send WQE segment format */ +/* -------------- */ + pseudo_bit_t reserved0[0x00280]; +/* -------------- */ + struct arbelprm_wqe_segment_ctrl_mlx_st mlx_wqe_segment_ctrl;/* MLX WQE segment format */ +/* -------------- */ + pseudo_bit_t reserved1[0x00100]; +/* -------------- */ + struct arbelprm_wqe_segment_ctrl_recv_st recv_wqe_segment_ctrl;/* Receive segment format */ +/* -------------- */ + pseudo_bit_t reserved2[0x00080]; +/* -------------- */ +}; + +/* Event_data Field - Port State Change */ + +struct arbelprm_port_state_change_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ + pseudo_bit_t reserved1[0x0001c]; + pseudo_bit_t p[0x00002]; /* Port number (1 or 2) */ + pseudo_bit_t reserved2[0x00002]; +/* -------------- */ + pseudo_bit_t reserved3[0x00060]; +/* -------------- */ +}; + +/* Event_data Field - Completion Queue Error */ + +struct arbelprm_completion_queue_error_st { /* Little Endian */ + pseudo_bit_t cqn[0x00018]; /* CQ number event is reported for */ + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ + pseudo_bit_t syndrome[0x00008]; /* Error syndrome + 0x01 - CQ overrun + 0x02 - CQ access violation error */ + pseudo_bit_t reserved2[0x00018]; +/* -------------- */ + pseudo_bit_t reserved3[0x00060]; +/* -------------- */ +}; + +/* Event_data Field - Completion Event */ + +struct arbelprm_completion_event_st { /* Little Endian */ + pseudo_bit_t cqn[0x00018]; /* CQ number event is reported for */ + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t reserved1[0x000a0]; +/* -------------- */ +}; + +/* Event Queue Entry */ + +struct arbelprm_event_queue_entry_st { /* Little Endian */ 
+ pseudo_bit_t event_sub_type[0x00008];/* Event Sub Type. + Defined for events which have sub types, zero elsewhere. */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t event_type[0x00008]; /* Event Type */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t event_data[6][0x00020];/* Delivers auxiliary data to handle event. */ +/* -------------- */ + pseudo_bit_t reserved2[0x00007]; + pseudo_bit_t owner[0x00001]; /* Owner of the entry + 0 SW + 1 HW */ + pseudo_bit_t reserved3[0x00018]; +/* -------------- */ +}; + +/* QP/EE State Transitions Command Parameters */ + +struct arbelprm_qp_ee_state_transitions_st { /* Little Endian */ + pseudo_bit_t opt_param_mask[0x00020];/* This field defines which optional parameters are passed. Each bit specifies whether optional parameter is passed (set) or not (cleared). The optparammask is defined for each QP/EE command. */ +/* -------------- */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + struct arbelprm_queue_pair_ee_context_entry_st qpc_eec_data;/* QPC/EEC data */ +/* -------------- */ + pseudo_bit_t reserved1[0x009c0]; +/* -------------- */ +}; + +/* Completion Queue Entry Format */ + +struct arbelprm_completion_queue_entry_st { /* Little Endian */ + pseudo_bit_t my_qpn[0x00018]; /* Indicates the QP for which completion is being reported */ + pseudo_bit_t reserved0[0x00004]; + pseudo_bit_t ver[0x00004]; /* CQE version. + 0 for InfiniHost-III-EX */ +/* -------------- */ + pseudo_bit_t my_ee[0x00018]; /* EE context (for RD only). + Invalid for Bind and Nop operation on RD. + For non RD services this field reports the CQE timestamp. The Timestamp is a free running counter that is incremented every TimeStampGranularity tick. The counter rolls-over when it reaches saturation. TimeStampGranularity is configured in the INIT_HCA command. This feature is currently not supported. 
+ */ + pseudo_bit_t checksum_15_8[0x00008];/* Checksum[15:8] - See IPoverIB checksum offloading chapter */ +/* -------------- */ + pseudo_bit_t rqpn[0x00018]; /* Remote (source) QP number. Valid in Responder CQE only for Datagram QP. */ + pseudo_bit_t checksum_7_0[0x00008]; /* Checksum[7:0] - See IPoverIB checksum offloading chapter */ +/* -------------- */ + pseudo_bit_t rlid[0x00010]; /* Remote (source) LID of the message. Valid in Responder of UD QP CQE only. */ + pseudo_bit_t ml_path[0x00007]; /* My (destination) LID path bits - these are the lowermost LMC bits of the DLID in an incoming UD packet, higher bits of this field, that are not part of the LMC bits are zeroed by HW. + Valid in responder of UD QP CQE only. + Invalid if incoming message DLID is the permissive LID or incoming message is multicast. */ + pseudo_bit_t g[0x00001]; /* GRH present indicator. Valid in Responder of UD QP CQE only. */ + pseudo_bit_t ipok[0x00001]; /* IP OK - See IPoverIB checksum offloading chapter */ + pseudo_bit_t reserved1[0x00003]; + pseudo_bit_t sl[0x00004]; /* Service Level of the message. Valid in Responder of UD QP CQE only. */ +/* -------------- */ + pseudo_bit_t immediate_ethertype_pkey_indx_eecredits[0x00020];/* Valid for receive queue completion only. + If Opcode field indicates that this was send/write with immediate, this field contains immediate field of the packet. + If completion corresponds to RAW receive queue, bits 15:0 contain Ethertype field of the packet. + If completion corresponds to GSI receive queue, bits 31:16 contain index in PKey table that matches PKey of the message arrived. + If Opcode field indicates that this was send and invalidate, this field contains the key that was invalidated. + For CQE of send queue of the reliable connection service (but send and invalidate), bits [4:0] of this field contain the encoded EEcredits received in last ACK of the message. 
*/ +/* -------------- */ + pseudo_bit_t byte_cnt[0x00020]; /* Byte count of data actually transferred (valid for receive queue completions only) */ +/* -------------- */ + pseudo_bit_t reserved2[0x00006]; + pseudo_bit_t wqe_adr[0x0001a]; /* Bits 31:6 of WQE virtual address completion is reported for. The 6 least significant bits are zero. */ +/* -------------- */ + pseudo_bit_t reserved3[0x00007]; + pseudo_bit_t owner[0x00001]; /* Owner field. Zero value of this field means SW ownership of CQE. */ + pseudo_bit_t reserved4[0x0000f]; + pseudo_bit_t s[0x00001]; /* If set, completion is reported for Send queue, if cleared - receive queue. */ + pseudo_bit_t opcode[0x00008]; /* The opcode of WQE completion is reported for. + For CQEs corresponding to send completion, NOPCODE field of the WQE is copied to this field. + For CQEs corresponding to receive completions, opcode field of last packet in the message copied to this field. + For CQEs corresponding to the receive queue of QPs mapped to QP1, the opcode will be SEND with Immediate (messages are guaranteed to be SEND only) + + The following values are reported in case of completion with error: + 0xFE - For completion with error on Receive Queues + 0xFF - For completion with error on Send Queues */ +/* -------------- */ +}; + +/* NOTE(review): the generator emitted no title for this structure; its field layout is identical to arbelprm_scrubbing_event_st - confirm the intended description against the PRM */ + +struct arbelprm_ecc_detect_event_data_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00080]; +/* -------------- */ + pseudo_bit_t cause_lsb[0x00001]; + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t cause_msb[0x00001]; + pseudo_bit_t reserved2[0x00002]; + pseudo_bit_t err_rmw[0x00001]; + pseudo_bit_t err_src_id[0x00003]; + pseudo_bit_t err_da[0x00002]; + pseudo_bit_t err_ba[0x00002]; + pseudo_bit_t reserved3[0x00011]; + pseudo_bit_t overflow[0x00001]; +/* -------------- */ + pseudo_bit_t err_ra[0x00010]; + pseudo_bit_t 
err_ca[0x00010]; +/* -------------- */ +}; + +/* Event_data Field - ECC Detection Event */ + +struct arbelprm_scrubbing_event_st { /* Little Endian */ + pseudo_bit_t 
reserved0[0x00080]; +/* -------------- */ + pseudo_bit_t cause_lsb[0x00001]; /* data integrity error cause: + single ECC error in the 64bit lsb data, on the rise edge of the clock */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t cause_msb[0x00001]; /* data integrity error cause: + single ECC error in the 64bit msb data, on the fall edge of the clock */ + pseudo_bit_t reserved2[0x00002]; + pseudo_bit_t err_rmw[0x00001]; /* transaction type: + 0 - read + 1 - read/modify/write */ + pseudo_bit_t err_src_id[0x00003]; /* source of the transaction: 0x4 - PCI, other - internal or IB */ + pseudo_bit_t err_da[0x00002]; /* Error DIMM address */ + pseudo_bit_t err_ba[0x00002]; /* Error bank address */ + pseudo_bit_t reserved3[0x00011]; + pseudo_bit_t overflow[0x00001]; /* Fatal: ECC error FIFO overflow - ECC errors were detected, which may or may not have been corrected by InfiniHost-III-EX */ +/* -------------- */ + pseudo_bit_t err_ra[0x00010]; /* Error row address */ + pseudo_bit_t err_ca[0x00010]; /* Error column address */ +/* -------------- */ +}; + +/* Miscellaneous Counters */ + +struct arbelprm_misc_counters_st { /* Little Endian */ + pseudo_bit_t ddr_scan_cnt[0x00020]; /* Number of times whole of LAM was scanned */ +/* -------------- */ + pseudo_bit_t reserved0[0x007e0]; +/* -------------- */ +}; + +/* LAM_EN Output Parameter */ + +struct arbelprm_lam_en_out_param_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ +}; + +/* Extended_Completion_Queue_Entry */ + +struct arbelprm_extended_completion_queue_entry_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ +}; + +/* */ + +struct arbelprm_eq_cmd_doorbell_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ +}; + +/* 0 */ + +struct arbelprm_arbel_prm_st { /* Little Endian */ + struct arbelprm_completion_queue_entry_st completion_queue_entry;/* Completion Queue Entry Format */ +/* -------------- */ + pseudo_bit_t 
reserved0[0x7ff00]; +/* -------------- */ + struct arbelprm_qp_ee_state_transitions_st qp_ee_state_transitions;/* QP/EE State Transitions Command Parameters */ +/* -------------- */ + pseudo_bit_t reserved1[0x7f000]; +/* -------------- */ + struct arbelprm_event_queue_entry_st event_queue_entry;/* Event Queue Entry */ +/* -------------- */ + pseudo_bit_t reserved2[0x7ff00]; +/* -------------- */ + struct arbelprm_completion_event_st completion_event;/* Event_data Field - Completion Event */ +/* -------------- */ + pseudo_bit_t reserved3[0x7ff40]; +/* -------------- */ + struct arbelprm_completion_queue_error_st completion_queue_error;/* Event_data Field - Completion Queue Error */ +/* -------------- */ + pseudo_bit_t reserved4[0x7ff40]; +/* -------------- */ + struct arbelprm_port_state_change_st port_state_change;/* Event_data Field - Port State Change */ +/* -------------- */ + pseudo_bit_t reserved5[0x7ff40]; +/* -------------- */ + struct arbelprm_wqe_segment_st wqe_segment;/* WQE segments format */ +/* -------------- */ + pseudo_bit_t reserved6[0x7f000]; +/* -------------- */ + struct arbelprm_page_fault_event_data_st page_fault_event_data;/* Event_data Field - Page Faults */ +/* -------------- */ + pseudo_bit_t reserved7[0x7ff40]; +/* -------------- */ + struct arbelprm_performance_monitor_event_st performance_monitor_event;/* Event Data Field - Performance Monitor */ +/* -------------- */ + pseudo_bit_t reserved8[0xfff20]; +/* -------------- */ + struct arbelprm_mt25208_type0_st mt25208_type0;/* InfiniHost-III-EX Type0 Configuration Header */ +/* -------------- */ + pseudo_bit_t reserved9[0x7f000]; +/* -------------- */ + struct arbelprm_qp_ee_event_st qp_ee_event;/* Event_data Field - QP/EE Events */ +/* -------------- */ + pseudo_bit_t reserved10[0x00040]; +/* -------------- */ + struct arbelprm_gpio_event_data_st gpio_event_data; +/* -------------- */ + pseudo_bit_t reserved11[0x7fe40]; +/* -------------- */ + struct arbelprm_ud_address_vector_st 
ud_address_vector;/* UD Address Vector */ +/* -------------- */ + pseudo_bit_t reserved12[0x7ff00]; +/* -------------- */ + struct arbelprm_queue_pair_ee_context_entry_st queue_pair_ee_context_entry;/* QP and EE Context Entry */ +/* -------------- */ + pseudo_bit_t reserved13[0x7fa00]; +/* -------------- */ + struct arbelprm_address_path_st address_path;/* Address Path */ +/* -------------- */ + pseudo_bit_t reserved14[0x7ff00]; +/* -------------- */ + struct arbelprm_completion_queue_context_st completion_queue_context;/* Completion Queue Context Table Entry */ +/* -------------- */ + pseudo_bit_t reserved15[0x7fe00]; +/* -------------- */ + struct arbelprm_mpt_st mpt; /* Memory Protection Table (MPT) Entry */ +/* -------------- */ + pseudo_bit_t reserved16[0x7fe00]; +/* -------------- */ + struct arbelprm_mtt_st mtt; /* Memory Translation Table (MTT) Entry */ +/* -------------- */ + pseudo_bit_t reserved17[0x7ffc0]; +/* -------------- */ + struct arbelprm_eqc_st eqc; /* Event Queue Context Table Entry */ +/* -------------- */ + pseudo_bit_t reserved18[0x7fe00]; +/* -------------- */ + struct arbelprm_performance_monitors_st performance_monitors;/* Performance Monitors */ +/* -------------- */ + pseudo_bit_t reserved19[0x7ff80]; +/* -------------- */ + struct arbelprm_hca_command_register_st hca_command_register;/* HCA Command Register (HCR) */ +/* -------------- */ + pseudo_bit_t reserved20[0xfff20]; +/* -------------- */ + struct arbelprm_init_hca_st init_hca;/* INIT_HCA & QUERY_HCA Parameters Block */ +/* -------------- */ + pseudo_bit_t reserved21[0x7f000]; +/* -------------- */ + struct arbelprm_qpcbaseaddr_st qpcbaseaddr;/* QPC/EEC/CQC/EQC/RDB Parameters */ +/* -------------- */ + pseudo_bit_t reserved22[0x7fc00]; +/* -------------- */ + struct arbelprm_udavtable_memory_parameters_st udavtable_memory_parameters;/* Memory Access Parameters for UD Address Vector Table */ +/* -------------- */ + pseudo_bit_t reserved23[0x7ffc0]; +/* -------------- */ + struct 
arbelprm_multicastparam_st multicastparam;/* Multicast Support Parameters */ +/* -------------- */ + pseudo_bit_t reserved24[0x7ff00]; +/* -------------- */ + struct arbelprm_tptparams_st tptparams;/* Translation and Protection Tables Parameters */ +/* -------------- */ + pseudo_bit_t reserved25[0x7ff00]; +/* -------------- */ + struct arbelprm_enable_lam_st enable_lam;/* ENABLE_LAM Parameters Block */ +/* -------------- */ + struct arbelprm_access_lam_st access_lam; +/* -------------- */ + pseudo_bit_t reserved26[0x7f700]; +/* -------------- */ + struct arbelprm_dimminfo_st dimminfo;/* Logical DIMM Information */ +/* -------------- */ + pseudo_bit_t reserved27[0x7ff00]; +/* -------------- */ + struct arbelprm_query_fw_st query_fw;/* QUERY_FW Parameters Block */ +/* -------------- */ + pseudo_bit_t reserved28[0x7f800]; +/* -------------- */ + struct arbelprm_query_adapter_st query_adapter;/* QUERY_ADAPTER Parameters Block */ +/* -------------- */ + pseudo_bit_t reserved29[0x7f800]; +/* -------------- */ + struct arbelprm_query_dev_lim_st query_dev_lim;/* Query Device Limitations */ +/* -------------- */ + pseudo_bit_t reserved30[0x7f800]; +/* -------------- */ + struct arbelprm_uar_params_st uar_params;/* UAR Parameters */ +/* -------------- */ + pseudo_bit_t reserved31[0x7ff00]; +/* -------------- */ + struct arbelprm_init_ib_st init_ib; /* INIT_IB Parameters */ +/* -------------- */ + pseudo_bit_t reserved32[0x7f800]; +/* -------------- */ + struct arbelprm_mgm_entry_st mgm_entry;/* Multicast Group Member */ +/* -------------- */ + pseudo_bit_t reserved33[0x7fe00]; +/* -------------- */ + struct arbelprm_set_ib_st set_ib; /* SET_IB Parameters */ +/* -------------- */ + pseudo_bit_t reserved34[0x7fe00]; +/* -------------- */ + struct arbelprm_rd_send_doorbell_st rd_send_doorbell;/* RD-send doorbell */ +/* -------------- */ + pseudo_bit_t reserved35[0x7ff80]; +/* -------------- */ + struct arbelprm_send_doorbell_st send_doorbell;/* Send doorbell */ +/* 
-------------- */ + pseudo_bit_t reserved36[0x7ffc0]; +/* -------------- */ + struct arbelprm_receive_doorbell_st receive_doorbell;/* Receive doorbell */ +/* -------------- */ + pseudo_bit_t reserved37[0x7ffc0]; +/* -------------- */ + struct arbelprm_cq_cmd_doorbell_st cq_cmd_doorbell;/* CQ Doorbell */ +/* -------------- */ + pseudo_bit_t reserved38[0xfffc0]; +/* -------------- */ + struct arbelprm_uar_st uar; /* User Access Region */ +/* -------------- */ + pseudo_bit_t reserved39[0x7c000]; +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp; /* Multicast Group Member QP */ +/* -------------- */ + pseudo_bit_t reserved40[0x7ffe0]; +/* -------------- */ + struct arbelprm_query_debug_msg_st query_debug_msg;/* Query Debug Message */ +/* -------------- */ + pseudo_bit_t reserved41[0x7f800]; +/* -------------- */ + struct arbelprm_mad_ifc_st mad_ifc; /* MAD_IFC Input Mailbox */ +/* -------------- */ + pseudo_bit_t reserved42[0x00900]; +/* -------------- */ + struct arbelprm_mad_ifc_input_modifier_st mad_ifc_input_modifier;/* MAD_IFC Input Modifier */ +/* -------------- */ + pseudo_bit_t reserved43[0x7e6e0]; +/* -------------- */ + struct arbelprm_resize_cq_st resize_cq;/* Resize CQ Input Mailbox */ +/* -------------- */ + pseudo_bit_t reserved44[0x7fe00]; +/* -------------- */ + struct arbelprm_completion_with_error_st completion_with_error;/* Completion with Error CQE */ +/* -------------- */ + pseudo_bit_t reserved45[0x7ff00]; +/* -------------- */ + struct arbelprm_hcr_completion_event_st hcr_completion_event;/* Event_data Field - HCR Completion Event */ +/* -------------- */ + pseudo_bit_t reserved46[0x7ff40]; +/* -------------- */ + struct arbelprm_transport_and_ci_error_counters_st transport_and_ci_error_counters;/* Transport and CI Error Counters */ +/* -------------- */ + pseudo_bit_t reserved47[0x7f000]; +/* -------------- */ + struct arbelprm_performance_counters_st performance_counters;/* Performance Counters */ +/* -------------- */ + pseudo_bit_t 
reserved48[0x9ff800]; +/* -------------- */ + struct arbelprm_fast_registration_segment_st fast_registration_segment;/* Fast Registration Segment */ +/* -------------- */ + pseudo_bit_t reserved49[0x7ff00]; +/* -------------- */ + struct arbelprm_pbl_st pbl; /* Physical Buffer List */ +/* -------------- */ + pseudo_bit_t reserved50[0x7ff00]; +/* -------------- */ + struct arbelprm_srq_context_st srq_context;/* SRQ Context */ +/* -------------- */ + pseudo_bit_t reserved51[0x7fe80]; +/* -------------- */ + struct arbelprm_mod_stat_cfg_st mod_stat_cfg;/* MOD_STAT_CFG */ +/* -------------- */ + pseudo_bit_t reserved52[0x7f800]; +/* -------------- */ + struct arbelprm_virtual_physical_mapping_st virtual_physical_mapping;/* Virtual and Physical Mapping */ +/* -------------- */ + pseudo_bit_t reserved53[0x7ff80]; +/* -------------- */ + struct arbelprm_cq_ci_db_record_st cq_ci_db_record;/* CQ_CI_DB_Record */ +/* -------------- */ + pseudo_bit_t reserved54[0x7ffc0]; +/* -------------- */ + struct arbelprm_cq_arm_db_record_st cq_arm_db_record;/* CQ_ARM_DB_Record */ +/* -------------- */ + pseudo_bit_t reserved55[0x7ffc0]; +/* -------------- */ + struct arbelprm_qp_db_record_st qp_db_record;/* QP_DB_Record */ +/* -------------- */ + pseudo_bit_t reserved56[0x1fffc0]; +/* -------------- */ + struct arbelprm_configuration_registers_st configuration_registers;/* InfiniHost III EX Configuration Registers */ +/* -------------- */ + struct arbelprm_eq_set_ci_table_st eq_set_ci_table;/* EQ Set CI DBs Table */ +/* -------------- */ + pseudo_bit_t reserved57[0x01000]; +/* -------------- */ + struct arbelprm_eq_arm_db_region_st eq_arm_db_region;/* EQ Arm Doorbell Region */ +/* -------------- */ + pseudo_bit_t reserved58[0x00fc0]; +/* -------------- */ + struct arbelprm_clr_int_st clr_int; /* Clear Interrupt Register */ +/* -------------- */ + pseudo_bit_t reserved59[0xffcfc0]; +/* -------------- */ +}; +#endif /* H_prefix_arbelprm_bits_fixnames_MT25218_PRM_csp_H */ diff --git 
a/src/drivers/infiniband/arbel.c b/src/drivers/infiniband/arbel.c new file mode 100644 index 00000000..a57ade30 --- /dev/null +++ b/src/drivers/infiniband/arbel.c @@ -0,0 +1,2129 @@ +/* + * Copyright (C) 2007 Michael Brown . + * + * Based in part upon the original driver by Mellanox Technologies + * Ltd. Portions may be Copyright (c) Mellanox Technologies Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */
+
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "arbel.h"
+
+/**
+ * @file
+ *
+ * Mellanox Arbel Infiniband HCA
+ *
+ */
+
+/* Port to use */
+#define PXE_IB_PORT 1
+
+/***************************************************************************
+ *
+ * Queue number allocation
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Allocate queue number
+ *
+ * Scans the usage bitmask from offset zero for the first clear bit
+ * below @c max_inuse, sets that bit and returns its offset.
+ *
+ * @v q_inuse		Queue usage bitmask
+ * @v max_inuse		Maximum number of in-use queues
+ * @ret qn_offset	Free queue number offset, or negative error
+ */
+static int arbel_alloc_qn_offset ( arbel_bitmask_t *q_inuse,
+				   unsigned int max_inuse ) {
+	unsigned int qn_offset = 0;
+	arbel_bitmask_t mask = 1;
+
+	while ( qn_offset < max_inuse ) {
+		if ( ( mask & *q_inuse ) == 0 ) {
+			*q_inuse |= mask;
+			return qn_offset;
+		}
+		qn_offset++;
+		mask <<= 1;
+		/* Mask shifted out of the top bit: move to next word */
+		if ( ! mask ) {
+			mask = 1;
+			q_inuse++;
+		}
+	}
+	return -ENFILE;
+}
+
+/**
+ * Free queue number
+ *
+ * Clears the bit for @c qn_offset in the usage bitmask.
+ *
+ * @v q_inuse		Queue usage bitmask
+ * @v qn_offset		Queue number offset
+ */
+static void arbel_free_qn_offset ( arbel_bitmask_t *q_inuse, int qn_offset ) {
+	arbel_bitmask_t mask;
+
+	/* Shift a bitmask-typed 1 rather than a plain int, so that the
+	 * shift is carried out at the width of arbel_bitmask_t (as the
+	 * mask arithmetic in arbel_alloc_qn_offset() already is) and
+	 * never shifts into the sign bit of an int.
+	 */
+	mask = ( ( ( arbel_bitmask_t ) 1 ) <<
+		 ( qn_offset % ( 8 * sizeof ( mask ) ) ) );
+	q_inuse += ( qn_offset / ( 8 * sizeof ( mask ) ) );
+	*q_inuse &= ~mask;
+}
+
+/***************************************************************************
+ *
+ * HCA commands
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Wait for Arbel command completion
+ *
+ * Polls HCR dword 6 once per millisecond until the "go" bit clears.
+ *
+ * @v arbel		Arbel device
+ * @v hcr		HCA command register; dword 6 is refreshed on each poll
+ * @ret rc		Return status code
+ */
+static int arbel_cmd_wait ( struct arbel *arbel,
+			    struct arbelprm_hca_command_register *hcr ) {
+	unsigned int wait;
+
+	for ( wait = ARBEL_HCR_MAX_WAIT_MS ; wait ; wait-- ) {
+		hcr->u.dwords[6] =
+			readl ( arbel->config + ARBEL_HCR_REG ( 6 ) );
+		if ( MLX_GET ( hcr, go ) == 0 )
+			return 0;
+
mdelay ( 1 );
+	}
+	return -EBUSY;
+}
+
+/**
+ * Issue HCA command
+ *
+ * Waits for the HCR to become free, builds the command in a local
+ * copy of the HCR, writes it to the device one dword at a time, then
+ * waits for the "go" bit to clear and checks the returned status.
+ * Parameters travel either inline in the HCR or, when the command
+ * carries the corresponding mailbox flag, via the device mailboxes.
+ *
+ * @v arbel		Arbel device
+ * @v command		Command opcode, flags and input/output lengths
+ * @v op_mod		Opcode modifier (0 if no modifier applicable)
+ * @v in		Input parameters (may be NULL if input length is zero)
+ * @v in_mod		Input modifier (0 if no modifier applicable)
+ * @v out		Output parameters (may be NULL if output length is zero)
+ * @ret rc		Return status code: the arbel_cmd_wait() error if the
+ *			HCR is busy or the command times out, -EIO if the
+ *			device reports a non-zero status
+ */
+static int arbel_cmd ( struct arbel *arbel, unsigned long command,
+		       unsigned int op_mod, const void *in,
+		       unsigned int in_mod, void *out ) {
+	struct arbelprm_hca_command_register hcr;
+	unsigned int opcode = ARBEL_HCR_OPCODE ( command );
+	size_t in_len = ARBEL_HCR_IN_LEN ( command );
+	size_t out_len = ARBEL_HCR_OUT_LEN ( command );
+	void *in_buffer;
+	void *out_buffer;
+	unsigned int status;
+	unsigned int i;
+	int rc;
+
+	assert ( in_len <= ARBEL_MBOX_SIZE );
+	assert ( out_len <= ARBEL_MBOX_SIZE );
+
+	DBGC2 ( arbel, "Arbel %p command %02x in %zx%s out %zx%s\n",
+		arbel, opcode, in_len,
+		( ( command & ARBEL_HCR_IN_MBOX ) ? "(mbox)" : "" ), out_len,
+		( ( command & ARBEL_HCR_OUT_MBOX ) ? "(mbox)" : "" ) );
+
+	/* Check that HCR is free */
+	if ( ( rc = arbel_cmd_wait ( arbel, &hcr ) ) != 0 ) {
+		DBGC ( arbel, "Arbel %p command interface locked\n", arbel );
+		return rc;
+	}
+
+	/* Prepare HCR */
+	memset ( &hcr, 0, sizeof ( hcr ) );
+	in_buffer = &hcr.u.dwords[0];
+	if ( in_len && ( command & ARBEL_HCR_IN_MBOX ) ) {
+		in_buffer = arbel->mailbox_in;
+		MLX_FILL_1 ( &hcr, 1, in_param_l, virt_to_bus ( in_buffer ) );
+	}
+	/* Callers of commands without input pass in == NULL; memcpy()
+	 * must not be handed a NULL pointer even for a zero length.
+	 */
+	if ( in_len )
+		memcpy ( in_buffer, in, in_len );
+	MLX_FILL_1 ( &hcr, 2, input_modifier, in_mod );
+	out_buffer = &hcr.u.dwords[3];
+	if ( out_len && ( command & ARBEL_HCR_OUT_MBOX ) ) {
+		out_buffer = arbel->mailbox_out;
+		MLX_FILL_1 ( &hcr, 4, out_param_l,
+			     virt_to_bus ( out_buffer ) );
+	}
+	MLX_FILL_3 ( &hcr, 6,
+		     opcode, opcode,
+		     opcode_modifier, op_mod,
+		     go, 1 );
+	DBGC2_HD ( arbel, &hcr, sizeof ( hcr ) );
+	if ( in_len ) {
+		DBGC2 ( arbel, "Input:\n" );
+		DBGC2_HD ( arbel, in, ( ( in_len < 512 ) ? in_len : 512 ) );
+	}
+
+	/* Issue command: dword 6 (carrying the go bit) is written last */
+	for ( i = 0 ; i < ( sizeof ( hcr ) / sizeof ( hcr.u.dwords[0] ) ) ;
+	      i++ ) {
+		writel ( hcr.u.dwords[i],
+			 arbel->config + ARBEL_HCR_REG ( i ) );
+		barrier();
+	}
+
+	/* Wait for command completion */
+	if ( ( rc = arbel_cmd_wait ( arbel, &hcr ) ) != 0 ) {
+		DBGC ( arbel, "Arbel %p timed out waiting for command:\n",
+		       arbel );
+		DBGC_HD ( arbel, &hcr, sizeof ( hcr ) );
+		return rc;
+	}
+
+	/* Check command status */
+	status = MLX_GET ( &hcr, status );
+	if ( status != 0 ) {
+		DBGC ( arbel, "Arbel %p command failed with status %02x:\n",
+		       arbel, status );
+		DBGC_HD ( arbel, &hcr, sizeof ( hcr ) );
+		return -EIO;
+	}
+
+	/* Read output parameters, if any */
+	hcr.u.dwords[3] = readl ( arbel->config + ARBEL_HCR_REG ( 3 ) );
+	hcr.u.dwords[4] = readl ( arbel->config + ARBEL_HCR_REG ( 4 ) );
+	/* Callers of commands without output pass out == NULL, so only
+	 * copy (and dump) when there is something to return.
+	 */
+	if ( out_len ) {
+		memcpy ( out, out_buffer, out_len );
+		DBGC2 ( arbel, "Output:\n" );
+		DBGC2_HD ( arbel, out, ( ( out_len < 512 ) ? out_len : 512 ) );
+	}
+
+	return 0;
+}
+
+/** Issue QUERY_DEV_LIM; the result is copied into @c dev_lim */
+static inline int
+arbel_cmd_query_dev_lim ( struct arbel *arbel,
+			  struct arbelprm_query_dev_lim *dev_lim ) {
+	return arbel_cmd ( arbel,
+			   ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_DEV_LIM,
+					       1, sizeof ( *dev_lim ) ),
+			   0, NULL, 0, dev_lim );
+}
+
+/** Issue QUERY_FW; the result is copied into @c fw */
+static inline int
+arbel_cmd_query_fw ( struct arbel *arbel, struct arbelprm_query_fw *fw ) {
+	return arbel_cmd ( arbel,
+			   ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_FW,
+					       1, sizeof ( *fw ) ),
+			   0, NULL, 0, fw );
+}
+
+/** Issue INIT_HCA with @c init_hca as the input parameters */
+static inline int
+arbel_cmd_init_hca ( struct arbel *arbel,
+		     const struct arbelprm_init_hca *init_hca ) {
+	return arbel_cmd ( arbel,
+			   ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT_HCA,
+					      1, sizeof ( *init_hca ) ),
+			   0, init_hca, 0, NULL );
+}
+
+/** Issue CLOSE_HCA (no input or output parameters) */
+static inline int
+arbel_cmd_close_hca ( struct arbel *arbel ) {
+	return arbel_cmd ( arbel,
+			   ARBEL_HCR_VOID_CMD ( ARBEL_HCR_CLOSE_HCA ),
+			   0, NULL, 0, NULL );
+}
+
+/** Issue INIT_IB; @c port is passed as the input modifier */
+static inline int
+arbel_cmd_init_ib ( struct arbel *arbel, unsigned int port,
+		    const struct arbelprm_init_ib *init_ib ) {
+	return arbel_cmd ( arbel,
+			   ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT_IB,
+					      1, sizeof ( *init_ib ) ),
+			   0, init_ib, port, NULL );
+}
+
+/** Issue CLOSE_IB; @c port is passed as the input modifier */
+static inline int
+arbel_cmd_close_ib ( struct arbel *arbel, unsigned int port ) {
+	return arbel_cmd ( arbel,
+			   ARBEL_HCR_VOID_CMD ( ARBEL_HCR_CLOSE_IB ),
+			   0, NULL, port, NULL );
+}
+
+/** Issue SW2HW_MPT; @c index is passed as the input modifier */
+static inline int
+arbel_cmd_sw2hw_mpt ( struct arbel *arbel, unsigned int index,
+		      const struct arbelprm_mpt *mpt ) {
+	return arbel_cmd ( arbel,
+			   ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_MPT,
+					      1, sizeof ( *mpt ) ),
+			   0, mpt, index, NULL );
+}
+
+/** Issue SW2HW_EQ; @c index is passed as the input modifier */
+static inline int
+arbel_cmd_sw2hw_eq ( struct arbel *arbel, unsigned int index,
+		     const struct arbelprm_eqc *eqc ) {
+	return arbel_cmd ( arbel,
+			   ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_EQ,
+					      1, sizeof ( *eqc ) ),
+			   0, eqc, index, NULL );
+}
+
+static inline int
+arbel_cmd_hw2sw_eq ( struct arbel *arbel, unsigned int index ) {
+	return arbel_cmd ( arbel,
+			   ARBEL_HCR_VOID_CMD ( ARBEL_HCR_HW2SW_EQ ),
+			   1,
NULL, index, NULL ); +} + +static inline int +arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, + const struct arbelprm_completion_queue_context *cqctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_CQ, + 1, sizeof ( *cqctx ) ), + 0, cqctx, cqn, NULL ); +} + +static inline int +arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn, + struct arbelprm_completion_queue_context *cqctx) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_HW2SW_CQ, + 1, sizeof ( *cqctx ) ), + 0, NULL, cqn, cqctx ); +} + +static inline int +arbel_cmd_rst2init_qpee ( struct arbel *arbel, unsigned long qpn, + const struct arbelprm_qp_ee_state_transitions *ctx ){ + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_RST2INIT_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_init2rtr_qpee ( struct arbel *arbel, unsigned long qpn, + const struct arbelprm_qp_ee_state_transitions *ctx ){ + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT2RTR_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_rtr2rts_qpee ( struct arbel *arbel, unsigned long qpn, + const struct arbelprm_qp_ee_state_transitions *ctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_RTR2RTS_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_2rst_qpee ( struct arbel *arbel, unsigned long qpn ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_2RST_QPEE ), + 0x03, NULL, qpn, NULL ); +} + +static inline int +arbel_cmd_mad_ifc ( struct arbel *arbel, union arbelprm_mad *mad ) { + return arbel_cmd ( arbel, + ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_MAD_IFC, + 1, sizeof ( *mad ), + 1, sizeof ( *mad ) ), + 0x03, mad, PXE_IB_PORT, mad ); +} + +static inline int +arbel_cmd_read_mgm ( struct arbel *arbel, unsigned int index, + struct arbelprm_mgm_entry *mgm ) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_READ_MGM, + 1, sizeof ( *mgm ) 
), + 0, NULL, index, mgm ); +} + +static inline int +arbel_cmd_write_mgm ( struct arbel *arbel, unsigned int index, + const struct arbelprm_mgm_entry *mgm ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_WRITE_MGM, + 1, sizeof ( *mgm ) ), + 0, mgm, index, NULL ); +} + +static inline int +arbel_cmd_mgid_hash ( struct arbel *arbel, const struct ib_gid *gid, + struct arbelprm_mgm_hash *hash ) { + return arbel_cmd ( arbel, + ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_MGID_HASH, + 1, sizeof ( *gid ), + 0, sizeof ( *hash ) ), + 0, gid, 0, hash ); +} + +static inline int +arbel_cmd_run_fw ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_RUN_FW ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_disable_lam ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_DISABLE_LAM ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_enable_lam ( struct arbel *arbel, struct arbelprm_access_lam *lam ) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_ENABLE_LAM, + 1, sizeof ( *lam ) ), + 1, NULL, 0, lam ); +} + +static inline int +arbel_cmd_unmap_icm ( struct arbel *arbel, unsigned int page_count ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_ICM ), + 0, NULL, page_count, NULL ); +} + +static inline int +arbel_cmd_map_icm ( struct arbel *arbel, + const struct arbelprm_virtual_physical_mapping *map ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_ICM, + 1, sizeof ( *map ) ), + 0, map, 1, NULL ); +} + +static inline int +arbel_cmd_unmap_icm_aux ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_ICM_AUX ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_map_icm_aux ( struct arbel *arbel, + const struct arbelprm_virtual_physical_mapping *map ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_ICM_AUX, + 1, sizeof ( *map ) ), + 0, map, 1, NULL ); +} + +static inline int 
+arbel_cmd_set_icm_size ( struct arbel *arbel, + const struct arbelprm_scalar_parameter *icm_size, + struct arbelprm_scalar_parameter *icm_aux_size ) { + return arbel_cmd ( arbel, + ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_SET_ICM_SIZE, + 0, sizeof ( *icm_size ), + 0, sizeof ( *icm_aux_size ) ), + 0, icm_size, 0, icm_aux_size ); +} + +static inline int +arbel_cmd_unmap_fa ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_FA ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_map_fa ( struct arbel *arbel, + const struct arbelprm_virtual_physical_mapping *map ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_FA, + 1, sizeof ( *map ) ), + 0, map, 1, NULL ); +} + +/*************************************************************************** + * + * Completion queue operations + * + *************************************************************************** + */ + +/** + * Create completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @ret rc Return status code + */ +static int arbel_create_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq; + struct arbelprm_completion_queue_context cqctx; + struct arbelprm_cq_ci_db_record *ci_db_rec; + struct arbelprm_cq_arm_db_record *arm_db_rec; + int cqn_offset; + unsigned int i; + int rc; + + /* Find a free completion queue number */ + cqn_offset = arbel_alloc_qn_offset ( arbel->cq_inuse, ARBEL_MAX_CQS ); + if ( cqn_offset < 0 ) { + DBGC ( arbel, "Arbel %p out of completion queues\n", arbel ); + rc = cqn_offset; + goto err_cqn_offset; + } + cq->cqn = ( arbel->limits.reserved_cqs + cqn_offset ); + + /* Allocate control structures */ + arbel_cq = zalloc ( sizeof ( *arbel_cq ) ); + if ( ! 
arbel_cq ) { + rc = -ENOMEM; + goto err_arbel_cq; + } + arbel_cq->ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); + arbel_cq->arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); + + /* Allocate completion queue itself */ + arbel_cq->cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); + arbel_cq->cqe = malloc_dma ( arbel_cq->cqe_size, + sizeof ( arbel_cq->cqe[0] ) ); + if ( ! arbel_cq->cqe ) { + rc = -ENOMEM; + goto err_cqe; + } + memset ( arbel_cq->cqe, 0, arbel_cq->cqe_size ); + for ( i = 0 ; i < cq->num_cqes ; i++ ) { + MLX_FILL_1 ( &arbel_cq->cqe[i].normal, 7, owner, 1 ); + } + barrier(); + + /* Initialise doorbell records */ + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; + MLX_FILL_1 ( ci_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( ci_db_rec, 1, + res, ARBEL_UAR_RES_CQ_CI, + cq_number, cq->cqn ); + arm_db_rec = &arbel->db_rec[arbel_cq->arm_doorbell_idx].cq_arm; + MLX_FILL_1 ( arm_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( arm_db_rec, 1, + res, ARBEL_UAR_RES_CQ_ARM, + cq_number, cq->cqn ); + + /* Hand queue over to hardware */ + memset ( &cqctx, 0, sizeof ( cqctx ) ); + MLX_FILL_1 ( &cqctx, 0, st, 0xa /* "Event fired" */ ); + MLX_FILL_1 ( &cqctx, 2, start_address_l, + virt_to_bus ( arbel_cq->cqe ) ); + MLX_FILL_2 ( &cqctx, 3, + usr_page, arbel->limits.reserved_uars, + log_cq_size, fls ( cq->num_cqes - 1 ) ); + MLX_FILL_1 ( &cqctx, 5, c_eqn, ARBEL_NO_EQ ); + MLX_FILL_1 ( &cqctx, 6, pd, ARBEL_GLOBAL_PD ); + MLX_FILL_1 ( &cqctx, 7, l_key, arbel->reserved_lkey ); + MLX_FILL_1 ( &cqctx, 12, cqn, cq->cqn ); + MLX_FILL_1 ( &cqctx, 13, + cq_ci_db_record, arbel_cq->ci_doorbell_idx ); + MLX_FILL_1 ( &cqctx, 14, + cq_state_db_record, arbel_cq->arm_doorbell_idx ); + if ( ( rc = arbel_cmd_sw2hw_cq ( arbel, cq->cqn, &cqctx ) ) != 0 ) { + DBGC ( arbel, "Arbel %p SW2HW_CQ failed: %s\n", + arbel, strerror ( rc ) ); + goto err_sw2hw_cq; + } + + DBGC ( arbel, "Arbel %p CQN %#lx ring at [%p,%p)\n", + arbel, cq->cqn, arbel_cq->cqe, + ( ( ( void * 
) arbel_cq->cqe ) + arbel_cq->cqe_size ) ); + cq->dev_priv = arbel_cq; + return 0; + + err_sw2hw_cq: + MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + free_dma ( arbel_cq->cqe, arbel_cq->cqe_size ); + err_cqe: + free ( arbel_cq ); + err_arbel_cq: + arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); + err_cqn_offset: + return rc; +} + +/** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +static void arbel_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq = cq->dev_priv; + struct arbelprm_completion_queue_context cqctx; + struct arbelprm_cq_ci_db_record *ci_db_rec; + struct arbelprm_cq_arm_db_record *arm_db_rec; + int cqn_offset; + int rc; + + /* Take ownership back from hardware */ + if ( ( rc = arbel_cmd_hw2sw_cq ( arbel, cq->cqn, &cqctx ) ) != 0 ) { + DBGC ( arbel, "Arbel %p FATAL HW2SW_CQ failed on CQN %#lx: " + "%s\n", arbel, cq->cqn, strerror ( rc ) ); + /* Leak memory and return; at least we avoid corruption */ + return; + } + + /* Clear doorbell records */ + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; + arm_db_rec = &arbel->db_rec[arbel_cq->arm_doorbell_idx].cq_arm; + MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + + /* Free memory */ + free_dma ( arbel_cq->cqe, arbel_cq->cqe_size ); + free ( arbel_cq ); + + /* Mark queue number as free */ + cqn_offset = ( cq->cqn - arbel->limits.reserved_cqs ); + arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); + + cq->dev_priv = NULL; +} + +/*************************************************************************** + * + * Queue pair operations + * + *************************************************************************** + */ + +/** + * Create send work queue + * + * @v arbel_send_wq Send work queue + * @v 
num_wqes Number of work queue entries + * @ret rc Return status code + */ +static int arbel_create_send_wq ( struct arbel_send_work_queue *arbel_send_wq, + unsigned int num_wqes ) { + struct arbelprm_ud_send_wqe *wqe; + struct arbelprm_ud_send_wqe *next_wqe; + unsigned int wqe_idx_mask; + unsigned int i; + + /* Allocate work queue */ + arbel_send_wq->wqe_size = ( num_wqes * + sizeof ( arbel_send_wq->wqe[0] ) ); + arbel_send_wq->wqe = malloc_dma ( arbel_send_wq->wqe_size, + sizeof ( arbel_send_wq->wqe[0] ) ); + if ( ! arbel_send_wq->wqe ) + return -ENOMEM; + memset ( arbel_send_wq->wqe, 0, arbel_send_wq->wqe_size ); + + /* Link work queue entries */ + wqe_idx_mask = ( num_wqes - 1 ); + for ( i = 0 ; i < num_wqes ; i++ ) { + wqe = &arbel_send_wq->wqe[i].ud; + next_wqe = &arbel_send_wq->wqe[ ( i + 1 ) & wqe_idx_mask ].ud; + MLX_FILL_1 ( &wqe->next, 0, nda_31_6, + ( virt_to_bus ( next_wqe ) >> 6 ) ); + } + + return 0; +} + +/** + * Create receive work queue + * + * @v arbel_recv_wq Receive work queue + * @v num_wqes Number of work queue entries + * @ret rc Return status code + */ +static int arbel_create_recv_wq ( struct arbel_recv_work_queue *arbel_recv_wq, + unsigned int num_wqes ) { + struct arbelprm_recv_wqe *wqe; + struct arbelprm_recv_wqe *next_wqe; + unsigned int wqe_idx_mask; + size_t nds; + unsigned int i; + unsigned int j; + + /* Allocate work queue */ + arbel_recv_wq->wqe_size = ( num_wqes * + sizeof ( arbel_recv_wq->wqe[0] ) ); + arbel_recv_wq->wqe = malloc_dma ( arbel_recv_wq->wqe_size, + sizeof ( arbel_recv_wq->wqe[0] ) ); + if ( ! 
arbel_recv_wq->wqe ) + return -ENOMEM; + memset ( arbel_recv_wq->wqe, 0, arbel_recv_wq->wqe_size ); + + /* Link work queue entries */ + wqe_idx_mask = ( num_wqes - 1 ); + nds = ( ( offsetof ( typeof ( *wqe ), data ) + + sizeof ( wqe->data[0] ) ) >> 4 ); + for ( i = 0 ; i < num_wqes ; i++ ) { + wqe = &arbel_recv_wq->wqe[i].recv; + next_wqe = &arbel_recv_wq->wqe[( i + 1 ) & wqe_idx_mask].recv; + MLX_FILL_1 ( &wqe->next, 0, nda_31_6, + ( virt_to_bus ( next_wqe ) >> 6 ) ); + MLX_FILL_1 ( &wqe->next, 1, nds, ( sizeof ( *wqe ) / 16 ) ); + for ( j = 0 ; ( ( ( void * ) &wqe->data[j] ) < + ( ( void * ) ( wqe + 1 ) ) ) ; j++ ) { + MLX_FILL_1 ( &wqe->data[j], 1, + l_key, ARBEL_INVALID_LKEY ); + } + } + + return 0; +} + +/** + * Create queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @ret rc Return status code + */ +static int arbel_create_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp; + struct arbelprm_qp_ee_state_transitions qpctx; + struct arbelprm_qp_db_record *send_db_rec; + struct arbelprm_qp_db_record *recv_db_rec; + int qpn_offset; + int rc; + + /* Find a free queue pair number */ + qpn_offset = arbel_alloc_qn_offset ( arbel->qp_inuse, ARBEL_MAX_QPS ); + if ( qpn_offset < 0 ) { + DBGC ( arbel, "Arbel %p out of queue pairs\n", arbel ); + rc = qpn_offset; + goto err_qpn_offset; + } + qp->qpn = ( ARBEL_QPN_BASE + arbel->limits.reserved_qps + qpn_offset ); + + /* Allocate control structures */ + arbel_qp = zalloc ( sizeof ( *arbel_qp ) ); + if ( ! 
arbel_qp ) { + rc = -ENOMEM; + goto err_arbel_qp; + } + arbel_qp->send.doorbell_idx = arbel_send_doorbell_idx ( qpn_offset ); + arbel_qp->recv.doorbell_idx = arbel_recv_doorbell_idx ( qpn_offset ); + + /* Create send and receive work queues */ + if ( ( rc = arbel_create_send_wq ( &arbel_qp->send, + qp->send.num_wqes ) ) != 0 ) + goto err_create_send_wq; + if ( ( rc = arbel_create_recv_wq ( &arbel_qp->recv, + qp->recv.num_wqes ) ) != 0 ) + goto err_create_recv_wq; + + /* Initialise doorbell records */ + send_db_rec = &arbel->db_rec[arbel_qp->send.doorbell_idx].qp; + MLX_FILL_1 ( send_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( send_db_rec, 1, + res, ARBEL_UAR_RES_SQ, + qp_number, qp->qpn ); + recv_db_rec = &arbel->db_rec[arbel_qp->recv.doorbell_idx].qp; + MLX_FILL_1 ( recv_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( recv_db_rec, 1, + res, ARBEL_UAR_RES_RQ, + qp_number, qp->qpn ); + + /* Hand queue over to hardware */ + memset ( &qpctx, 0, sizeof ( qpctx ) ); + MLX_FILL_3 ( &qpctx, 2, + qpc_eec_data.de, 1, + qpc_eec_data.pm_state, 0x03 /* Always 0x03 for UD */, + qpc_eec_data.st, ARBEL_ST_UD ); + MLX_FILL_6 ( &qpctx, 4, + qpc_eec_data.mtu, ARBEL_MTU_2048, + qpc_eec_data.msg_max, 11 /* 2^11 = 2048 */, + qpc_eec_data.log_rq_size, fls ( qp->recv.num_wqes - 1 ), + qpc_eec_data.log_rq_stride, + ( fls ( sizeof ( arbel_qp->recv.wqe[0] ) - 1 ) - 4 ), + qpc_eec_data.log_sq_size, fls ( qp->send.num_wqes - 1 ), + qpc_eec_data.log_sq_stride, + ( fls ( sizeof ( arbel_qp->send.wqe[0] ) - 1 ) - 4 ) ); + MLX_FILL_1 ( &qpctx, 5, + qpc_eec_data.usr_page, arbel->limits.reserved_uars ); + MLX_FILL_1 ( &qpctx, 10, qpc_eec_data.primary_address_path.port_number, + PXE_IB_PORT ); + MLX_FILL_1 ( &qpctx, 27, qpc_eec_data.pd, ARBEL_GLOBAL_PD ); + MLX_FILL_1 ( &qpctx, 29, qpc_eec_data.wqe_lkey, arbel->reserved_lkey ); + MLX_FILL_1 ( &qpctx, 30, qpc_eec_data.ssc, 1 ); + MLX_FILL_1 ( &qpctx, 33, qpc_eec_data.cqn_snd, qp->send.cq->cqn ); + MLX_FILL_1 ( &qpctx, 34, qpc_eec_data.snd_wqe_base_adr_l, + ( 
virt_to_bus ( arbel_qp->send.wqe ) >> 6 ) ); + MLX_FILL_1 ( &qpctx, 35, qpc_eec_data.snd_db_record_index, + arbel_qp->send.doorbell_idx ); + MLX_FILL_1 ( &qpctx, 38, qpc_eec_data.rsc, 1 ); + MLX_FILL_1 ( &qpctx, 41, qpc_eec_data.cqn_rcv, qp->recv.cq->cqn ); + MLX_FILL_1 ( &qpctx, 42, qpc_eec_data.rcv_wqe_base_adr_l, + ( virt_to_bus ( arbel_qp->recv.wqe ) >> 6 ) ); + MLX_FILL_1 ( &qpctx, 43, qpc_eec_data.rcv_db_record_index, + arbel_qp->recv.doorbell_idx ); + MLX_FILL_1 ( &qpctx, 44, qpc_eec_data.q_key, qp->qkey ); + if ( ( rc = arbel_cmd_rst2init_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ + DBGC ( arbel, "Arbel %p RST2INIT_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_rst2init_qpee; + } + memset ( &qpctx, 0, sizeof ( qpctx ) ); + MLX_FILL_2 ( &qpctx, 4, + qpc_eec_data.mtu, ARBEL_MTU_2048, + qpc_eec_data.msg_max, 11 /* 2^11 = 2048 */ ); + if ( ( rc = arbel_cmd_init2rtr_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ + DBGC ( arbel, "Arbel %p INIT2RTR_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_init2rtr_qpee; + } + memset ( &qpctx, 0, sizeof ( qpctx ) ); + if ( ( rc = arbel_cmd_rtr2rts_qpee ( arbel, qp->qpn, &qpctx ) ) != 0 ){ + DBGC ( arbel, "Arbel %p RTR2RTS_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_rtr2rts_qpee; + } + + DBGC ( arbel, "Arbel %p QPN %#lx send ring at [%p,%p)\n", + arbel, qp->qpn, arbel_qp->send.wqe, + ( ( (void *) arbel_qp->send.wqe ) + arbel_qp->send.wqe_size ) ); + DBGC ( arbel, "Arbel %p QPN %#lx receive ring at [%p,%p)\n", + arbel, qp->qpn, arbel_qp->recv.wqe, + ( ( (void *) arbel_qp->recv.wqe ) + arbel_qp->recv.wqe_size ) ); + qp->dev_priv = arbel_qp; + return 0; + + err_rtr2rts_qpee: + err_init2rtr_qpee: + arbel_cmd_2rst_qpee ( arbel, qp->qpn ); + err_rst2init_qpee: + MLX_FILL_1 ( send_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( recv_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + free_dma ( arbel_qp->recv.wqe, arbel_qp->recv.wqe_size ); + err_create_recv_wq: + free_dma ( arbel_qp->send.wqe, 
arbel_qp->send.wqe_size ); + err_create_send_wq: + free ( arbel_qp ); + err_arbel_qp: + arbel_free_qn_offset ( arbel->qp_inuse, qpn_offset ); + err_qpn_offset: + return rc; +} + +/** + * Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +static void arbel_destroy_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; + struct arbelprm_qp_db_record *send_db_rec; + struct arbelprm_qp_db_record *recv_db_rec; + int qpn_offset; + int rc; + + /* Take ownership back from hardware */ + if ( ( rc = arbel_cmd_2rst_qpee ( arbel, qp->qpn ) ) != 0 ) { + DBGC ( arbel, "Arbel %p FATAL 2RST_QPEE failed on QPN %#lx: " + "%s\n", arbel, qp->qpn, strerror ( rc ) ); + /* Leak memory and return; at least we avoid corruption */ + return; + } + + /* Clear doorbell records */ + send_db_rec = &arbel->db_rec[arbel_qp->send.doorbell_idx].qp; + recv_db_rec = &arbel->db_rec[arbel_qp->recv.doorbell_idx].qp; + MLX_FILL_1 ( send_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( recv_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + + /* Free memory */ + free_dma ( arbel_qp->send.wqe, arbel_qp->send.wqe_size ); + free_dma ( arbel_qp->recv.wqe, arbel_qp->recv.wqe_size ); + free ( arbel_qp ); + + /* Mark queue number as free */ + qpn_offset = ( qp->qpn - ARBEL_QPN_BASE - arbel->limits.reserved_qps ); + arbel_free_qn_offset ( arbel->qp_inuse, qpn_offset ); + + qp->dev_priv = NULL; +} + +/*************************************************************************** + * + * Work request operations + * + *************************************************************************** + */ + +/** + * Ring doorbell register in UAR + * + * @v arbel Arbel device + * @v db_reg Doorbell register structure + * @v offset Address of doorbell + */ +static void arbel_ring_doorbell ( struct arbel *arbel, + union arbelprm_doorbell_register *db_reg, + unsigned int offset ) { + + DBGC2 ( arbel, "Arbel %p 
ringing doorbell %08lx:%08lx at %lx\n", + arbel, db_reg->dword[0], db_reg->dword[1], + virt_to_phys ( arbel->uar + offset ) ); + + barrier(); + writel ( db_reg->dword[0], ( arbel->uar + offset + 0 ) ); + barrier(); + writel ( db_reg->dword[1], ( arbel->uar + offset + 4 ) ); +} + +/** GID used for GID-less send work queue entries */ +static const struct ib_gid arbel_no_gid = { + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 } } +}; + +/** + * Post send work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int arbel_post_send ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_address_vector *av, + struct io_buffer *iobuf ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; + struct ib_work_queue *wq = &qp->send; + struct arbel_send_work_queue *arbel_send_wq = &arbel_qp->send; + struct arbelprm_ud_send_wqe *prev_wqe; + struct arbelprm_ud_send_wqe *wqe; + struct arbelprm_qp_db_record *qp_db_rec; + union arbelprm_doorbell_register db_reg; + const struct ib_gid *gid; + unsigned int wqe_idx_mask; + size_t nds; + + /* Allocate work queue entry */ + wqe_idx_mask = ( wq->num_wqes - 1 ); + if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { + DBGC ( arbel, "Arbel %p send queue full", arbel ); + return -ENOBUFS; + } + wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; + prev_wqe = &arbel_send_wq->wqe[(wq->next_idx - 1) & wqe_idx_mask].ud; + wqe = &arbel_send_wq->wqe[wq->next_idx & wqe_idx_mask].ud; + + /* Construct work queue entry */ + MLX_FILL_1 ( &wqe->next, 1, always1, 1 ); + memset ( &wqe->ctrl, 0, sizeof ( wqe->ctrl ) ); + MLX_FILL_1 ( &wqe->ctrl, 0, always1, 1 ); + memset ( &wqe->ud, 0, sizeof ( wqe->ud ) ); + MLX_FILL_2 ( &wqe->ud, 0, + ud_address_vector.pd, ARBEL_GLOBAL_PD, + ud_address_vector.port_number, PXE_IB_PORT ); + MLX_FILL_2 ( &wqe->ud, 1, + ud_address_vector.rlid, av->dlid, + 
ud_address_vector.g, av->gid_present ); + MLX_FILL_2 ( &wqe->ud, 2, + ud_address_vector.max_stat_rate, + ( ( av->rate >= 3 ) ? 0 : 1 ), + ud_address_vector.msg, 3 ); + MLX_FILL_1 ( &wqe->ud, 3, ud_address_vector.sl, av->sl ); + gid = ( av->gid_present ? &av->gid : &arbel_no_gid ); + memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); + MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); + MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); + MLX_FILL_1 ( &wqe->data[0], 1, l_key, arbel->reserved_lkey ); + MLX_FILL_1 ( &wqe->data[0], 3, + local_address_l, virt_to_bus ( iobuf->data ) ); + + /* Update previous work queue entry's "next" field */ + nds = ( ( offsetof ( typeof ( *wqe ), data ) + + sizeof ( wqe->data[0] ) ) >> 4 ); + MLX_SET ( &prev_wqe->next, nopcode, ARBEL_OPCODE_SEND ); + MLX_FILL_3 ( &prev_wqe->next, 1, + nds, nds, + f, 1, + always1, 1 ); + + /* Update doorbell record */ + barrier(); + qp_db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx].qp; + MLX_FILL_1 ( qp_db_rec, 0, + counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); + + /* Ring doorbell register */ + MLX_FILL_4 ( &db_reg.send, 0, + nopcode, ARBEL_OPCODE_SEND, + f, 1, + wqe_counter, ( wq->next_idx & 0xffff ), + wqe_cnt, 1 ); + MLX_FILL_2 ( &db_reg.send, 1, + nds, nds, + qpn, qp->qpn ); + arbel_ring_doorbell ( arbel, &db_reg, ARBEL_DB_POST_SND_OFFSET ); + + /* Update work queue's index */ + wq->next_idx++; + + return 0; +} + +/** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int arbel_post_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; + struct ib_work_queue *wq = &qp->recv; + struct arbel_recv_work_queue *arbel_recv_wq = &arbel_qp->recv; + struct arbelprm_recv_wqe *wqe; + union 
arbelprm_doorbell_record *db_rec; + unsigned int wqe_idx_mask; + + /* Allocate work queue entry */ + wqe_idx_mask = ( wq->num_wqes - 1 ); + if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { + DBGC ( arbel, "Arbel %p receive queue full", arbel ); + return -ENOBUFS; + } + wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; + wqe = &arbel_recv_wq->wqe[wq->next_idx & wqe_idx_mask].recv; + + /* Construct work queue entry */ + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_tailroom ( iobuf ) ); + MLX_FILL_1 ( &wqe->data[0], 1, l_key, arbel->reserved_lkey ); + MLX_FILL_1 ( &wqe->data[0], 3, + local_address_l, virt_to_bus ( iobuf->data ) ); + + /* Update doorbell record */ + barrier(); + db_rec = &arbel->db_rec[arbel_recv_wq->doorbell_idx]; + MLX_FILL_1 ( &db_rec->qp, 0, + counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); + + /* Update work queue's index */ + wq->next_idx++; + + return 0; +} + +/** + * Handle completion + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v cqe Hardware completion queue entry + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + * @ret rc Return status code + */ +static int arbel_complete ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + union arbelprm_completion_entry *cqe, + ib_completer_t complete_send, + ib_completer_t complete_recv ) { + struct arbel *arbel = ibdev->dev_priv; + struct ib_completion completion; + struct ib_work_queue *wq; + struct ib_queue_pair *qp; + struct arbel_queue_pair *arbel_qp; + struct arbel_send_work_queue *arbel_send_wq; + struct arbel_recv_work_queue *arbel_recv_wq; + struct arbelprm_recv_wqe *recv_wqe; + struct io_buffer *iobuf; + ib_completer_t complete; + unsigned int opcode; + unsigned long qpn; + int is_send; + unsigned long wqe_adr; + unsigned int wqe_idx; + int rc = 0; + + /* Parse completion */ + memset ( &completion, 0, sizeof ( completion ) ); + qpn = MLX_GET ( &cqe->normal, my_qpn ); + is_send = MLX_GET ( &cqe->normal, s ); + wqe_adr = ( 
MLX_GET ( &cqe->normal, wqe_adr ) << 6 ); + opcode = MLX_GET ( &cqe->normal, opcode ); + if ( opcode >= ARBEL_OPCODE_RECV_ERROR ) { + /* "s" field is not valid for error opcodes */ + is_send = ( opcode == ARBEL_OPCODE_SEND_ERROR ); + completion.syndrome = MLX_GET ( &cqe->error, syndrome ); + DBGC ( arbel, "Arbel %p CPN %lx syndrome %x vendor %lx\n", + arbel, cq->cqn, completion.syndrome, + MLX_GET ( &cqe->error, vendor_code ) ); + rc = -EIO; + /* Don't return immediately; propagate error to completer */ + } + + /* Identify work queue */ + wq = ib_find_wq ( cq, qpn, is_send ); + if ( ! wq ) { + DBGC ( arbel, "Arbel %p CQN %lx unknown %s QPN %lx\n", + arbel, cq->cqn, ( is_send ? "send" : "recv" ), qpn ); + return -EIO; + } + qp = wq->qp; + arbel_qp = qp->dev_priv; + arbel_send_wq = &arbel_qp->send; + arbel_recv_wq = &arbel_qp->recv; + + /* Identify work queue entry index */ + if ( is_send ) { + wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / + sizeof ( arbel_send_wq->wqe[0] ) ); + assert ( wqe_idx < qp->send.num_wqes ); + } else { + wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / + sizeof ( arbel_recv_wq->wqe[0] ) ); + assert ( wqe_idx < qp->recv.num_wqes ); + } + + /* Identify I/O buffer */ + iobuf = wq->iobufs[wqe_idx]; + if ( ! iobuf ) { + DBGC ( arbel, "Arbel %p CQN %lx QPN %lx empty WQE %x\n", + arbel, cq->cqn, qpn, wqe_idx ); + return -EIO; + } + wq->iobufs[wqe_idx] = NULL; + + /* Fill in length for received packets */ + if ( ! 
is_send ) { + completion.len = MLX_GET ( &cqe->normal, byte_cnt ); + recv_wqe = &arbel_recv_wq->wqe[wqe_idx].recv; + assert ( MLX_GET ( &recv_wqe->data[0], local_address_l ) == + virt_to_bus ( iobuf->data ) ); + assert ( MLX_GET ( &recv_wqe->data[0], byte_count ) == + iob_tailroom ( iobuf ) ); + MLX_FILL_1 ( &recv_wqe->data[0], 0, byte_count, 0 ); + MLX_FILL_1 ( &recv_wqe->data[0], 1, + l_key, ARBEL_INVALID_LKEY ); + if ( completion.len > iob_tailroom ( iobuf ) ) { + DBGC ( arbel, "Arbel %p CQN %lx QPN %lx IDX %x " + "overlength received packet length %zd\n", + arbel, cq->cqn, qpn, wqe_idx, completion.len ); + return -EIO; + } + } + + /* Pass off to caller's completion handler */ + complete = ( is_send ? complete_send : complete_recv ); + complete ( ibdev, qp, &completion, iobuf ); + + return rc; +} + +/** + * Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + */ +static void arbel_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq = cq->dev_priv; + struct arbelprm_cq_ci_db_record *ci_db_rec; + union arbelprm_completion_entry *cqe; + unsigned int cqe_idx_mask; + int rc; + + while ( 1 ) { + /* Look for completion entry */ + cqe_idx_mask = ( cq->num_cqes - 1 ); + cqe = &arbel_cq->cqe[cq->next_idx & cqe_idx_mask]; + if ( MLX_GET ( &cqe->normal, owner ) != 0 ) { + /* Entry still owned by hardware; end of poll */ + break; + } + + /* Handle completion */ + if ( ( rc = arbel_complete ( ibdev, cq, cqe, complete_send, + complete_recv ) ) != 0 ) { + DBGC ( arbel, "Arbel %p failed to complete: %s\n", + arbel, strerror ( rc ) ); + DBGC_HD ( arbel, cqe, sizeof ( *cqe ) ); + } + + /* Return ownership to hardware */ + MLX_FILL_1 ( &cqe->normal, 7, owner, 1 ); + barrier(); + /* Update 
completion queue's index */ + cq->next_idx++; + /* Update doorbell record */ + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; + MLX_FILL_1 ( ci_db_rec, 0, + counter, ( cq->next_idx & 0xffffffffUL ) ); + } +} + +/*************************************************************************** + * + * Multicast group operations + * + *************************************************************************** + */ + +/** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ +static int arbel_mcast_attach ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_gid *gid ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbelprm_mgm_hash hash; + struct arbelprm_mgm_entry mgm; + unsigned int index; + int rc; + + /* Generate hash table index */ + if ( ( rc = arbel_cmd_mgid_hash ( arbel, gid, &hash ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not hash GID: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + index = MLX_GET ( &hash, hash ); + + /* Check for existing hash table entry */ + if ( ( rc = arbel_cmd_read_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not read MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return rc; + } + if ( MLX_GET ( &mgm, mgmqp_0.qi ) != 0 ) { + /* FIXME: this implementation allows only a single QP + * per multicast group, and doesn't handle hash + * collisions. Sufficient for IPoIB but may need to + * be extended in future. 
+ */ + DBGC ( arbel, "Arbel %p MGID index %#x already in use\n", + arbel, index ); + return -EBUSY; + } + + /* Update hash table entry */ + MLX_FILL_2 ( &mgm, 8, + mgmqp_0.qpn_i, qp->qpn, + mgmqp_0.qi, 1 ); + memcpy ( &mgm.u.dwords[4], gid, sizeof ( *gid ) ); + if ( ( rc = arbel_cmd_write_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not write MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ +static void arbel_mcast_detach ( struct ib_device *ibdev, + struct ib_queue_pair *qp __unused, + struct ib_gid *gid ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbelprm_mgm_hash hash; + struct arbelprm_mgm_entry mgm; + unsigned int index; + int rc; + + /* Generate hash table index */ + if ( ( rc = arbel_cmd_mgid_hash ( arbel, gid, &hash ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not hash GID: %s\n", + arbel, strerror ( rc ) ); + return; + } + index = MLX_GET ( &hash, hash ); + + /* Clear hash table entry */ + memset ( &mgm, 0, sizeof ( mgm ) ); + if ( ( rc = arbel_cmd_write_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not write MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return; + } +} + +/** Arbel Infiniband operations */ +static struct ib_device_operations arbel_ib_operations = { + .create_cq = arbel_create_cq, + .destroy_cq = arbel_destroy_cq, + .create_qp = arbel_create_qp, + .destroy_qp = arbel_destroy_qp, + .post_send = arbel_post_send, + .post_recv = arbel_post_recv, + .poll_cq = arbel_poll_cq, + .mcast_attach = arbel_mcast_attach, + .mcast_detach = arbel_mcast_detach, +}; + +/*************************************************************************** + * + * MAD IFC operations + * + *************************************************************************** + */ + +static int arbel_mad_ifc ( struct arbel *arbel, + union arbelprm_mad *mad ) { 
+ struct ib_mad_hdr *hdr = &mad->mad.mad_hdr; + int rc; + + hdr->base_version = IB_MGMT_BASE_VERSION; + if ( ( rc = arbel_cmd_mad_ifc ( arbel, mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not issue MAD IFC: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + if ( hdr->status != 0 ) { + DBGC ( arbel, "Arbel %p MAD IFC status %04x\n", + arbel, ntohs ( hdr->status ) ); + return -EIO; + } + return 0; +} + +static int arbel_get_port_info ( struct arbel *arbel, + struct ib_mad_port_info *port_info ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; + int rc; + + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_PORT_INFO ); + hdr->attr_mod = htonl ( PXE_IB_PORT ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get port info: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + memcpy ( port_info, &mad.mad.port_info, sizeof ( *port_info ) ); + return 0; +} + +static int arbel_get_guid_info ( struct arbel *arbel, + struct ib_mad_guid_info *guid_info ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; + int rc; + + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_GUID_INFO ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get GUID info: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + memcpy ( guid_info, &mad.mad.guid_info, sizeof ( *guid_info ) ); + return 0; +} + +static int arbel_get_pkey_table ( struct arbel *arbel, + struct ib_mad_pkey_table *pkey_table ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; + int rc; + + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + 
hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get pkey table: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + memcpy ( pkey_table, &mad.mad.pkey_table, sizeof ( *pkey_table ) ); + return 0; +} + +static int arbel_get_port_gid ( struct arbel *arbel, + struct ib_gid *port_gid ) { + union { + /* This union exists just to save stack space */ + struct ib_mad_port_info port_info; + struct ib_mad_guid_info guid_info; + } u; + int rc; + + /* Port info gives us the first half of the port GID */ + if ( ( rc = arbel_get_port_info ( arbel, &u.port_info ) ) != 0 ) + return rc; + memcpy ( &port_gid->u.bytes[0], u.port_info.gid_prefix, 8 ); + + /* GUID info gives us the second half of the port GID */ + if ( ( rc = arbel_get_guid_info ( arbel, &u.guid_info ) ) != 0 ) + return rc; + memcpy ( &port_gid->u.bytes[8], u.guid_info.gid_local, 8 ); + + return 0; +} + +static int arbel_get_sm_lid ( struct arbel *arbel, + unsigned long *sm_lid ) { + struct ib_mad_port_info port_info; + int rc; + + if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) + return rc; + *sm_lid = ntohs ( port_info.mastersm_lid ); + return 0; +} + +static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) { + struct ib_mad_pkey_table pkey_table; + int rc; + + if ( ( rc = arbel_get_pkey_table ( arbel, &pkey_table ) ) != 0 ) + return rc; + *pkey = ntohs ( pkey_table.pkey[0][0] ); + return 0; +} + +/** + * Wait for link up + * + * @v arbel Arbel device + * @ret rc Return status code + * + * This function shouldn't really exist. Unfortunately, IB links take + * a long time to come up, and we can't get various key parameters + * e.g. our own IPoIB MAC address without information from the subnet + * manager). We should eventually make link-up an asynchronous event. 
+ */ +static int arbel_wait_for_link ( struct arbel *arbel ) { + struct ib_mad_port_info port_info; + unsigned int retries; + int rc; + + printf ( "Waiting for Infiniband link-up..." ); + for ( retries = 20 ; retries ; retries-- ) { + /* Treat a failed port info query like "link not yet up", + * so that every attempt is paced by the one-second delay + * below instead of burning all retries back-to-back. + */ + rc = arbel_get_port_info ( arbel, &port_info ); + /* NOTE(review): low nibble == 4 is presumably the "active" + * port state; confirm against the IB specification. + */ + if ( ( rc == 0 ) && + ( ( ( port_info.port_state__link_speed_supported ) & 0xf ) + == 4 ) ) { + printf ( "ok\n" ); + return 0; + } + printf ( "." ); + sleep ( 1 ); + } + printf ( "failed\n" ); + return -ENODEV; +} + +/** + * Get MAD parameters + * + * @v ibdev Infiniband device + * @ret rc Return status code + */ +static int arbel_get_mad_params ( struct ib_device *ibdev ) { + struct arbel *arbel = ibdev->dev_priv; + int rc; + + /* Get subnet manager LID */ + if ( ( rc = arbel_get_sm_lid ( arbel, &ibdev->sm_lid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine subnet manager " + "LID: %s\n", arbel, strerror ( rc ) ); + return rc; + } + + /* Get port GID */ + if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + /* Get partition key */ + if ( ( rc = arbel_get_pkey ( arbel, &ibdev->pkey ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine partition key: " + "%s\n", arbel, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/*************************************************************************** + * + * Firmware control + * + *************************************************************************** + */ + +/** + * Start firmware running + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_start_firmware ( struct arbel *arbel ) { + struct arbelprm_query_fw fw; + struct arbelprm_access_lam lam; + struct arbelprm_virtual_physical_mapping map_fa; + unsigned int fw_pages; + unsigned int log2_fw_pages; + size_t fw_size; + physaddr_t fw_base; + int rc; + + /* Get firmware parameters */ + if ( ( rc = 
arbel_cmd_query_fw ( arbel, &fw ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not query firmware: %s\n", + arbel, strerror ( rc ) ); + goto err_query_fw; + } + DBGC ( arbel, "Arbel %p firmware version %ld.%ld.%ld\n", arbel, + MLX_GET ( &fw, fw_rev_major ), MLX_GET ( &fw, fw_rev_minor ), + MLX_GET ( &fw, fw_rev_subminor ) ); + fw_pages = MLX_GET ( &fw, fw_pages ); + log2_fw_pages = fls ( fw_pages - 1 ); + fw_pages = ( 1 << log2_fw_pages ); + DBGC ( arbel, "Arbel %p requires %d kB for firmware\n", + arbel, ( fw_pages * 4 ) ); + + /* Enable locally-attached memory. Ignore failure; there may + * be no attached memory. + */ + arbel_cmd_enable_lam ( arbel, &lam ); + + /* Allocate firmware pages and map firmware area */ + fw_size = ( fw_pages * 4096 ); + arbel->firmware_area = umalloc ( fw_size ); + if ( ! arbel->firmware_area ) { + rc = -ENOMEM; + goto err_alloc_fa; + } + fw_base = ( user_to_phys ( arbel->firmware_area, fw_size ) & + ~( fw_size - 1 ) ); + DBGC ( arbel, "Arbel %p firmware area at physical [%lx,%lx)\n", + arbel, fw_base, ( fw_base + fw_size ) ); + memset ( &map_fa, 0, sizeof ( map_fa ) ); + MLX_FILL_2 ( &map_fa, 3, + log2size, log2_fw_pages, + pa_l, ( fw_base >> 12 ) ); + if ( ( rc = arbel_cmd_map_fa ( arbel, &map_fa ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not map firmware: %s\n", + arbel, strerror ( rc ) ); + goto err_map_fa; + } + + /* Start firmware */ + if ( ( rc = arbel_cmd_run_fw ( arbel ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not run firmware: %s\n", + arbel, strerror ( rc ) ); + goto err_run_fw; + } + + DBGC ( arbel, "Arbel %p firmware started\n", arbel ); + return 0; + + err_run_fw: + arbel_cmd_unmap_fa ( arbel ); + err_map_fa: + ufree ( arbel->firmware_area ); + arbel->firmware_area = UNULL; + err_alloc_fa: + err_query_fw: + return rc; +} + +/** + * Stop firmware running + * + * @v arbel Arbel device + */ +static void arbel_stop_firmware ( struct arbel *arbel ) { + int rc; + + if ( ( rc = arbel_cmd_unmap_fa ( arbel ) ) != 0 ) { + DBGC ( 
arbel, "Arbel %p FATAL could not stop firmware: %s\n", + arbel, strerror ( rc ) ); + /* Leak memory and return; at least we avoid corruption */ + return; + } + ufree ( arbel->firmware_area ); + arbel->firmware_area = UNULL; +} + +/*************************************************************************** + * + * Infinihost Context Memory management + * + *************************************************************************** + */ + +/** + * Get device limits + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_get_limits ( struct arbel *arbel ) { + struct arbelprm_query_dev_lim dev_lim; + int rc; + + if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get device limits: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + arbel->limits.reserved_qps = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); + arbel->limits.qpc_entry_size = MLX_GET ( &dev_lim, qpc_entry_sz ); + arbel->limits.eqpc_entry_size = MLX_GET ( &dev_lim, eqpc_entry_sz ); + arbel->limits.reserved_srqs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_srqs ) ); + arbel->limits.srqc_entry_size = MLX_GET ( &dev_lim, srq_entry_sz ); + arbel->limits.reserved_ees = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_ees ) ); + arbel->limits.eec_entry_size = MLX_GET ( &dev_lim, eec_entry_sz ); + arbel->limits.eeec_entry_size = MLX_GET ( &dev_lim, eeec_entry_sz ); + arbel->limits.reserved_cqs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); + arbel->limits.cqc_entry_size = MLX_GET ( &dev_lim, cqc_entry_sz ); + arbel->limits.reserved_mtts = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_mtts ) ); + arbel->limits.mtt_entry_size = MLX_GET ( &dev_lim, mtt_entry_sz ); + arbel->limits.reserved_mrws = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_mrws ) ); + arbel->limits.mpt_entry_size = MLX_GET ( &dev_lim, mpt_entry_sz ); + arbel->limits.reserved_rdbs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_rdbs ) ); + arbel->limits.eqc_entry_size = MLX_GET ( &dev_lim, 
eqc_entry_sz ); + arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); + + return 0; +} + +/** + * Get ICM usage + * + * @v log_num_entries Log2 of the number of entries + * @v entry_size Entry size + * @ret usage Usage size in ICM + */ +static size_t icm_usage ( unsigned int log_num_entries, size_t entry_size ) { + size_t usage; + + usage = ( ( 1 << log_num_entries ) * entry_size ); + usage = ( ( usage + 4095 ) & ~4095 ); + return usage; +} + +/** + * Allocate ICM + * + * @v arbel Arbel device + * @v init_hca INIT_HCA structure to fill in + * @ret rc Return status code + */ +static int arbel_alloc_icm ( struct arbel *arbel, + struct arbelprm_init_hca *init_hca ) { + struct arbelprm_scalar_parameter icm_size; + struct arbelprm_scalar_parameter icm_aux_size; + struct arbelprm_virtual_physical_mapping map_icm_aux; + struct arbelprm_virtual_physical_mapping map_icm; + union arbelprm_doorbell_record *db_rec; + size_t icm_offset = 0; + unsigned int log_num_qps, log_num_srqs, log_num_ees, log_num_cqs; + unsigned int log_num_mtts, log_num_mpts, log_num_rdbs, log_num_eqs; + int rc; + + icm_offset = ( ( arbel->limits.reserved_uars + 1 ) << 12 ); + + /* Queue pair contexts */ + log_num_qps = fls ( arbel->limits.reserved_qps + ARBEL_MAX_QPS - 1 ); + MLX_FILL_2 ( init_hca, 13, + qpc_eec_cqc_eqc_rdb_parameters.qpc_base_addr_l, + ( icm_offset >> 7 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_qp, + log_num_qps ); + DBGC ( arbel, "Arbel %p ICM QPC base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_qps, arbel->limits.qpc_entry_size ); + + /* Extended queue pair contexts */ + MLX_FILL_1 ( init_hca, 25, + qpc_eec_cqc_eqc_rdb_parameters.eqpc_base_addr_l, + icm_offset ); + DBGC ( arbel, "Arbel %p ICM EQPC base = %zx\n", arbel, icm_offset ); + // icm_offset += icm_usage ( log_num_qps, arbel->limits.eqpc_entry_size ); + icm_offset += icm_usage ( log_num_qps, arbel->limits.qpc_entry_size ); + + /* Shared receive queue contexts */ + log_num_srqs = fls ( 
arbel->limits.reserved_srqs - 1 ); + MLX_FILL_2 ( init_hca, 19, + qpc_eec_cqc_eqc_rdb_parameters.srqc_base_addr_l, + ( icm_offset >> 5 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_srq, + log_num_srqs ); + DBGC ( arbel, "Arbel %p ICM SRQC base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_srqs, arbel->limits.srqc_entry_size ); + + /* End-to-end contexts */ + log_num_ees = fls ( arbel->limits.reserved_ees - 1 ); + MLX_FILL_2 ( init_hca, 17, + qpc_eec_cqc_eqc_rdb_parameters.eec_base_addr_l, + ( icm_offset >> 7 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_ee, + log_num_ees ); + DBGC ( arbel, "Arbel %p ICM EEC base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_ees, arbel->limits.eec_entry_size ); + + /* Extended end-to-end contexts */ + MLX_FILL_1 ( init_hca, 29, + qpc_eec_cqc_eqc_rdb_parameters.eeec_base_addr_l, + icm_offset ); + DBGC ( arbel, "Arbel %p ICM EEEC base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_ees, arbel->limits.eeec_entry_size ); + + /* Completion queue contexts */ + log_num_cqs = fls ( arbel->limits.reserved_cqs + ARBEL_MAX_CQS - 1 ); + MLX_FILL_2 ( init_hca, 21, + qpc_eec_cqc_eqc_rdb_parameters.cqc_base_addr_l, + ( icm_offset >> 6 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_cq, + log_num_cqs ); + DBGC ( arbel, "Arbel %p ICM CQC base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_cqs, arbel->limits.cqc_entry_size ); + + /* Memory translation table */ + log_num_mtts = fls ( arbel->limits.reserved_mtts - 1 ); + MLX_FILL_1 ( init_hca, 65, + tpt_parameters.mtt_base_addr_l, icm_offset ); + DBGC ( arbel, "Arbel %p ICM MTT base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_mtts, arbel->limits.mtt_entry_size ); + + /* Memory protection table */ + log_num_mpts = fls ( arbel->limits.reserved_mrws + 1 - 1 ); + MLX_FILL_1 ( init_hca, 61, + tpt_parameters.mpt_base_adr_l, icm_offset ); + MLX_FILL_1 ( init_hca, 62, + tpt_parameters.log_mpt_sz, 
log_num_mpts ); + /* Label this as the MPT base; the MTT base is reported separately */ + DBGC ( arbel, "Arbel %p ICM MPT base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_mpts, arbel->limits.mpt_entry_size ); + + /* RDMA something or other */ + log_num_rdbs = fls ( arbel->limits.reserved_rdbs - 1 ); + MLX_FILL_1 ( init_hca, 37, + qpc_eec_cqc_eqc_rdb_parameters.rdb_base_addr_l, + icm_offset ); + DBGC ( arbel, "Arbel %p ICM RDB base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_rdbs, 32 ); + + /* Event queue contexts */ + log_num_eqs = 6; + MLX_FILL_2 ( init_hca, 33, + qpc_eec_cqc_eqc_rdb_parameters.eqc_base_addr_l, + ( icm_offset >> 6 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_eq, + log_num_eqs ); + DBGC ( arbel, "Arbel %p ICM EQ base = %zx\n", arbel, icm_offset ); + icm_offset += ( ( 1 << log_num_eqs ) * arbel->limits.eqc_entry_size ); + + /* Multicast table */ + MLX_FILL_1 ( init_hca, 49, + multicast_parameters.mc_base_addr_l, icm_offset ); + MLX_FILL_1 ( init_hca, 52, + multicast_parameters.log_mc_table_entry_sz, + fls ( sizeof ( struct arbelprm_mgm_entry ) - 1 ) ); + MLX_FILL_1 ( init_hca, 53, + multicast_parameters.mc_table_hash_sz, 8 ); + MLX_FILL_1 ( init_hca, 54, + multicast_parameters.log_mc_table_sz, 3 ); + DBGC ( arbel, "Arbel %p ICM MC base = %zx\n", arbel, icm_offset ); + icm_offset += ( 8 * sizeof ( struct arbelprm_mgm_entry ) ); + + arbel->icm_len = icm_offset; + arbel->icm_len = ( ( arbel->icm_len + 4095 ) & ~4095 ); + + /* Get ICM auxiliary area size */ + memset ( &icm_size, 0, sizeof ( icm_size ) ); + MLX_FILL_1 ( &icm_size, 1, value, arbel->icm_len ); + if ( ( rc = arbel_cmd_set_icm_size ( arbel, &icm_size, + &icm_aux_size ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not set ICM size: %s\n", + arbel, strerror ( rc ) ); + goto err_set_icm_size; + } + arbel->icm_aux_len = ( MLX_GET ( &icm_aux_size, value ) * 4096 ); + + /* Allocate ICM data and auxiliary area */ + DBGC ( arbel, "Arbel %p requires %zd kB ICM and %zd kB AUX ICM\n", + arbel, ( arbel->icm_len / 1024 ), + ( 
arbel->icm_aux_len / 1024 ) ); + arbel->icm = umalloc ( arbel->icm_len + arbel->icm_aux_len ); + if ( ! arbel->icm ) { + rc = -ENOMEM; + goto err_alloc; + } + + /* Map ICM auxiliary area */ + memset ( &map_icm_aux, 0, sizeof ( map_icm_aux ) ); + MLX_FILL_2 ( &map_icm_aux, 3, + log2size, fls ( ( arbel->icm_aux_len / 4096 ) - 1 ), + pa_l, + ( user_to_phys ( arbel->icm, arbel->icm_len ) >> 12 ) ); + if ( ( rc = arbel_cmd_map_icm_aux ( arbel, &map_icm_aux ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not map AUX ICM: %s\n", + arbel, strerror ( rc ) ); + goto err_map_icm_aux; + } + + /* MAP ICM area */ + memset ( &map_icm, 0, sizeof ( map_icm ) ); + MLX_FILL_2 ( &map_icm, 3, + log2size, fls ( ( arbel->icm_len / 4096 ) - 1 ), + pa_l, ( user_to_phys ( arbel->icm, 0 ) >> 12 ) ); + if ( ( rc = arbel_cmd_map_icm ( arbel, &map_icm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not map ICM: %s\n", + arbel, strerror ( rc ) ); + goto err_map_icm; + } + + /* Initialise UAR context */ + arbel->db_rec = phys_to_virt ( user_to_phys ( arbel->icm, 0 ) + + ( arbel->limits.reserved_uars * + ARBEL_PAGE_SIZE ) ); + memset ( arbel->db_rec, 0, ARBEL_PAGE_SIZE ); + db_rec = &arbel->db_rec[ARBEL_GROUP_SEPARATOR_DOORBELL]; + MLX_FILL_1 ( &db_rec->qp, 1, res, ARBEL_UAR_RES_GROUP_SEP ); + + return 0; + + arbel_cmd_unmap_icm ( arbel, ( arbel->icm_len / 4096 ) ); + err_map_icm: + arbel_cmd_unmap_icm_aux ( arbel ); + err_map_icm_aux: + ufree ( arbel->icm ); + arbel->icm = UNULL; + err_alloc: + err_set_icm_size: + return rc; +} + +/** + * Free ICM + * + * @v arbel Arbel device + */ +static void arbel_free_icm ( struct arbel *arbel ) { + arbel_cmd_unmap_icm ( arbel, ( arbel->icm_len / 4096 ) ); + arbel_cmd_unmap_icm_aux ( arbel ); + ufree ( arbel->icm ); + arbel->icm = UNULL; +} + +/*************************************************************************** + * + * Infiniband link-layer operations + * + *************************************************************************** + */ + +/** + * 
Initialise Infiniband link + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_init_ib ( struct arbel *arbel ) { + struct arbelprm_init_ib init_ib; + int rc; + + /* Start from an all-zero INIT_IB parameter block */ + memset ( &init_ib, 0, sizeof ( init_ib ) ); + MLX_FILL_3 ( &init_ib, 0, + mtu_cap, ARBEL_MTU_2048, + port_width_cap, 3, + vl_cap, 1 ); + MLX_FILL_1 ( &init_ib, 1, max_gid, 1 ); + MLX_FILL_1 ( &init_ib, 2, max_pkey, 64 ); + if ( ( rc = arbel_cmd_init_ib ( arbel, PXE_IB_PORT, + &init_ib ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not initialise IB: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Close Infiniband link + * + * @v arbel Arbel device + */ +static void arbel_close_ib ( struct arbel *arbel ) { + int rc; + + if ( ( rc = arbel_cmd_close_ib ( arbel, PXE_IB_PORT ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not close IB: %s\n", + arbel, strerror ( rc ) ); + /* Nothing we can do about this */ + } +} + +/*************************************************************************** + * + * PCI interface + * + *************************************************************************** + */ + +/** + * Set up memory protection table + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_setup_mpt ( struct arbel *arbel ) { + struct arbelprm_mpt mpt; + uint32_t key; + int rc; + + /* Derive key */ + key = ( arbel->limits.reserved_mrws | ARBEL_MKEY_PREFIX ); + arbel->reserved_lkey = ( ( key << 8 ) | ( key >> 24 ) ); + + /* Initialise memory protection table */ + memset ( &mpt, 0, sizeof ( mpt ) ); + MLX_FILL_4 ( &mpt, 0, + r_w, 1, + pa, 1, + lr, 1, + lw, 1 ); + MLX_FILL_1 ( &mpt, 2, mem_key, key ); + MLX_FILL_1 ( &mpt, 3, pd, ARBEL_GLOBAL_PD ); + MLX_FILL_1 ( &mpt, 6, reg_wnd_len_h, 0xffffffffUL ); + MLX_FILL_1 ( &mpt, 7, reg_wnd_len_l, 0xffffffffUL ); + if ( ( rc = arbel_cmd_sw2hw_mpt ( arbel, arbel->limits.reserved_mrws, + &mpt ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not set up MPT: %s\n", + arbel, strerror ( rc ) 
); + return rc; + } + + return 0; +} + +/** + * Probe PCI device + * + * @v pci PCI device + * @v id PCI ID + * @ret rc Return status code + */ +static int arbel_probe ( struct pci_device *pci, + const struct pci_device_id *id __unused ) { + struct ib_device *ibdev; + struct arbel *arbel; + struct arbelprm_init_hca init_hca; + int rc; + + /* Allocate Infiniband device */ + ibdev = alloc_ibdev ( sizeof ( *arbel ) ); + if ( ! ibdev ) { + rc = -ENOMEM; + goto err_ibdev; + } + ibdev->op = &arbel_ib_operations; + pci_set_drvdata ( pci, ibdev ); + ibdev->dev = &pci->dev; + arbel = ibdev->dev_priv; + memset ( arbel, 0, sizeof ( *arbel ) ); + + /* Fix up PCI device */ + adjust_pci_device ( pci ); + + /* Get PCI BARs */ + arbel->config = ioremap ( pci_bar_start ( pci, ARBEL_PCI_CONFIG_BAR ), + ARBEL_PCI_CONFIG_BAR_SIZE ); + arbel->uar = ioremap ( ( pci_bar_start ( pci, ARBEL_PCI_UAR_BAR ) + + ARBEL_PCI_UAR_IDX * ARBEL_PCI_UAR_SIZE ), + ARBEL_PCI_UAR_SIZE ); + + /* Allocate space for mailboxes */ + arbel->mailbox_in = malloc_dma ( ARBEL_MBOX_SIZE, ARBEL_MBOX_ALIGN ); + if ( ! arbel->mailbox_in ) { + rc = -ENOMEM; + goto err_mailbox_in; + } + arbel->mailbox_out = malloc_dma ( ARBEL_MBOX_SIZE, ARBEL_MBOX_ALIGN ); + if ( ! 
arbel->mailbox_out ) { + rc = -ENOMEM; + goto err_mailbox_out; + } + + /* Start firmware */ + if ( ( rc = arbel_start_firmware ( arbel ) ) != 0 ) + goto err_start_firmware; + + /* Get device limits */ + if ( ( rc = arbel_get_limits ( arbel ) ) != 0 ) + goto err_get_limits; + + /* Allocate ICM */ + memset ( &init_hca, 0, sizeof ( init_hca ) ); + if ( ( rc = arbel_alloc_icm ( arbel, &init_hca ) ) != 0 ) + goto err_alloc_icm; + + /* Initialise HCA */ + MLX_FILL_1 ( &init_hca, 74, uar_parameters.log_max_uars, 1 ); + if ( ( rc = arbel_cmd_init_hca ( arbel, &init_hca ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not initialise HCA: %s\n", + arbel, strerror ( rc ) ); + goto err_init_hca; + } + + /* Set up memory protection */ + if ( ( rc = arbel_setup_mpt ( arbel ) ) != 0 ) + goto err_setup_mpt; + + /* Bring up IB layer */ + if ( ( rc = arbel_init_ib ( arbel ) ) != 0 ) + goto err_init_ib; + + /* Wait for link */ + if ( ( rc = arbel_wait_for_link ( arbel ) ) != 0 ) + goto err_wait_for_link; + + /* Get MAD parameters */ + if ( ( rc = arbel_get_mad_params ( ibdev ) ) != 0 ) + goto err_get_mad_params; + + DBGC ( arbel, "Arbel %p port GID is %08lx:%08lx:%08lx:%08lx\n", arbel, + htonl ( ibdev->port_gid.u.dwords[0] ), + htonl ( ibdev->port_gid.u.dwords[1] ), + htonl ( ibdev->port_gid.u.dwords[2] ), + htonl ( ibdev->port_gid.u.dwords[3] ) ); + + /* Add IPoIB device */ + if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n", + arbel, strerror ( rc ) ); + goto err_ipoib_probe; + } + + return 0; + + err_ipoib_probe: + err_get_mad_params: + err_wait_for_link: + arbel_close_ib ( arbel ); + err_init_ib: + err_setup_mpt: + arbel_cmd_close_hca ( arbel ); + err_init_hca: + arbel_free_icm ( arbel ); + err_alloc_icm: + err_get_limits: + arbel_stop_firmware ( arbel ); + err_start_firmware: + free_dma ( arbel->mailbox_out, ARBEL_MBOX_SIZE ); + err_mailbox_out: + free_dma ( arbel->mailbox_in, ARBEL_MBOX_SIZE ); + err_mailbox_in: + free_ibdev ( 
ibdev ); + err_ibdev: + return rc; +} + +/** + * Remove PCI device + * + * @v pci PCI device + * + * Releases, in order: the IPoIB device, the IB link, the HCA, the + * ICM, the firmware area, both command mailboxes and finally the + * Infiniband device itself. + */ +static void arbel_remove ( struct pci_device *pci ) { + struct ib_device *ibdev = pci_get_drvdata ( pci ); + struct arbel *arbel = ibdev->dev_priv; + + ipoib_remove ( ibdev ); + arbel_close_ib ( arbel ); + arbel_cmd_close_hca ( arbel ); + arbel_free_icm ( arbel ); + /* Stop the firmware exactly once, as the probe error path + * does; a second call would repeat the unmap command and the + * free of the firmware area after both were already done. + */ + arbel_stop_firmware ( arbel ); + free_dma ( arbel->mailbox_out, ARBEL_MBOX_SIZE ); + free_dma ( arbel->mailbox_in, ARBEL_MBOX_SIZE ); + free_ibdev ( ibdev ); +} + +static struct pci_device_id arbel_nics[] = { + PCI_ROM ( 0x15b3, 0x6282, "mt25218", "MT25218 HCA driver" ), + PCI_ROM ( 0x15b3, 0x6274, "mt25204", "MT25204 HCA driver" ), +}; + +struct pci_driver arbel_driver __pci_driver = { + .ids = arbel_nics, + .id_count = ( sizeof ( arbel_nics ) / sizeof ( arbel_nics[0] ) ), + .probe = arbel_probe, + .remove = arbel_remove, +}; diff --git a/src/drivers/infiniband/arbel.h b/src/drivers/infiniband/arbel.h new file mode 100644 index 00000000..4d7e4fc6 --- /dev/null +++ b/src/drivers/infiniband/arbel.h @@ -0,0 +1,461 @@ +#ifndef _ARBEL_H +#define _ARBEL_H + +/** @file + * + * Mellanox Arbel Infiniband HCA driver + * + */ + +#include +#include +#include "mlx_bitops.h" +#include "MT25218_PRM.h" + +/* + * Hardware constants + * + */ + +/* PCI BARs */ +#define ARBEL_PCI_CONFIG_BAR PCI_BASE_ADDRESS_0 +#define ARBEL_PCI_CONFIG_BAR_SIZE 0x100000 +#define ARBEL_PCI_UAR_BAR PCI_BASE_ADDRESS_2 +#define ARBEL_PCI_UAR_IDX 1 +#define ARBEL_PCI_UAR_SIZE 0x1000 + +/* UAR context table (UCE) resource types */ +#define ARBEL_UAR_RES_NONE 0x00 +#define ARBEL_UAR_RES_CQ_CI 0x01 +#define ARBEL_UAR_RES_CQ_ARM 0x02 +#define ARBEL_UAR_RES_SQ 0x03 +#define ARBEL_UAR_RES_RQ 0x04 +#define ARBEL_UAR_RES_GROUP_SEP 0x07 + +/* Work queue entry and completion queue entry opcodes */ +#define ARBEL_OPCODE_SEND 0x0a +#define ARBEL_OPCODE_RECV_ERROR 0xfe +#define ARBEL_OPCODE_SEND_ERROR 0xff + +/* HCA command 
register opcodes */ +#define ARBEL_HCR_QUERY_DEV_LIM 0x0003 +#define ARBEL_HCR_QUERY_FW 0x0004 +#define ARBEL_HCR_INIT_HCA 0x0007 +#define ARBEL_HCR_CLOSE_HCA 0x0008 +#define ARBEL_HCR_INIT_IB 0x0009 +#define ARBEL_HCR_CLOSE_IB 0x000a +#define ARBEL_HCR_SW2HW_MPT 0x000d +#define ARBEL_HCR_MAP_EQ 0x0012 +#define ARBEL_HCR_SW2HW_EQ 0x0013 +#define ARBEL_HCR_HW2SW_EQ 0x0014 +#define ARBEL_HCR_SW2HW_CQ 0x0016 +#define ARBEL_HCR_HW2SW_CQ 0x0017 +#define ARBEL_HCR_RST2INIT_QPEE 0x0019 +#define ARBEL_HCR_INIT2RTR_QPEE 0x001a +#define ARBEL_HCR_RTR2RTS_QPEE 0x001b +#define ARBEL_HCR_2RST_QPEE 0x0021 +#define ARBEL_HCR_MAD_IFC 0x0024 +#define ARBEL_HCR_READ_MGM 0x0025 +#define ARBEL_HCR_WRITE_MGM 0x0026 +#define ARBEL_HCR_MGID_HASH 0x0027 +#define ARBEL_HCR_RUN_FW 0x0ff6 +#define ARBEL_HCR_DISABLE_LAM 0x0ff7 +#define ARBEL_HCR_ENABLE_LAM 0x0ff8 +#define ARBEL_HCR_UNMAP_ICM 0x0ff9 +#define ARBEL_HCR_MAP_ICM 0x0ffa +#define ARBEL_HCR_UNMAP_ICM_AUX 0x0ffb +#define ARBEL_HCR_MAP_ICM_AUX 0x0ffc +#define ARBEL_HCR_SET_ICM_SIZE 0x0ffd +#define ARBEL_HCR_UNMAP_FA 0x0ffe +#define ARBEL_HCR_MAP_FA 0x0fff + +/* Service types */ +#define ARBEL_ST_UD 0x03 + +/* MTUs */ +#define ARBEL_MTU_2048 0x04 + +#define ARBEL_NO_EQ 64 + +#define ARBEL_INVALID_LKEY 0x00000100UL + +#define ARBEL_PAGE_SIZE 4096 + +#define ARBEL_DB_POST_SND_OFFSET 0x10 + +/* + * Datatypes that seem to be missing from the autogenerated documentation + * + */ +struct arbelprm_mgm_hash_st { + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t hash[0x00010]; + pseudo_bit_t reserved1[0x00010]; +} __attribute__ (( packed )); + +struct arbelprm_scalar_parameter_st { + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t value[0x00020]; +} __attribute__ (( packed )); + +/* + * Wrapper structures for hardware datatypes + * + */ + +struct MLX_DECLARE_STRUCT ( arbelprm_access_lam ); +struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_context ); +struct MLX_DECLARE_STRUCT ( 
arbelprm_completion_queue_entry ); +struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); +struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_eqc ); +struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); +struct MLX_DECLARE_STRUCT ( arbelprm_init_hca ); +struct MLX_DECLARE_STRUCT ( arbelprm_init_ib ); +struct MLX_DECLARE_STRUCT ( arbelprm_mad_ifc ); +struct MLX_DECLARE_STRUCT ( arbelprm_mgm_entry ); +struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash ); +struct MLX_DECLARE_STRUCT ( arbelprm_mpt ); +struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_qp_ee_state_transitions ); +struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); +struct MLX_DECLARE_STRUCT ( arbelprm_query_fw ); +struct MLX_DECLARE_STRUCT ( arbelprm_queue_pair_ee_context_entry ); +struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); +struct MLX_DECLARE_STRUCT ( arbelprm_scalar_parameter ); +struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); +struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); +struct MLX_DECLARE_STRUCT ( arbelprm_virtual_physical_mapping ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ctrl_send ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_data_ptr ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_next ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ud ); + +/* + * Composite hardware datatypes + * + */ + +#define ARBEL_MAX_GATHER 1 + +struct arbelprm_ud_send_wqe { + struct arbelprm_wqe_segment_next next; + struct arbelprm_wqe_segment_ctrl_send ctrl; + struct arbelprm_wqe_segment_ud ud; + struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_GATHER]; +} __attribute__ (( packed )); + +#define ARBEL_MAX_SCATTER 1 + +struct arbelprm_recv_wqe { + /* The autogenerated header is inconsistent between send and + * receive WQEs. 
The "ctrl" structure for receive WQEs is + * defined to include the "next" structure. Since the "ctrl" + * part of the "ctrl" structure contains only "reserved, must + * be zero" bits, we ignore its definition and provide + * something more usable. + */ + struct arbelprm_recv_wqe_segment_next next; + uint32_t ctrl[2]; /* All "reserved, must be zero" */ + struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_SCATTER]; +} __attribute__ (( packed )); + +union arbelprm_completion_entry { + struct arbelprm_completion_queue_entry normal; + struct arbelprm_completion_with_error error; +} __attribute__ (( packed )); + +union arbelprm_doorbell_record { + struct arbelprm_cq_arm_db_record cq_arm; + struct arbelprm_cq_ci_db_record cq_ci; + struct arbelprm_qp_db_record qp; +} __attribute__ (( packed )); + +union arbelprm_doorbell_register { + struct arbelprm_send_doorbell send; + uint32_t dword[2]; +} __attribute__ (( packed )); + +union arbelprm_mad { + struct arbelprm_mad_ifc ifc; + union ib_mad mad; +} __attribute__ (( packed )); + +/* + * gPXE-specific definitions + * + */ + +/** Arbel device limits */ +struct arbel_dev_limits { + /** Number of reserved QPs */ + unsigned int reserved_qps; + /** QP context entry size */ + size_t qpc_entry_size; + /** Extended QP context entry size */ + size_t eqpc_entry_size; + /** Number of reserved SRQs */ + unsigned int reserved_srqs; + /** SRQ context entry size */ + size_t srqc_entry_size; + /** Number of reserved EEs */ + unsigned int reserved_ees; + /** EE context entry size */ + size_t eec_entry_size; + /** Extended EE context entry size */ + size_t eeec_entry_size; + /** Number of reserved CQs */ + unsigned int reserved_cqs; + /** CQ context entry size */ + size_t cqc_entry_size; + /** Number of reserved MTTs */ + unsigned int reserved_mtts; + /** MTT entry size */ + size_t mtt_entry_size; + /** Number of reserved MRWs */ + unsigned int reserved_mrws; + /** MPT entry size */ + size_t mpt_entry_size; + /** Number of reserved RDBs */ + 
unsigned int reserved_rdbs; + /** EQ context entry size */ + size_t eqc_entry_size; + /** Number of reserved UARs */ + unsigned int reserved_uars; +}; + +/** Alignment of Arbel send work queue entries */ +#define ARBEL_SEND_WQE_ALIGN 128 + +/** An Arbel send work queue entry */ +union arbel_send_wqe { + struct arbelprm_ud_send_wqe ud; + uint8_t force_align[ARBEL_SEND_WQE_ALIGN]; +} __attribute__ (( packed )); + +/** An Arbel send work queue */ +struct arbel_send_work_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Work queue entries */ + union arbel_send_wqe *wqe; + /** Size of work queue */ + size_t wqe_size; +}; + +/** Alignment of Arbel receive work queue entries */ +#define ARBEL_RECV_WQE_ALIGN 64 + +/** An Arbel receive work queue entry */ +union arbel_recv_wqe { + struct arbelprm_recv_wqe recv; + uint8_t force_align[ARBEL_RECV_WQE_ALIGN]; +} __attribute__ (( packed )); + +/** An Arbel receive work queue */ +struct arbel_recv_work_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Work queue entries */ + union arbel_recv_wqe *wqe; + /** Size of work queue */ + size_t wqe_size; +}; + +/** Maximum number of allocatable queue pairs + * + * This is a policy decision, not a device limit. + */ +#define ARBEL_MAX_QPS 8 + +/** Base queue pair number */ +#define ARBEL_QPN_BASE 0x550000 + +/** An Arbel queue pair */ +struct arbel_queue_pair { + /** Send work queue */ + struct arbel_send_work_queue send; + /** Receive work queue */ + struct arbel_recv_work_queue recv; +}; + +/** Maximum number of allocatable completion queues + * + * This is a policy decision, not a device limit. 
+ */ +#define ARBEL_MAX_CQS 8 + +/** An Arbel completion queue */ +struct arbel_completion_queue { + /** Consumer counter doorbell record number */ + unsigned int ci_doorbell_idx; + /** Arm queue doorbell record number */ + unsigned int arm_doorbell_idx; + /** Completion queue entries */ + union arbelprm_completion_entry *cqe; + /** Size of completion queue */ + size_t cqe_size; +}; + +/** An Arbel resource bitmask */ +typedef uint32_t arbel_bitmask_t; + +/** Size of an Arbel resource bitmask */ +#define ARBEL_BITMASK_SIZE(max_entries) \ + ( ( (max_entries) + ( 8 * sizeof ( arbel_bitmask_t ) ) - 1 ) / \ + ( 8 * sizeof ( arbel_bitmask_t ) ) ) + +/** An Arbel device */ +struct arbel { + /** PCI configuration registers */ + void *config; + /** PCI user Access Region */ + void *uar; + + /** Command input mailbox */ + void *mailbox_in; + /** Command output mailbox */ + void *mailbox_out; + + /** Firmware area in external memory */ + userptr_t firmware_area; + /** ICM size */ + size_t icm_len; + /** ICM AUX size */ + size_t icm_aux_len; + /** ICM area */ + userptr_t icm; + + /** Doorbell records */ + union arbelprm_doorbell_record *db_rec; + /** Reserved LKey + * + * Used to get unrestricted memory access. 
+ */ + unsigned long reserved_lkey; + + /** Completion queue in-use bitmask */ + arbel_bitmask_t cq_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_CQS ) ]; + /** Queue pair in-use bitmask */ + arbel_bitmask_t qp_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_QPS ) ]; + + /** Device limits */ + struct arbel_dev_limits limits; +}; + +/** Global protection domain */ +#define ARBEL_GLOBAL_PD 0x123456 + +/** Memory key prefix */ +#define ARBEL_MKEY_PREFIX 0x77000000UL + +/* + * HCA commands + * + */ + +#define ARBEL_HCR_BASE 0x80680 +#define ARBEL_HCR_REG(x) ( ARBEL_HCR_BASE + 4 * (x) ) +#define ARBEL_HCR_MAX_WAIT_MS 2000 +#define ARBEL_MBOX_ALIGN 4096 +#define ARBEL_MBOX_SIZE 512 + +/* HCA command is split into + * + * bits 11:0 Opcode + * bit 12 Input uses mailbox + * bit 13 Output uses mailbox + * bits 22:14 Input parameter length (in dwords) + * bits 31:23 Output parameter length (in dwords) + * + * Encoding the information in this way allows us to cut out several + * parameters to the arbel_command() call. + */ +#define ARBEL_HCR_IN_MBOX 0x00001000UL +#define ARBEL_HCR_OUT_MBOX 0x00002000UL +#define ARBEL_HCR_OPCODE( _command ) ( (_command) & 0xfff ) +#define ARBEL_HCR_IN_LEN( _command ) ( ( (_command) >> 12 ) & 0x7fc ) +#define ARBEL_HCR_OUT_LEN( _command ) ( ( (_command) >> 21 ) & 0x7fc ) + +/** Build HCR command from component parts */ +#define ARBEL_HCR_INOUT_CMD( _opcode, _in_mbox, _in_len, \ + _out_mbox, _out_len ) \ + ( (_opcode) | \ + ( (_in_mbox) ? ARBEL_HCR_IN_MBOX : 0 ) | \ + ( ( (_in_len) / 4 ) << 14 ) | \ + ( (_out_mbox) ? 
ARBEL_HCR_OUT_MBOX : 0 ) | \ + ( ( (_out_len) / 4 ) << 23 ) ) + +#define ARBEL_HCR_IN_CMD( _opcode, _in_mbox, _in_len ) \ + ARBEL_HCR_INOUT_CMD ( _opcode, _in_mbox, _in_len, 0, 0 ) + +#define ARBEL_HCR_OUT_CMD( _opcode, _out_mbox, _out_len ) \ + ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, _out_mbox, _out_len ) + +#define ARBEL_HCR_VOID_CMD( _opcode ) \ + ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, 0, 0 ) + +/* + * Doorbell record allocation + * + * The doorbell record map looks like: + * + * ARBEL_MAX_CQS * Arm completion queue doorbell + * ARBEL_MAX_QPS * Send work request doorbell + * Group separator + * ...(empty space)... + * ARBEL_MAX_QPS * Receive work request doorbell + * ARBEL_MAX_CQS * Completion queue consumer counter update doorbell + */ + +#define ARBEL_MAX_DOORBELL_RECORDS 512 +#define ARBEL_GROUP_SEPARATOR_DOORBELL ( ARBEL_MAX_CQS + ARBEL_MAX_QPS ) + +/** + * Get arm completion queue doorbell index + * + * @v cqn_offset Completion queue number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_cq_arm_doorbell_idx ( unsigned int cqn_offset ) { + return cqn_offset; +} + +/** + * Get send work request doorbell index + * + * @v qpn_offset Queue pair number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_send_doorbell_idx ( unsigned int qpn_offset ) { + return ( ARBEL_MAX_CQS + qpn_offset ); +} + +/** + * Get receive work request doorbell index + * + * @v qpn_offset Queue pair number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_recv_doorbell_idx ( unsigned int qpn_offset ) { + return ( ARBEL_MAX_DOORBELL_RECORDS - ARBEL_MAX_CQS - qpn_offset - 1 ); +} + +/** + * Get completion queue consumer counter doorbell index + * + * @v cqn_offset Completion queue number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_cq_ci_doorbell_idx ( unsigned int cqn_offset ) { + return ( ARBEL_MAX_DOORBELL_RECORDS - cqn_offset - 1 ); +} + +#endif /* 
_ARBEL_H */ diff --git a/src/drivers/infiniband/mlx_bitops.h b/src/drivers/infiniband/mlx_bitops.h new file mode 100644 index 00000000..ec57d7b0 --- /dev/null +++ b/src/drivers/infiniband/mlx_bitops.h @@ -0,0 +1,209 @@ +#ifndef _MLX_BITOPS_H +#define _MLX_BITOPS_H + +/* + * Copyright (C) 2007 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/** + * @file + * + * Mellanox bit operations + * + */ + +/* Datatype used to represent a bit in the Mellanox autogenerated headers */ +typedef unsigned char pseudo_bit_t; + +/** + * Wrapper structure for pseudo_bit_t structures + * + * This structure provides a wrapper around the autogenerated + * pseudo_bit_t structures. It has the correct size, and also + * encapsulates type information about the underlying pseudo_bit_t + * structure, which allows the MLX_FILL etc. macros to work without + * requiring explicit type information. 
+ */ +#define MLX_DECLARE_STRUCT( _structure ) \ + _structure { \ + union { \ + uint8_t bytes[ sizeof ( struct _structure ## _st ) / 8 ]; \ + uint32_t dwords[ sizeof ( struct _structure ## _st ) / 32 ]; \ + struct _structure ## _st *dummy[0]; \ + } u; \ + } + +/** Get pseudo_bit_t structure type from wrapper structure pointer */ +#define MLX_PSEUDO_STRUCT( _ptr ) \ + typeof ( *((_ptr)->u.dummy[0]) ) + +/** Bit offset of a field within a pseudo_bit_t structure */ +#define MLX_BIT_OFFSET( _structure_st, _field ) \ + offsetof ( _structure_st, _field ) + +/** Dword offset of a field within a pseudo_bit_t structure */ +#define MLX_DWORD_OFFSET( _structure_st, _field ) \ + ( MLX_BIT_OFFSET ( _structure_st, _field ) / 32 ) + +/** Dword bit offset of a field within a pseudo_bit_t structure + * + * Yes, using mod-32 would work, but would lose the check for the + * error of specifying a mismatched field name and dword index. + */ +#define MLX_DWORD_BIT_OFFSET( _structure_st, _index, _field ) \ + ( MLX_BIT_OFFSET ( _structure_st, _field ) - ( 32 * (_index) ) ) + +/** Bit width of a field within a pseudo_bit_t structure */ +#define MLX_BIT_WIDTH( _structure_st, _field ) \ + sizeof ( ( ( _structure_st * ) NULL )->_field ) + +/** Bit mask for a field within a pseudo_bit_t structure */ +#define MLX_BIT_MASK( _structure_st, _field ) \ + ( ( ~( ( uint32_t ) 0 ) ) >> \ + ( 32 - MLX_BIT_WIDTH ( _structure_st, _field ) ) ) + +/* + * Assemble native-endian dword from named fields and values + * + */ + +#define MLX_ASSEMBLE_1( _structure_st, _index, _field, _value ) \ + ( (_value) << MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) ) + +#define MLX_ASSEMBLE_2( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_1 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_3( _structure_st, _index, _field, _value, ... 
) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_2 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_4( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_3 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_5( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_4 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_6( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_5 ( _structure_st, _index, __VA_ARGS__ ) ) + +/* + * Build native-endian (positive) dword bitmasks from named fields + * + */ + +#define MLX_MASK_1( _structure_st, _index, _field ) \ + ( MLX_BIT_MASK ( _structure_st, _field ) << \ + MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) ) + +#define MLX_MASK_2( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_1 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_3( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_2 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_4( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_3 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_5( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_4 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_6( _structure_st, _index, _field, ... 
) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_5 ( _structure_st, _index, __VA_ARGS__ ) ) + +/* + * Populate big-endian dwords from named fields and values + * + */ + +#define MLX_FILL( _ptr, _index, _assembled ) \ + do { \ + uint32_t *__ptr = &(_ptr)->u.dwords[(_index)]; \ + uint32_t __assembled = (_assembled); \ + *__ptr = cpu_to_be32 ( __assembled ); \ + } while ( 0 ) + +#define MLX_FILL_1( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_2( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_2 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_3( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_3 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_4( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_5( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_5 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_6( _ptr, _index, ... 
) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_6 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +/* + * Modify big-endian dword using named field and value + * + */ + +#define MLX_SET( _ptr, _field, _value ) \ + do { \ + unsigned int __index = \ + MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + uint32_t *__ptr = &(_ptr)->u.dwords[__index]; \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value &= ~( MLX_MASK_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + __index, _field ) ); \ + __value |= MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + __index, _field, _value ); \ + *__ptr = cpu_to_be32 ( __value ); \ + } while ( 0 ) + +/* + * Extract value of named field + * + */ + +#define MLX_GET( _ptr, _field ) \ + ( { \ + unsigned int __index = \ + MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + uint32_t *__ptr = &(_ptr)->u.dwords[__index]; \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value >>= \ + MLX_DWORD_BIT_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), \ + __index, _field ); \ + __value &= \ + MLX_BIT_MASK ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + __value; \ + } ) + +#endif /* _MLX_BITOPS_H */ diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c new file mode 100644 index 00000000..784c0720 --- /dev/null +++ b/src/drivers/net/ipoib.c @@ -0,0 +1,930 @@ +/* + * Copyright (C) 2007 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include "timer.h" +#include +#include +#include +#include +#include + +/** @file + * + * IP over Infiniband + */ + +/** IPoIB MTU */ +#define IPOIB_MTU 2048 + +/** Number of IPoIB data send work queue entries */ +#define IPOIB_DATA_NUM_SEND_WQES 2 + +/** Number of IPoIB data receive work queue entries */ +#define IPOIB_DATA_NUM_RECV_WQES 4 + +/** Number of IPoIB data completion entries */ +#define IPOIB_DATA_NUM_CQES 8 + +/** Number of IPoIB metadata send work queue entries */ +#define IPOIB_META_NUM_SEND_WQES 2 + +/** Number of IPoIB metadata receive work queue entries */ +#define IPOIB_META_NUM_RECV_WQES 2 + +/** Number of IPoIB metadata completion entries */ +#define IPOIB_META_NUM_CQES 8 + +/** An IPoIB queue set */ +struct ipoib_queue_set { + /** Completion queue */ + struct ib_completion_queue *cq; + /** Queue pair */ + struct ib_queue_pair *qp; + /** Receive work queue fill level */ + unsigned int recv_fill; + /** Receive work queue maximum fill level */ + unsigned int recv_max_fill; +}; + +/** An IPoIB device */ +struct ipoib_device { + /** Network device */ + struct net_device *netdev; + /** Underlying Infiniband device */ + struct ib_device *ibdev; + /** Data queue set */ + struct ipoib_queue_set data; + /** Metadata queue set */ + struct ipoib_queue_set meta; + /** Broadcast GID */ + struct ib_gid broadcast_gid; + /** Broadcast LID */ + unsigned int broadcast_lid; + /** Joined to broadcast group */ + int broadcast_joined; + /** Data queue key */ + unsigned long data_qkey; +}; + +/** + * IPoIB path cache entry + * + * This serves a similar role to the ARP cache for Ethernet. (ARP + * *is* used on IPoIB; we have two caches to maintain.) 
+ */ +struct ipoib_cached_path { + /** Destination GID */ + struct ib_gid gid; + /** Destination LID */ + unsigned int dlid; + /** Service level */ + unsigned int sl; + /** Rate */ + unsigned int rate; +}; + +/** Number of IPoIB path cache entries */ +#define IPOIB_NUM_CACHED_PATHS 2 + +/** IPoIB path cache */ +static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS]; + +/** Oldest IPoIB path cache entry index */ +static unsigned int ipoib_path_cache_idx = 0; + +/** TID half used to identify get path record replies */ +#define IPOIB_TID_GET_PATH_REC 0x11111111UL + +/** TID half used to identify multicast member record replies */ +#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL + +/** IPoIB metadata TID */ +static uint32_t ipoib_meta_tid = 0; + +/** IPv4 broadcast GID */ +static const struct ib_gid ipv4_broadcast_gid = { + { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } } +}; + +/** Maximum time we will wait for the broadcast join to succeed */ +#define IPOIB_JOIN_MAX_DELAY_MS 1000 + +/**************************************************************************** + * + * IPoIB link layer + * + **************************************************************************** + */ + +/** Broadcast QPN used in IPoIB MAC addresses + * + * This is a guaranteed invalid real QPN + */ +#define IPOIB_BROADCAST_QPN 0xffffffffUL + +/** Broadcast IPoIB address */ +static struct ipoib_mac ipoib_broadcast = { + .qpn = ntohl ( IPOIB_BROADCAST_QPN ), +}; + +/** + * Transmit IPoIB packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v ll_dest Link-layer destination address + * + * Prepends the IPoIB link-layer header and transmits the packet. 
+ */ +static int ipoib_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, + const void *ll_dest ) { + struct ipoib_hdr *ipoib_hdr = + iob_push ( iobuf, sizeof ( *ipoib_hdr ) ); + + /* Build IPoIB header */ + memcpy ( &ipoib_hdr->pseudo.peer, ll_dest, + sizeof ( ipoib_hdr->pseudo.peer ) ); + ipoib_hdr->real.proto = net_protocol->net_proto; + ipoib_hdr->real.reserved = 0; + + /* Hand off to network device */ + return netdev_tx ( netdev, iobuf ); +} + +/** + * Process received IPoIB packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * + * Strips off the IPoIB link-layer header and passes up to the + * network-layer protocol. + */ +static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { + struct ipoib_hdr *ipoib_hdr = iobuf->data; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) { + DBG ( "IPoIB packet too short for link-layer header\n" ); + DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + free_iob ( iobuf ); + return -EINVAL; + } + + /* Strip off IPoIB header */ + iob_pull ( iobuf, sizeof ( *ipoib_hdr ) ); + + /* Hand off to network-layer protocol */ + return net_rx ( iobuf, netdev, ipoib_hdr->real.proto, + &ipoib_hdr->pseudo.peer ); +} + +/** + * Transcribe IPoIB address + * + * @v ll_addr Link-layer address + * @ret string Link-layer address in human-readable format + */ +const char * ipoib_ntoa ( const void *ll_addr ) { + static char buf[45]; + const struct ipoib_mac *mac = ll_addr; + + snprintf ( buf, sizeof ( buf ), "%08lx:%08lx:%08lx:%08lx:%08lx", + htonl ( mac->qpn ), htonl ( mac->gid.u.dwords[0] ), + htonl ( mac->gid.u.dwords[1] ), + htonl ( mac->gid.u.dwords[2] ), + htonl ( mac->gid.u.dwords[3] ) ); + return buf; +} + +/** IPoIB protocol */ +struct ll_protocol ipoib_protocol __ll_protocol = { + .name = "IPoIB", + .ll_proto = htons ( ARPHRD_INFINIBAND ), + .ll_addr_len = IPOIB_ALEN, + .ll_header_len = IPOIB_HLEN, + .ll_broadcast = ( uint8_t * ) &ipoib_broadcast, 
+ .tx = ipoib_tx, + .rx = ipoib_rx, + .ntoa = ipoib_ntoa, +}; + +/**************************************************************************** + * + * IPoIB network device + * + **************************************************************************** + */ + +/** + * Destroy queue set + * + * @v ipoib IPoIB device + * @v qset Queue set + */ +static void ipoib_destroy_qset ( struct ipoib_device *ipoib, + struct ipoib_queue_set *qset ) { + struct ib_device *ibdev = ipoib->ibdev; + + if ( qset->qp ) + ib_destroy_qp ( ibdev, qset->qp ); + if ( qset->cq ) + ib_destroy_cq ( ibdev, qset->cq ); + memset ( qset, 0, sizeof ( *qset ) ); +} + +/** + * Create queue set + * + * @v ipoib IPoIB device + * @v qset Queue set + * @ret rc Return status code + */ +static int ipoib_create_qset ( struct ipoib_device *ipoib, + struct ipoib_queue_set *qset, + unsigned int num_cqes, + unsigned int num_send_wqes, + unsigned int num_recv_wqes, + unsigned long qkey ) { + struct ib_device *ibdev = ipoib->ibdev; + int rc; + + /* Store queue parameters */ + qset->recv_max_fill = num_recv_wqes; + + /* Allocate completion queue */ + qset->cq = ib_create_cq ( ibdev, num_cqes ); + if ( ! qset->cq ) { + DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n", + ipoib ); + rc = -ENOMEM; + goto err; + } + + /* Allocate queue pair */ + qset->qp = ib_create_qp ( ibdev, num_send_wqes, qset->cq, + num_recv_wqes, qset->cq, qkey ); + if ( ! 
qset->qp ) { + DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n", + ipoib ); + rc = -ENOMEM; + goto err; + } + qset->qp->owner_priv = ipoib->netdev; + + return 0; + + err: + ipoib_destroy_qset ( ipoib, qset ); + return rc; +} + +/** + * Find path cache entry by GID + * + * @v gid GID + * @ret entry Path cache entry, or NULL + */ +static struct ipoib_cached_path * +ipoib_find_cached_path ( struct ib_gid *gid ) { + struct ipoib_cached_path *path; + unsigned int i; + + for ( i = 0 ; i < IPOIB_NUM_CACHED_PATHS ; i++ ) { + path = &ipoib_path_cache[i]; + if ( memcmp ( &path->gid, gid, sizeof ( *gid ) ) == 0 ) + return path; + } + DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx cache miss\n", + htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ), + htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ) ); + return NULL; +} + +/** + * Transmit path record request + * + * @v ipoib IPoIB device + * @v gid Destination GID + * @ret rc Return status code + */ +static int ipoib_get_path_record ( struct ipoib_device *ipoib, + struct ib_gid *gid ) { + struct ib_device *ibdev = ipoib->ibdev; + struct io_buffer *iobuf; + struct ib_mad_path_record *path_record; + struct ib_address_vector av; + int rc; + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( sizeof ( *path_record ) ); + if ( ! 
iobuf ) + return -ENOMEM; + iob_put ( iobuf, sizeof ( *path_record ) ); + path_record = iobuf->data; + memset ( path_record, 0, sizeof ( *path_record ) ); + + /* Construct path record request */ + path_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + path_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + path_record->mad_hdr.class_version = 2; + path_record->mad_hdr.method = IB_MGMT_METHOD_GET; + path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); + path_record->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC; + path_record->mad_hdr.tid[1] = ipoib_meta_tid++; + path_record->sa_hdr.comp_mask[1] = + htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); + memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) ); + memcpy ( &path_record->sgid, &ibdev->port_gid, + sizeof ( path_record->sgid ) ); + + /* Construct address vector */ + memset ( &av, 0, sizeof ( av ) ); + av.dlid = ibdev->sm_lid; + av.dest_qp = IB_SA_QPN; + av.qkey = IB_GLOBAL_QKEY; + + /* Post send request */ + if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av, + iobuf ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n", + ipoib, strerror ( rc ) ); + free_iob ( iobuf ); + return rc; + } + + return 0; +} + +/** + * Transmit multicast group membership request + * + * @v ipoib IPoIB device + * @v gid Multicast GID + * @v join Join (rather than leave) group + * @ret rc Return status code + */ +static int ipoib_mc_member_record ( struct ipoib_device *ipoib, + struct ib_gid *gid, int join ) { + struct ib_device *ibdev = ipoib->ibdev; + struct io_buffer *iobuf; + struct ib_mad_mc_member_record *mc_member_record; + struct ib_address_vector av; + int rc; + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( sizeof ( *mc_member_record ) ); + if ( ! 
iobuf ) + return -ENOMEM; + iob_put ( iobuf, sizeof ( *mc_member_record ) ); + mc_member_record = iobuf->data; + memset ( mc_member_record, 0, sizeof ( *mc_member_record ) ); + + /* Construct multicast member record request (SET joins, DELETE leaves) */ + mc_member_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + mc_member_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + mc_member_record->mad_hdr.class_version = 2; + mc_member_record->mad_hdr.method = + ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); + mc_member_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); + mc_member_record->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC; + mc_member_record->mad_hdr.tid[1] = ipoib_meta_tid++; + mc_member_record->sa_hdr.comp_mask[1] = + htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_JOIN_STATE ); + mc_member_record->scope__join_state = 1; + memcpy ( &mc_member_record->mgid, gid, + sizeof ( mc_member_record->mgid ) ); + memcpy ( &mc_member_record->port_gid, &ibdev->port_gid, + sizeof ( mc_member_record->port_gid ) ); + + /* Construct address vector (request goes to the SA QPN at the SM LID) */ + memset ( &av, 0, sizeof ( av ) ); + av.dlid = ibdev->sm_lid; + av.dest_qp = IB_SA_QPN; + av.qkey = IB_GLOBAL_QKEY; + + /* Post send request on the metadata queue pair; free buffer on failure */ + if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av, + iobuf ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not send MC member record: %s\n", + ipoib, strerror ( rc ) ); + free_iob ( iobuf ); + return rc; + } + + return 0; +} + +/** + * Transmit packet via IPoIB network device + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int ipoib_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; + struct ib_address_vector av; + struct ib_gid *gid; + struct ipoib_cached_path *path; + int rc; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { + 
DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib ); + return -EINVAL; + } + iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); + + /* Construct address vector */ + memset ( &av, 0, sizeof ( av ) ); + av.qkey = IB_GLOBAL_QKEY; + av.gid_present = 1; + if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) { + /* Broadcast address */ + av.dest_qp = IB_BROADCAST_QPN; + av.dlid = ipoib->broadcast_lid; + gid = &ipoib->broadcast_gid; + } else { + /* Unicast - look in path cache */ + path = ipoib_find_cached_path ( &ipoib_pshdr->peer.gid ); + if ( ! path ) { + /* No path entry - get path record */ + rc = ipoib_get_path_record ( ipoib, + &ipoib_pshdr->peer.gid ); + netdev_tx_complete ( netdev, iobuf ); + return rc; + } + av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn ); + av.dlid = path->dlid; + av.rate = path->rate; + av.sl = path->sl; + gid = &ipoib_pshdr->peer.gid; + } + memcpy ( &av.gid, gid, sizeof ( av.gid ) ); + + return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf ); +} + +/** + * Handle IPoIB data send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_data_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + + netdev_tx_complete_err ( netdev, iobuf, + ( completion->syndrome ? 
-EIO : 0 ) ); +} + +/** + * Handle IPoIB data receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + struct ipoib_pseudo_hdr *ipoib_pshdr; + + if ( completion->syndrome ) { + netdev_rx_err ( netdev, iobuf, -EIO ); + goto done; + } + + iob_put ( iobuf, completion->len ); + if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) { + DBGC ( ipoib, "IPoIB %p received data packet too short to " + "contain GRH\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + netdev_rx_err ( netdev, iobuf, -EIO ); + goto done; + } + iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); + + if ( iob_len ( iobuf ) < sizeof ( struct ipoib_real_hdr ) ) { + DBGC ( ipoib, "IPoIB %p received data packet too short to " + "contain IPoIB header\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + netdev_rx_err ( netdev, iobuf, -EIO ); + goto done; + } + + ipoib_pshdr = iob_push ( iobuf, sizeof ( *ipoib_pshdr ) ); + /* FIXME: fill in a MAC address for the sake of AoE! 
*/ + + netdev_rx ( netdev, iobuf ); + + done: + ipoib->data.recv_fill--; +} + +/** + * Handle IPoIB metadata send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + + if ( completion->syndrome ) { + DBGC ( ipoib, "IPoIB %p metadata TX completion error %x\n", + ipoib, completion->syndrome ); + } + free_iob ( iobuf ); +} + +/** + * Handle received IPoIB path record + * + * @v ipoib IPoIB device + * @v path_record Path record + */ +static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused, + struct ib_mad_path_record *path_record ) { + struct ipoib_cached_path *path; + + /* Update path cache entry */ + path = &ipoib_path_cache[ipoib_path_cache_idx]; + memcpy ( &path->gid, &path_record->dgid, sizeof ( path->gid ) ); + path->dlid = ntohs ( path_record->dlid ); + path->sl = ( path_record->reserved__sl & 0x0f ); + path->rate = ( path_record->rate_selector__rate & 0x3f ); + + DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n", + htonl ( path->gid.u.dwords[0] ), htonl ( path->gid.u.dwords[1] ), + htonl ( path->gid.u.dwords[2] ), htonl ( path->gid.u.dwords[3] ), + path->dlid, path->sl, path->rate ); + + /* Update path cache index */ + ipoib_path_cache_idx++; + if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS ) + ipoib_path_cache_idx = 0; +} + +/** + * Handle received IPoIB multicast membership record + * + * @v ipoib IPoIB device + * @v mc_member_record Multicast membership record + */ +static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib, + struct ib_mad_mc_member_record *mc_member_record ) { + /* Record parameters */ + ipoib->broadcast_joined = + ( mc_member_record->scope__join_state & 0x0f ); + 
ipoib->data_qkey = ntohl ( mc_member_record->qkey ); + ipoib->broadcast_lid = ntohs ( mc_member_record->mlid ); + DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n", + ipoib, ( ipoib->broadcast_joined ? "joined" : "left" ), + ipoib->data_qkey, ipoib->broadcast_lid ); +} + +/** + * Handle IPoIB metadata receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + union ib_mad *mad; + + if ( completion->syndrome ) { + DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n", + ipoib, completion->syndrome ); + goto done; + } + + iob_put ( iobuf, completion->len ); + if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) { + DBGC ( ipoib, "IPoIB %p received metadata packet too short " + "to contain GRH\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + goto done; + } + iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); + if ( iob_len ( iobuf ) < sizeof ( *mad ) ) { + DBGC ( ipoib, "IPoIB %p received metadata packet too short " + "to contain reply\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + goto done; + } + mad = iobuf->data; + + if ( mad->mad_hdr.status != 0 ) { + DBGC ( ipoib, "IPoIB %p metadata RX err status %04x\n", + ipoib, ntohs ( mad->mad_hdr.status ) ); + goto done; + } + + switch ( mad->mad_hdr.tid[0] ) { + case IPOIB_TID_GET_PATH_REC: + ipoib_recv_path_record ( ipoib, &mad->path_record ); + break; + case IPOIB_TID_MC_MEMBER_REC: + ipoib_recv_mc_member_record ( ipoib, &mad->mc_member_record ); + break; + default: + DBGC ( ipoib, "IPoIB %p unwanted response:\n", + ipoib ); + DBGC_HD ( ipoib, mad, sizeof ( *mad ) ); + break; + } + + done: + 
ipoib->meta.recv_fill--; + free_iob ( iobuf ); +} + +/** + * Refill IPoIB receive ring + * + * @v ipoib IPoIB device + * @v qset Queue set + * + * Allocates and posts receive buffers until the queue set holds + * recv_max_fill of them, stopping early if allocation or posting + * fails. + */ +static void ipoib_refill_recv ( struct ipoib_device *ipoib, + struct ipoib_queue_set *qset ) { + struct ib_device *ibdev = ipoib->ibdev; + struct io_buffer *iobuf; + int rc; + + while ( qset->recv_fill < qset->recv_max_fill ) { + iobuf = alloc_iob ( IPOIB_MTU ); + if ( ! iobuf ) + break; + if ( ( rc = ib_post_recv ( ibdev, qset->qp, iobuf ) ) != 0 ) { + free_iob ( iobuf ); + break; + } + qset->recv_fill++; + } +} + +/** + * Poll IPoIB network device + * + * @v netdev Network device + * + * Polls the metadata and data completion queues, then refills both + * receive rings. + */ +static void ipoib_poll ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + + ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send, + ipoib_meta_complete_recv ); + ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send, + ipoib_data_complete_recv ); + ipoib_refill_recv ( ipoib, &ipoib->meta ); + ipoib_refill_recv ( ipoib, &ipoib->data ); +} + +/** + * Enable/disable interrupts on IPoIB network device + * + * @v netdev Network device + * @v enable Interrupts should be enabled + */ +static void ipoib_irq ( struct net_device *netdev __unused, + int enable __unused ) { + /* No implementation */ +} + +/** + * Open IPoIB network device + * + * @v netdev Network device + * @ret rc Return status code + * + * Attaches the data queue pair to the broadcast GID and fills both + * receive rings. + */ +static int ipoib_open ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + int rc; + + /* Attach to broadcast multicast GID */ + if ( ( rc = ib_mcast_attach ( ibdev, ipoib->data.qp, + &ipoib->broadcast_gid ) ) != 0 ) { + DBG ( "Could not attach to broadcast GID: %s\n", + strerror ( rc ) ); + return rc; + } + + /* Fill receive rings */ + ipoib_refill_recv ( ipoib, &ipoib->meta ); + ipoib_refill_recv ( ipoib, &ipoib->data ); + + return 0; +} + +/** + * Close IPoIB network device + * + * @v netdev Network device + * + * Detaches the data queue pair from the broadcast GID. + */
+static void ipoib_close ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + + /* Detach from broadcast multicast GID */ + ib_mcast_detach ( ibdev, ipoib->data.qp, &ipoib->broadcast_gid ); + + /* FIXME: should probably flush the receive ring */ +} + +/** IPoIB network device operations */ +static struct net_device_operations ipoib_operations = { + .open = ipoib_open, + .close = ipoib_close, + .transmit = ipoib_transmit, + .poll = ipoib_poll, + .irq = ipoib_irq, +}; + +/** + * Join IPoIB broadcast group + * + * @v ipoib IPoIB device + * @ret rc Return status code + * + * Sends the join request, then polls the metadata completion queue + * once per millisecond for up to IPOIB_JOIN_MAX_DELAY_MS + * milliseconds. Returns -ETIMEDOUT if the join has not completed + * by then. + */ +static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) { + struct ib_device *ibdev = ipoib->ibdev; + unsigned int delay_ms; + int rc; + + /* Make sure we have some receive descriptors */ + ipoib_refill_recv ( ipoib, &ipoib->meta ); + + /* Send join request */ + if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid, + 1 ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n", + ipoib, strerror ( rc ) ); + return rc; + } + + /* Wait for join to complete. Ideally we wouldn't delay for + * this long, but we need the queue key before we can set up + * the data queue pair, which we need before we can know the + * MAC address.
+ */ + for ( delay_ms = IPOIB_JOIN_MAX_DELAY_MS ; delay_ms ; delay_ms-- ) { + mdelay ( 1 ); + ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send, + ipoib_meta_complete_recv ); + ipoib_refill_recv ( ipoib, &ipoib->meta ); + if ( ipoib->broadcast_joined ) + return 0; + } + DBGC ( ipoib, "IPoIB %p timed out waiting for broadcast join\n", + ipoib ); + + return -ETIMEDOUT; +} + +/** + * Probe IPoIB device + * + * @v ibdev Infiniband device + * @ret rc Return status code + * + * Creates the metadata queue set, joins the broadcast group to + * obtain the data queue key, creates the data queue set, builds the + * MAC address from the data QPN and port GID, and registers the + * network device. On failure, the queue sets created so far are + * destroyed and netdev_nullify() and netdev_put() are called on the + * network device. + */ +int ipoib_probe ( struct ib_device *ibdev ) { + struct net_device *netdev; + struct ipoib_device *ipoib; + struct ipoib_mac *mac; + int rc; + + /* Allocate network device */ + netdev = alloc_ipoibdev ( sizeof ( *ipoib ) ); + if ( ! netdev ) + return -ENOMEM; + netdev_init ( netdev, &ipoib_operations ); + ipoib = netdev->priv; + ib_set_ownerdata ( ibdev, netdev ); + netdev->dev = ibdev->dev; + memset ( ipoib, 0, sizeof ( *ipoib ) ); + ipoib->netdev = netdev; + ipoib->ibdev = ibdev; + + /* Calculate broadcast GID */ + memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid, + sizeof ( ipoib->broadcast_gid ) ); + ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey ); + + /* Allocate metadata queue set */ + if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta, + IPOIB_META_NUM_CQES, + IPOIB_META_NUM_SEND_WQES, + IPOIB_META_NUM_RECV_WQES, + IB_GLOBAL_QKEY ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n", + ipoib, strerror ( rc ) ); + goto err_create_meta_qset; + } + + /* Join broadcast group */ + if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n", + ipoib, strerror ( rc ) ); + goto err_join_broadcast_group; + } + + /* Allocate data queue set */ + if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data, + IPOIB_DATA_NUM_CQES, + IPOIB_DATA_NUM_SEND_WQES, + IPOIB_DATA_NUM_RECV_WQES, + ipoib->data_qkey ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n", + ipoib, strerror ( rc ) ); +
goto err_create_data_qset; + } + + /* Construct MAC address */ + mac = ( ( struct ipoib_mac * ) netdev->ll_addr ); + mac->qpn = htonl ( ipoib->data.qp->qpn ); + memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) ); + + /* Register network device */ + if ( ( rc = register_netdev ( netdev ) ) != 0 ) + goto err_register_netdev; + + return 0; + + err_register_netdev: + ipoib_destroy_qset ( ipoib, &ipoib->data ); + err_join_broadcast_group: + err_create_data_qset: + ipoib_destroy_qset ( ipoib, &ipoib->meta ); + err_create_meta_qset: + netdev_nullify ( netdev ); + netdev_put ( netdev ); + return rc; +} + +/** + * Remove IPoIB device + * + * @v ibdev Infiniband device + * + * Unregisters the network device stored as the Infiniband device's + * owner data, destroys the data and metadata queue sets, then calls + * netdev_nullify() and netdev_put() on the network device. + */ +void ipoib_remove ( struct ib_device *ibdev ) { + struct net_device *netdev = ib_get_ownerdata ( ibdev ); + struct ipoib_device *ipoib = netdev->priv; + + unregister_netdev ( netdev ); + ipoib_destroy_qset ( ipoib, &ipoib->data ); + ipoib_destroy_qset ( ipoib, &ipoib->meta ); + netdev_nullify ( netdev ); + netdev_put ( netdev ); +} diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h new file mode 100644 index 00000000..06745ba9 --- /dev/null +++ b/src/include/gpxe/infiniband.h @@ -0,0 +1,578 @@ +#ifndef _GPXE_INFINIBAND_H +#define _GPXE_INFINIBAND_H + +/** @file + * + * Infiniband protocol + * + */ + +#include +#include + +/** Subnet administrator QPN */ +#define IB_SA_QPN 1 + +/** Broadcast QPN */ +#define IB_BROADCAST_QPN 0xffffffUL + +/** Subnet administrator queue key */ +#define IB_GLOBAL_QKEY 0x80010000UL + +/** An Infiniband Global Identifier */ +struct ib_gid { + union { + uint8_t bytes[16]; + uint16_t words[8]; + uint32_t dwords[4]; + } u; +}; + +/** An Infiniband Global Route Header */ +struct ib_global_route_header { + /** IP version, traffic class, and flow label + * + * 4 bits : Version of the GRH + * 8 bits : Traffic class + * 20 bits : Flow label + */ + uint32_t ipver_tclass_flowlabel; + /** Payload length */ + uint16_t paylen; + /** Next header */ + uint8_t
nxthdr; + /** Hop limit */ + uint8_t hoplmt; + /** Source GID */ + struct ib_gid sgid; + /** Destination GID */ + struct ib_gid dgid; +} __attribute__ (( packed )); + +struct ib_device; +struct ib_queue_pair; +struct ib_completion_queue; + +/** An Infiniband Work Queue */ +struct ib_work_queue { + /** Containing queue pair */ + struct ib_queue_pair *qp; + /** "Is a send queue" flag */ + int is_send; + /** Associated completion queue */ + struct ib_completion_queue *cq; + /** List of work queues on this completion queue */ + struct list_head list; + /** Number of work queue entries */ + unsigned int num_wqes; + /** Next work queue entry index + * + * This is the index of the next entry to be filled (i.e. the + * first empty entry). This value is not bounded by num_wqes; + * users must logical-AND with (num_wqes-1) to generate an + * array index. + */ + unsigned long next_idx; + /** I/O buffers assigned to work queue */ + struct io_buffer **iobufs; + /** Device private data */ + void *dev_priv; +}; + +/** An Infiniband Queue Pair */ +struct ib_queue_pair { + /** Queue Pair Number */ + unsigned long qpn; + /** Queue key */ + unsigned long qkey; + /** Send queue */ + struct ib_work_queue send; + /** Receive queue */ + struct ib_work_queue recv; + /** Device private data */ + void *dev_priv; + /** Queue owner private data */ + void *owner_priv; +}; + +/** An Infiniband Completion Queue */ +struct ib_completion_queue { + /** Completion queue number */ + unsigned long cqn; + /** Number of completion queue entries */ + unsigned int num_cqes; + /** Next completion queue entry index + * + * This is the index of the next entry to be filled (i.e. the + * first empty entry). This value is not bounded by num_cqes; + * users must logical-AND with (num_cqes-1) to generate an + * array index.
+ */ + unsigned long next_idx; + /** List of work queues completing to this queue */ + struct list_head work_queues; + /** Device private data */ + void *dev_priv; +}; + +/** An Infiniband completion */ +struct ib_completion { + /** Syndrome + * + * If non-zero, then the completion is in error. + */ + unsigned int syndrome; + /** Length */ + size_t len; +}; + +/** An Infiniband completion handler + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +typedef void ( * ib_completer_t ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ); + +/** An Infiniband Address Vector */ +struct ib_address_vector { + /** Destination Queue Pair */ + unsigned int dest_qp; + /** Queue key */ + unsigned long qkey; + /** Destination Local ID */ + unsigned int dlid; + /** Rate */ + unsigned int rate; + /** Service level */ + unsigned int sl; + /** GID is present */ + unsigned int gid_present; + /** GID */ + struct ib_gid gid; +}; + +/** + * Infiniband device operations + * + * These represent a subset of the Infiniband Verbs.
+ */ +struct ib_device_operations { + /** Create completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @ret rc Return status code + */ + int ( * create_cq ) ( struct ib_device *ibdev, + struct ib_completion_queue *cq ); + /** Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ + void ( * destroy_cq ) ( struct ib_device *ibdev, + struct ib_completion_queue *cq ); + /** Create queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @ret rc Return status code + */ + int ( * create_qp ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp ); + /** Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ + void ( * destroy_qp ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp ); + /** Post send work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer + * @ret rc Return status code + * + * If this method returns success, the I/O buffer remains + * owned by the queue pair. If this method returns failure, + * the I/O buffer is immediately released; the failure is + * interpreted as "failure to enqueue buffer". + */ + int ( * post_send ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_address_vector *av, + struct io_buffer *iobuf ); + /** Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + * + * If this method returns success, the I/O buffer remains + * owned by the queue pair. If this method returns failure, + * the I/O buffer is immediately released; the failure is + * interpreted as "failure to enqueue buffer". 
+ */ + int ( * post_recv ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ); + /** Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + * + * The completion handler takes ownership of the I/O buffer. + */ + void ( * poll_cq ) ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ); + /** Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ + int ( * mcast_attach ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_gid *gid ); + /** Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ + void ( * mcast_detach ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_gid *gid ); +}; + +/** An Infiniband device */ +struct ib_device { + /** Port GID */ + struct ib_gid port_gid; + /** Subnet manager LID */ + unsigned long sm_lid; + /** Partition key */ + unsigned int pkey; + /** Underlying device */ + struct device *dev; + /** Infiniband operations */ + struct ib_device_operations *op; + /** Device private data */ + void *dev_priv; + /** Owner private data */ + void *owner_priv; +}; + +extern struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, + unsigned int num_cqes ); +extern void ib_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ); +extern struct ib_queue_pair * +ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes, + struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, + struct ib_completion_queue *recv_cq, unsigned long qkey ); +extern void ib_destroy_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ); +extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, + unsigned long 
qpn, int is_send ); +extern struct ib_device * alloc_ibdev ( size_t priv_size ); +extern void free_ibdev ( struct ib_device *ibdev ); + +/** + * Post send work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_address_vector *av, struct io_buffer *iobuf ) { + return ibdev->op->post_send ( ibdev, qp, av, iobuf ); +} + +/** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct io_buffer *iobuf ) { + return ibdev->op->post_recv ( ibdev, qp, iobuf ); +} + +/** + * Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + */ +static inline __attribute__ (( always_inline )) void +ib_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq, + ib_completer_t complete_send, ib_completer_t complete_recv ) { + ibdev->op->poll_cq ( ibdev, cq, complete_send, complete_recv ); +} + + +/** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { + return ibdev->op->mcast_attach ( ibdev, qp, gid ); +} + +/** + * Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ +static inline __attribute__ (( always_inline )) void +ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { 
+ ibdev->op->mcast_detach ( ibdev, qp, gid ); +} + +/** + * Set Infiniband owner-private data + * + * @v ibdev Infiniband device + * @v owner_priv Owner private data + */ +static inline void ib_set_ownerdata ( struct ib_device *ibdev, + void *owner_priv ) { + ibdev->owner_priv = owner_priv; +} + +/** + * Get Infiniband owner-private data + * + * @v ibdev Infiniband device + * @ret owner_priv Owner private data + */ +static inline void * ib_get_ownerdata ( struct ib_device *ibdev ) { + return ibdev->owner_priv; +} + +/***************************************************************************** + * + * Management datagrams + * + * Portions Copyright (c) 2004 Mellanox Technologies Ltd. All rights + * reserved. + * + */ + +/* Management base version */ +#define IB_MGMT_BASE_VERSION 1 + +/* Management classes */ +#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 +#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 +#define IB_MGMT_CLASS_SUBN_ADM 0x03 +#define IB_MGMT_CLASS_PERF_MGMT 0x04 +#define IB_MGMT_CLASS_BM 0x05 +#define IB_MGMT_CLASS_DEVICE_MGMT 0x06 +#define IB_MGMT_CLASS_CM 0x07 +#define IB_MGMT_CLASS_SNMP 0x08 +#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 +#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F + +/* Management methods */ +#define IB_MGMT_METHOD_GET 0x01 +#define IB_MGMT_METHOD_SET 0x02 +#define IB_MGMT_METHOD_GET_RESP 0x81 +#define IB_MGMT_METHOD_SEND 0x03 +#define IB_MGMT_METHOD_TRAP 0x05 +#define IB_MGMT_METHOD_REPORT 0x06 +#define IB_MGMT_METHOD_REPORT_RESP 0x86 +#define IB_MGMT_METHOD_TRAP_REPRESS 0x07 +#define IB_MGMT_METHOD_DELETE 0x15 +#define IB_MGMT_METHOD_RESP 0x80 + +/* Subnet management attributes */ +#define IB_SMP_ATTR_NOTICE 0x0002 +#define IB_SMP_ATTR_NODE_DESC 0x0010 +#define IB_SMP_ATTR_NODE_INFO 0x0011 +#define IB_SMP_ATTR_SWITCH_INFO 0x0012 +#define IB_SMP_ATTR_GUID_INFO 0x0014 +#define IB_SMP_ATTR_PORT_INFO 0x0015 +#define IB_SMP_ATTR_PKEY_TABLE 0x0016 +#define IB_SMP_ATTR_SL_TO_VL_TABLE 0x0017 +#define IB_SMP_ATTR_VL_ARB_TABLE 0x0018 +#define
IB_SMP_ATTR_LINEAR_FORWARD_TABLE 0x0019 +#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE 0x001A +#define IB_SMP_ATTR_MCAST_FORWARD_TABLE 0x001B +#define IB_SMP_ATTR_SM_INFO 0x0020 +#define IB_SMP_ATTR_VENDOR_DIAG 0x0030 +#define IB_SMP_ATTR_LED_INFO 0x0031 +#define IB_SMP_ATTR_VENDOR_MASK 0xFF00 + +#define IB_SA_ATTR_MC_MEMBER_REC 0x38 +#define IB_SA_ATTR_PATH_REC 0x35 + +#define IB_SA_MCMEMBER_REC_MGID (1<<0) +#define IB_SA_MCMEMBER_REC_PORT_GID (1<<1) +#define IB_SA_MCMEMBER_REC_QKEY (1<<2) +#define IB_SA_MCMEMBER_REC_MLID (1<<3) +#define IB_SA_MCMEMBER_REC_MTU_SELECTOR (1<<4) +#define IB_SA_MCMEMBER_REC_MTU (1<<5) +#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS (1<<6) +#define IB_SA_MCMEMBER_REC_PKEY (1<<7) +#define IB_SA_MCMEMBER_REC_RATE_SELECTOR (1<<8) +#define IB_SA_MCMEMBER_REC_RATE (1<<9) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR (1<<10) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME (1<<11) +#define IB_SA_MCMEMBER_REC_SL (1<<12) +#define IB_SA_MCMEMBER_REC_FLOW_LABEL (1<<13) +#define IB_SA_MCMEMBER_REC_HOP_LIMIT (1<<14) +#define IB_SA_MCMEMBER_REC_SCOPE (1<<15) +#define IB_SA_MCMEMBER_REC_JOIN_STATE (1<<16) +#define IB_SA_MCMEMBER_REC_PROXY_JOIN (1<<17) + +#define IB_SA_PATH_REC_DGID (1<<2) +#define IB_SA_PATH_REC_SGID (1<<3) + +struct ib_mad_hdr { + uint8_t base_version; + uint8_t mgmt_class; + uint8_t class_version; + uint8_t method; + uint16_t status; + uint16_t class_specific; + uint32_t tid[2]; + uint16_t attr_id; + uint16_t resv; + uint32_t attr_mod; +} __attribute__ (( packed )); + +struct ib_sa_hdr { + uint32_t sm_key[2]; + uint16_t reserved; + uint16_t attrib_offset; + uint32_t comp_mask[2]; +} __attribute__ (( packed )); + +struct ib_rmpp_hdr { + uint32_t raw[3]; +} __attribute__ (( packed )); + +struct ib_mad_data { + struct ib_mad_hdr mad_hdr; + uint8_t data[232]; +} __attribute__ (( packed )); + +struct ib_mad_guid_info { + struct ib_mad_hdr mad_hdr; + uint32_t mkey[2]; + uint32_t reserved[8]; + uint8_t gid_local[8]; +} __attribute__ (( packed 
)); + +struct ib_mad_port_info { + struct ib_mad_hdr mad_hdr; + uint32_t mkey[2]; + uint32_t reserved[8]; + uint32_t mkey2[2]; + uint8_t gid_prefix[8]; + uint16_t lid; + uint16_t mastersm_lid; + uint32_t cap_mask; + uint16_t diag_code; + uint16_t mkey_lease_period; + uint8_t local_port_num; + uint8_t link_width_enabled; + uint8_t link_width_supported; + uint8_t link_width_active; + uint8_t port_state__link_speed_supported; + uint8_t link_down_def_state__port_phys_state; + uint8_t lmc__r1__mkey_prot_bits; + uint8_t link_speed_enabled__link_speed_active; +} __attribute__ (( packed )); + +struct ib_mad_pkey_table { + struct ib_mad_hdr mad_hdr; + uint32_t mkey[2]; + uint32_t reserved[8]; + uint16_t pkey[16][2]; +} __attribute__ (( packed )); + +struct ib_mad_path_record { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + uint32_t reserved0[2]; + struct ib_gid dgid; + struct ib_gid sgid; + uint16_t dlid; + uint16_t slid; + uint32_t hop_limit__flow_label__raw_traffic; + uint32_t pkey__numb_path__reversible__tclass; + uint8_t reserved1; + uint8_t reserved__sl; + uint8_t mtu_selector__mtu; + uint8_t rate_selector__rate; + uint32_t preference__packet_lifetime__packet_lifetime_selector; + uint32_t reserved2[35]; +} __attribute__ (( packed )); + +struct ib_mad_mc_member_record { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + struct ib_gid mgid; + struct ib_gid port_gid; + uint32_t qkey; + uint16_t mlid; + uint8_t mtu_selector__mtu; + uint8_t tclass; + uint16_t pkey; + uint8_t rate_selector__rate; + uint8_t packet_lifetime_selector__packet_lifetime; + uint32_t sl__flow_label__hop_limit; + uint8_t scope__join_state; + uint8_t proxy_join__reserved; + uint16_t reserved0; + uint32_t reserved1[37]; +} __attribute__ (( packed )); + +union ib_mad { + struct ib_mad_hdr mad_hdr; + struct ib_mad_data data; + struct ib_mad_guid_info guid_info; + struct ib_mad_port_info port_info; + struct ib_mad_pkey_table 
pkey_table; + struct ib_mad_path_record path_record; + struct ib_mad_mc_member_record mc_member_record; +} __attribute__ (( packed )); + +#endif /* _GPXE_INFINIBAND_H */ diff --git a/src/include/gpxe/ipoib.h b/src/include/gpxe/ipoib.h new file mode 100644 index 00000000..0551687d --- /dev/null +++ b/src/include/gpxe/ipoib.h @@ -0,0 +1,78 @@ +#ifndef _GPXE_IPOIB_H +#define _GPXE_IPOIB_H + +/** @file + * + * IP over Infiniband + */ + +#include + +/** IPoIB MAC address length */ +#define IPOIB_ALEN 20 + +/** An IPoIB MAC address */ +struct ipoib_mac { + /** Queue pair number + * + * MSB must be zero; QPNs are only 24-bit. + */ + uint32_t qpn; + /** Port GID */ + struct ib_gid gid; +} __attribute__ (( packed )); + +/** IPoIB link-layer header length */ +#define IPOIB_HLEN 24 + +/** + * IPoIB link-layer header pseudo portion + * + * This part doesn't actually exist on the wire, but it provides a + * convenient way to fit into the typical network device model. + */ +struct ipoib_pseudo_hdr { + /** Peer address */ + struct ipoib_mac peer; +} __attribute__ (( packed )); + +/** IPoIB link-layer header real portion */ +struct ipoib_real_hdr { + /** Network-layer protocol */ + uint16_t proto; + /** Reserved, must be zero */ + uint16_t reserved; +} __attribute__ (( packed )); + +/** An IPoIB link-layer header */ +struct ipoib_hdr { + /** Pseudo portion */ + struct ipoib_pseudo_hdr pseudo; + /** Real portion */ + struct ipoib_real_hdr real; +} __attribute__ (( packed )); + +extern struct ll_protocol ipoib_protocol; + +extern const char * ipoib_ntoa ( const void *ll_addr ); + +/** + * Allocate IPoIB device + * + * @v priv_size Size of driver private data + * @ret netdev Network device, or NULL + */ +static inline struct net_device * alloc_ipoibdev ( size_t priv_size ) { + struct net_device *netdev; + + netdev = alloc_netdev ( priv_size ); + if ( netdev ) { + netdev->ll_protocol = &ipoib_protocol; + } + return netdev; +} + +extern int ipoib_probe ( struct ib_device *ibdev ); 
+extern void ipoib_remove ( struct ib_device *ibdev ); + +#endif /* _GPXE_IPOIB_H */ diff --git a/src/include/gpxe/tcp.h b/src/include/gpxe/tcp.h index d967791f..e2753120 100644 --- a/src/include/gpxe/tcp.h +++ b/src/include/gpxe/tcp.h @@ -275,7 +275,8 @@ struct tcp_options { * actually use 65536, we use a window size of (65536-4) to ensure * that payloads remain dword-aligned. */ -#define TCP_MAX_WINDOW_SIZE ( 65536 - 4 ) +//#define TCP_MAX_WINDOW_SIZE ( 65536 - 4 ) +#define TCP_MAX_WINDOW_SIZE 4096 /** * Path MTU diff --git a/src/net/infiniband.c b/src/net/infiniband.c new file mode 100644 index 00000000..ed186d18 --- /dev/null +++ b/src/net/infiniband.c @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2007 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** @file + * + * Infiniband protocol + * + */ + +/** + * Create completion queue + * + * @v ibdev Infiniband device + * @v num_cqes Number of completion queue entries + * @ret cq New completion queue + */ +struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, + unsigned int num_cqes ) { + struct ib_completion_queue *cq; + int rc; + + DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev ); + + /* Allocate and initialise data structure */ + cq = zalloc ( sizeof ( *cq ) ); + if ( ! cq ) + return NULL; + cq->num_cqes = num_cqes; + INIT_LIST_HEAD ( &cq->work_queues ); + + /* Perform device-specific initialisation and get CQN */ + if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise completion " + "queue: %s\n", ibdev, strerror ( rc ) ); + free ( cq ); + return NULL; + } + + DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) " + "with CQN %#lx\n", ibdev, num_cqes, cq, cq->dev_priv, cq->cqn ); + return cq; +} + +/** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +void ib_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n", + ibdev, cq->cqn ); + assert ( list_empty ( &cq->work_queues ) ); + ibdev->op->destroy_cq ( ibdev, cq ); + free ( cq ); +} + +/** + * Create queue pair + * + * @v ibdev Infiniband device + * @v num_send_wqes Number of send work queue entries + * @v send_cq Send completion queue + * @v num_recv_wqes Number of receive work queue entries + * @v recv_cq Receive completion queue + * @v qkey Queue key + * @ret qp Queue pair, or NULL on failure + * + * The queue pair and both I/O buffer pointer arrays share a single + * zeroed allocation. The work queues are linked into their + * completion queues before device-specific initialisation; if that + * initialisation fails they are unlinked again before the + * allocation is freed, so that no completion queue is left holding + * a pointer into freed memory. + */ +struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, + unsigned int num_send_wqes, + struct ib_completion_queue *send_cq, + unsigned int num_recv_wqes, + struct
ib_completion_queue *recv_cq, + unsigned long qkey ) { + struct ib_queue_pair *qp; + size_t total_size; + int rc; + + DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev ); + + /* Allocate and initialise data structure */ + total_size = ( sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + + ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); + qp = zalloc ( total_size ); + if ( ! qp ) + return NULL; + qp->qkey = qkey; + qp->send.qp = qp; + qp->send.is_send = 1; + qp->send.cq = send_cq; + list_add ( &qp->send.list, &send_cq->work_queues ); + qp->send.num_wqes = num_send_wqes; + qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) ); + qp->recv.qp = qp; + qp->recv.cq = recv_cq; + list_add ( &qp->recv.list, &recv_cq->work_queues ); + qp->recv.num_wqes = num_recv_wqes; + qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) )); + + /* Perform device-specific initialisation and get QPN */ + if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise queue pair: " + "%s\n", ibdev, strerror ( rc ) ); + /* Unlink work queues from their completion queues + * before freeing the memory that holds the list nodes + */ + list_del ( &qp->send.list ); + list_del ( &qp->recv.list ); + free ( qp ); + return NULL; + } + + DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n", + ibdev, qp, qp->dev_priv, qp->qpn ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n", + ibdev, qp->qpn, num_send_wqes, qp->send.iobufs, + qp->recv.iobufs ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n", + ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs, + ( ( ( void * ) qp ) + total_size ) ); + return qp; +} + +/** + * Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +void ib_destroy_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ) { + DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n", + ibdev, qp->qpn ); + ibdev->op->destroy_qp ( ibdev, qp ); + list_del ( &qp->send.list ); + list_del ( &qp->recv.list ); + free ( qp ); +} + +/** + * Find work queue belonging
to completion queue + * + * @v cq Completion queue + * @v qpn Queue pair number + * @v is_send Find send work queue (rather than receive) + * @ret wq Work queue, or NULL if not found + */ +struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, + unsigned long qpn, int is_send ) { + struct ib_work_queue *wq; + + list_for_each_entry ( wq, &cq->work_queues, list ) { + if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) ) + return wq; + } + return NULL; +} + +/** + * Allocate Infiniband device + * + * @v priv_size Size of private data area + * @ret ibdev Infiniband device, or NULL + * + * The private data area is part of the same zeroed allocation, + * placed immediately after the device structure; dev_priv points + * to it. + */ +struct ib_device * alloc_ibdev ( size_t priv_size ) { + struct ib_device *ibdev; + size_t total_len; + + total_len = ( sizeof ( *ibdev ) + priv_size ); + ibdev = zalloc ( total_len ); + if ( ibdev ) { + ibdev->dev_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ); + } + return ibdev; +} + +/** + * Free Infiniband device + * + * @v ibdev Infiniband device + * + * Frees the single allocation made by alloc_ibdev(), which also + * holds the private data area. + */ +void free_ibdev ( struct ib_device *ibdev ) { + free ( ibdev ); +}