From 37fc40bc8cd857a5e922b21b9e41580b39091c76 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 23:33:25 +0100 Subject: [PATCH] post_recv() now works, and we can pass data on the IPoIB queue pair using entirely our own code. --- src/drivers/net/mlx_ipoib/arbel.h | 27 ++- src/drivers/net/mlx_ipoib/ib_mt25218.c | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 224 ++++++++++++++++++------- src/include/gpxe/infiniband.h | 36 +++- src/net/infiniband.c | 19 ++- 5 files changed, 234 insertions(+), 74 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index f35ef26b..9da6bef9 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -25,6 +25,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ctrl_send ); @@ -37,13 +38,28 @@ struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ud ); * */ -#define ARBELPRM_MAX_GATHER 1 +#define ARBEL_MAX_GATHER 1 struct arbelprm_ud_send_wqe { struct arbelprm_wqe_segment_next next; struct arbelprm_wqe_segment_ctrl_send ctrl; struct arbelprm_wqe_segment_ud ud; - struct arbelprm_wqe_segment_data_ptr data[ARBELPRM_MAX_GATHER]; + struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_GATHER]; +} __attribute__ (( packed )); + +#define ARBEL_MAX_SCATTER 1 + +struct arbelprm_recv_wqe { + /* The autogenerated header is inconsistent between send and + * receive WQEs. The "ctrl" structure for receive WQEs is + * defined to include the "next" structure. Since the "ctrl" + * part of the "ctrl" structure contains only "reserved, must + * be zero" bits, we ignore its definition and provide + * something more usable. + */ + struct arbelprm_recv_wqe_segment_next next; + uint32_t ctrl[2]; /* All "reserved, must be zero" */ + struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_SCATTER]; } __attribute__ (( packed )); union arbelprm_completion_entry { @@ -88,6 +104,7 @@ struct arbel_send_work_queue { /** An Arbel receive work queue entry */ union arbel_recv_wqe { + struct arbelprm_recv_wqe recv; uint8_t force_align[ARBEL_RECV_WQE_ALIGN]; } __attribute__ (( packed )); @@ -113,6 +130,12 @@ struct arbel { void *uar; /** Doorbell records */ union arbelprm_doorbell_record *db_rec; + /** Reserved LKey + * + * Used to get unrestricted memory access. 
+ */ + unsigned long reserved_lkey; + }; #endif /* _ARBEL_H */ diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index b6552f9f..45d7f46f 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1311,7 +1311,7 @@ static int create_ipoib_qp(void **qp_pp, qp->rcv_buf_sz = IPOIB_RCV_BUF_SZ; qp->max_recv_wqes = NUM_IPOIB_RCV_WQES; - qp->recv_wqe_cur_free = NUM_IPOIB_RCV_WQES; + qp->recv_wqe_cur_free = 0; //NUM_IPOIB_RCV_WQES; qp->rcv_uar_context = dev_ib_data.uar_context_base + 8 * IPOIB_RCV_QP_DB_IDX; diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 385427fb..8fdc5909 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -26,6 +26,7 @@ Skeleton NIC driver for Etherboot #include "arbel.h" +#define MLX_RX_MAX_FILL NUM_IPOIB_RCV_WQES struct mlx_nic { /** Queue pair handle */ @@ -36,35 +37,65 @@ struct mlx_nic { cq_t snd_cqh; /** Receive completion queue */ cq_t rcv_cqh; + + /** RX fill level */ + unsigned int rx_fill; }; static struct io_buffer *static_ipoib_tx_ring[NUM_IPOIB_SND_WQES]; +static struct io_buffer *static_ipoib_rx_ring[NUM_IPOIB_RCV_WQES]; static struct arbel static_arbel; static struct arbel_send_work_queue static_arbel_ipoib_send_wq = { .doorbell_idx = IPOIB_SND_QP_DB_IDX, }; +static struct arbel_send_work_queue static_arbel_ipoib_recv_wq = { + .doorbell_idx = IPOIB_RCV_QP_DB_IDX, +}; static struct arbel_completion_queue static_arbel_ipoib_send_cq = { .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, }; +static struct arbel_completion_queue static_arbel_ipoib_recv_cq = { + .doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, +}; +static struct ib_completion_queue static_ipoib_send_cq; +static struct ib_completion_queue static_ipoib_recv_cq; static struct ib_device static_ibdev = { .dev_priv = &static_arbel, }; static struct ib_queue_pair static_ipoib_qp = { .send = { + .qp = &static_ipoib_qp, + .is_send = 1, + .cq = &static_ipoib_send_cq, .num_wqes = NUM_IPOIB_SND_WQES, .iobufs = static_ipoib_tx_ring, .dev_priv = &static_arbel_ipoib_send_wq, + .list = LIST_HEAD_INIT ( static_ipoib_qp.send.list ), + }, + .recv = { + .qp = &static_ipoib_qp, + .is_send = 0, + .cq = &static_ipoib_recv_cq, + .num_wqes = NUM_IPOIB_RCV_WQES, + .iobufs = static_ipoib_rx_ring, + .dev_priv = &static_arbel_ipoib_recv_wq, + .list = LIST_HEAD_INIT ( static_ipoib_qp.recv.list ), }, - .list = LIST_HEAD_INIT ( static_ipoib_qp.list ), }; static struct ib_completion_queue static_ipoib_send_cq = { .cqn = 1234, /* Only used for debug messages */ .num_cqes = NUM_IPOIB_SND_CQES, .dev_priv = &static_arbel_ipoib_send_cq, - .queue_pairs = LIST_HEAD_INIT ( static_ipoib_send_cq.queue_pairs ), + .work_queues = LIST_HEAD_INIT ( static_ipoib_send_cq.work_queues ), +}; +static struct ib_completion_queue static_ipoib_recv_cq = { + .cqn = 2345, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_RCV_CQES, + .dev_priv = &static_arbel_ipoib_recv_cq, + .work_queues = LIST_HEAD_INIT ( static_ipoib_recv_cq.work_queues ), }; @@ -157,36 +188,6 @@ static int mlx_transmit_direct ( struct net_device *netdev, } -static void arbel_poll_cq ( struct ib_device *ibdev, - struct ib_completion_queue *cq, - ib_completer_t complete_send, - ib_completer_t complete_recv ); - -static void temp_complete_send ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { - struct net_device *netdev = qp->priv; - - DBG ( "Wahey! 
TX completion\n" ); - netdev_tx_complete_err ( netdev, iobuf, - ( completion->syndrome ? -EIO : 0 ) ); -} - -static void temp_complete_recv ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp __unused, - struct ib_completion *completion __unused, - struct io_buffer *iobuf __unused ) { - DBG ( "AARGH! recv completion\n" ); -} - -static void mlx_poll_cq_direct ( struct net_device *netdev ) { - struct mlx_nic *mlx = netdev->priv; - - arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, - temp_complete_send, temp_complete_recv ); -} - /** * Handle TX completion * @@ -233,6 +234,44 @@ static void mlx_rx_complete ( struct net_device *netdev, netdev_rx ( netdev, iobuf ); } +static void arbel_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ); + +static void temp_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->priv; + + DBG ( "Wahey! TX completion\n" ); + netdev_tx_complete_err ( netdev, iobuf, + ( completion->syndrome ? -EIO : 0 ) ); +} + +static void temp_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->priv; + struct mlx_nic *mlx = netdev->priv; + + DBG ( "Yay! RX completion on %p len %zx:\n", iobuf, completion->len ); + // DBG_HD ( iobuf, sizeof ( *iobuf ) ); + // DBG_HD ( iobuf->data, 256 ); + if ( completion->syndrome ) { + netdev_rx_err ( netdev, iobuf, -EIO ); + } else { + iob_put ( iobuf, completion->len ); + iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); + netdev_rx ( netdev, iobuf ); + } + + mlx->rx_fill--; +} + +#if 0 /** * Poll completion queue * @@ -267,6 +306,32 @@ static void mlx_poll_cq ( struct net_device *netdev, cq_t cq, free_wqe ( ib_cqe.wqe ); } } +#endif + +static int arbel_post_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ); + +static void mlx_refill_rx ( struct net_device *netdev ) { + struct mlx_nic *mlx = netdev->priv; + struct io_buffer *iobuf; + int rc; + + while ( mlx->rx_fill < MLX_RX_MAX_FILL ) { + iobuf = alloc_iob ( 2048 ); + if ( ! 
iobuf ) + break; + DBG ( "Posting RX buffer %p:\n", iobuf ); + // memset ( iobuf->data, 0xaa, 256 ); + // DBG_HD ( iobuf, sizeof ( *iobuf ) ); + if ( ( rc = arbel_post_recv ( &static_ibdev, &static_ipoib_qp, + iobuf ) ) != 0 ) { + free_iob ( iobuf ); + break; + } + mlx->rx_fill++; + } +} /** * Poll for completed and received packets @@ -291,8 +356,13 @@ static void mlx_poll ( struct net_device *netdev ) { } /* Poll completion queues */ - mlx_poll_cq_direct ( netdev ); - mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); + arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, + temp_complete_send, temp_complete_recv ); + arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq, + temp_complete_send, temp_complete_recv ); + // mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); + + mlx_refill_rx ( netdev ); } /** @@ -397,12 +467,9 @@ static int arbel_post_send ( struct ib_device *ibdev, memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); MLX_FILL_1 ( &wqe->data[0], 3, local_address_l, virt_to_bus ( iobuf->data ) ); - MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); - - DBG ( "Work queue entry:\n" ); - DBG_HD ( wqe, sizeof ( *wqe ) ); /* Update previous work queue entry's "next" field */ nds = ( ( offsetof ( typeof ( *wqe ), data ) + @@ -413,16 +480,11 @@ static int arbel_post_send ( struct ib_device *ibdev, f, 1, always1, 1 ); - DBG ( "Previous work queue entry's next field:\n" ); - DBG_HD ( &prev_wqe->next, sizeof ( prev_wqe->next ) ); - /* Update doorbell record */ + barrier(); db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx]; MLX_FILL_1 ( &db_rec->qp, 0, counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); - barrier(); - DBG ( "Doorbell record:\n" ); - DBG_HD ( db_rec, 8 ); /* Ring doorbell register */ MLX_FILL_4 ( &db_reg.send, 0, @@ -441,6 +503,51 @@ static int arbel_post_send ( struct ib_device *ibdev, return 0; } +/** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int arbel_post_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ) { + struct arbel *arbel = ibdev->dev_priv; + struct ib_work_queue *wq = &qp->recv; + struct arbel_recv_work_queue *arbel_recv_wq = wq->dev_priv; + struct arbelprm_recv_wqe *wqe; + union arbelprm_doorbell_record *db_rec; + unsigned int wqe_idx_mask; + + /* Allocate work queue entry */ + wqe_idx_mask = ( wq->num_wqes - 1 ); + if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { + DBGC ( arbel, "Arbel %p receive queue full", arbel ); + return -ENOBUFS; + } + wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; + wqe = &arbel_recv_wq->wqe[wq->next_idx & wqe_idx_mask].recv; + + /* Construct work queue entry */ + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_tailroom ( iobuf ) ); + MLX_FILL_1 ( &wqe->data[0], 1, l_key, arbel->reserved_lkey ); + MLX_FILL_1 ( &wqe->data[0], 3, + local_address_l, virt_to_bus ( iobuf->data ) ); + + /* Update doorbell record */ + barrier(); + db_rec = &arbel->db_rec[arbel_recv_wq->doorbell_idx]; + MLX_FILL_1 ( &db_rec->qp, 0, + counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); + + /* Update work queue's index */ + wq->next_idx++; + + return 0; +} + /** * Handle completion * @@ -458,7 +565,6 @@ static int arbel_complete ( struct ib_device *ibdev, ib_completer_t complete_recv ) { struct arbel *arbel = ibdev->dev_priv; 
struct ib_completion completion; - struct ib_queue_pair *qp; struct ib_work_queue *wq; struct io_buffer *iobuf; struct arbel_send_work_queue *arbel_send_wq; @@ -466,7 +572,7 @@ static int arbel_complete ( struct ib_device *ibdev, ib_completer_t complete; unsigned int opcode; unsigned long qpn; - unsigned int is_send; + int is_send; unsigned long wqe_adr; unsigned int wqe_idx; int rc = 0; @@ -489,22 +595,20 @@ static int arbel_complete ( struct ib_device *ibdev, /* Don't return immediately; propagate error to completer */ } - /* Identify queue pair */ - qp = ib_find_qp ( &cq->queue_pairs, qpn ); - if ( ! qp ) { - DBGC ( arbel, "Arbel %p CQN %lx unknown QPN %lx\n", - arbel, cq->cqn, qpn ); + /* Identify work queue */ + wq = ib_find_wq ( cq, qpn, is_send ); + if ( ! wq ) { + DBGC ( arbel, "Arbel %p CQN %lx unknown %s QPN %lx\n", + arbel, cq->cqn, ( is_send ? "send" : "recv" ), qpn ); return -EIO; } /* Identify work queue entry index */ if ( is_send ) { - wq = &qp->send; arbel_send_wq = wq->dev_priv; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / sizeof ( arbel_send_wq->wqe[0] ) ); } else { - wq = &qp->recv; arbel_recv_wq = wq->dev_priv; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / sizeof ( arbel_recv_wq->wqe[0] ) ); @@ -521,7 +625,7 @@ static int arbel_complete ( struct ib_device *ibdev, /* Pass off to caller's completion handler */ complete = ( is_send ? complete_send : complete_recv ); - complete ( ibdev, qp, &completion, iobuf ); + complete ( ibdev, wq->qp, &completion, iobuf ); return rc; } @@ -577,6 +681,7 @@ static void arbel_poll_cq ( struct ib_device *ibdev, /** Arbel Infiniband operations */ static struct ib_device_operations arbel_ib_operations = { .post_send = arbel_post_send, + .post_recv = arbel_post_recv, .poll_cq = arbel_poll_cq, }; @@ -636,14 +741,21 @@ static int arbel_probe ( struct pci_device *pci, /* Hack up IB structures */ static_arbel.uar = memfree_pci_dev.uar; static_arbel.db_rec = dev_ib_data.uar_context_base; + static_arbel.reserved_lkey = dev_ib_data.mkey; static_arbel_ipoib_send_wq.wqe = ( ( struct udqp_st * ) qph )->snd_wq; + static_arbel_ipoib_recv_wq.wqe = + ( ( struct udqp_st * ) qph )->rcv_wq; static_arbel_ipoib_send_cq.cqe = ( ( struct cq_st * ) ib_data.ipoib_snd_cq )->cq_buf; + static_arbel_ipoib_recv_cq.cqe = + ( ( struct cq_st * ) ib_data.ipoib_rcv_cq )->cq_buf; static_ipoib_qp.qpn = ib_get_qpn ( qph ); static_ipoib_qp.priv = netdev; - list_add ( &static_ipoib_qp.list, - &static_ipoib_send_cq.queue_pairs ); + list_add ( &static_ipoib_qp.send.list, + &static_ipoib_send_cq.work_queues ); + list_add ( &static_ipoib_qp.recv.list, + &static_ipoib_recv_cq.work_queues ); /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 3679a110..85684b63 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -64,9 +64,19 @@ struct ibhdr { struct ib_device; +struct ib_queue_pair; +struct ib_completion_queue; /** An Infiniband Work Queue */ struct ib_work_queue { + /** Containing queue pair */ + struct ib_queue_pair *qp; + /** "Is a send queue" flag */ + int is_send; + /** Associated completion queue */ + struct ib_completion_queue *cq; + /** List of work queues on this completion queue */ + struct list_head list; /** Number of work queue entries */ unsigned int num_wqes; /** Next work queue entry index @@ -85,8 +95,6 @@ struct ib_work_queue { /** An Infiniband Queue Pair */ struct ib_queue_pair { - /** List of 
queue pairs sharing a completion queue */ - struct list_head list; /** Queue Pair Number */ unsigned long qpn; /** Send queue */ @@ -113,8 +121,8 @@ struct ib_completion_queue { * array index. */ unsigned long next_idx; - /** List of associated queue pairs */ - struct list_head queue_pairs; + /** List of work queues completing to this queue */ + struct list_head work_queues; /** Device private data */ void *dev_priv; }; @@ -183,6 +191,22 @@ struct ib_device_operations { struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf ); + /** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + * + * If this method returns success, the I/O buffer remains + * owned by the queue pair. If this method returns failure, + * the I/O buffer is immediately released; the failure is + * interpreted as "failure to enqueue buffer". + */ + int ( * post_recv ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ); /** Poll completion queue * * @v ibdev Infiniband device @@ -205,8 +229,8 @@ struct ib_device { }; -extern struct ib_queue_pair * ib_find_qp ( struct list_head *list, - unsigned long qpn ); +extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, + unsigned long qpn, int is_send ); diff --git a/src/net/infiniband.c b/src/net/infiniband.c index edc93b6e..694c88b1 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -34,19 +34,20 @@ */ /** - * Find queue pair from a list + * Find work queue belonging to completion queue * - * @v list List of queue pairs + * @v cq Completion queue * @v qpn Queue pair number - * @ret qp Queue pair, or NULL if not found + * @v is_send Find send work queue (rather than receive) + * @ret wq Work queue, or NULL if not found */ -struct ib_queue_pair * ib_find_qp ( struct list_head *list, - unsigned long qpn ) { - struct ib_queue_pair *qp; +struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, + unsigned long qpn, int is_send ) { + struct ib_work_queue *wq; - list_for_each_entry ( qp, list, list ) { - if ( qp->qpn == qpn ) - return qp; + list_for_each_entry ( wq, &cq->work_queues, list ) { + if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) ) + return wq; } return NULL; }
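
For reference, the receive path added by this patch boils down to two pieces: a refill loop that keeps the receive work queue stocked with I/O buffers, and a completion handler that strips the global route header and hands the payload to the network stack. The sketch below restates that pattern in one place. It is illustrative only: it reuses names defined in this patch (MLX_RX_MAX_FILL, static_ibdev, static_ipoib_qp, arbel_post_recv, struct ib_completion, struct ib_global_route_header), while refill_rx(), rx_complete() and the rx_fill counter are hypothetical stand-ins for the driver-private state kept in struct mlx_nic.

/* Sketch: expected usage of the new post_recv hook and reworked
 * completion queues.  Helper names are illustrative; every call they
 * make appears in the patch above.
 */

static unsigned int rx_fill;    /* receive buffers currently posted */

static void refill_rx ( struct net_device *netdev ) {
        struct io_buffer *iobuf;
        int rc;

        while ( rx_fill < MLX_RX_MAX_FILL ) {
                iobuf = alloc_iob ( 2048 );
                if ( ! iobuf )
                        break;
                if ( ( rc = arbel_post_recv ( &static_ibdev,
                                              &static_ipoib_qp,
                                              iobuf ) ) != 0 ) {
                        /* On failure the buffer is not owned by the
                         * queue pair, so the caller must release it.
                         */
                        free_iob ( iobuf );
                        break;
                }
                rx_fill++;
        }
}

static void rx_complete ( struct ib_device *ibdev __unused,
                          struct ib_queue_pair *qp,
                          struct ib_completion *completion,
                          struct io_buffer *iobuf ) {
        struct net_device *netdev = qp->priv;

        rx_fill--;
        if ( completion->syndrome ) {
                netdev_rx_err ( netdev, iobuf, -EIO );
                return;
        }
        /* The completion length covers the global route header; strip
         * it before passing the payload up to the network stack.
         */
        iob_put ( iobuf, completion->len );
        iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) );
        netdev_rx ( netdev, iobuf );
}

With helpers along these lines, the driver's poll method only needs to call arbel_poll_cq() on each completion queue, passing rx_complete() as the receive completer, and then top the ring back up with refill_rx(), which is exactly the sequence mlx_poll() in this patch follows.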