From 267a4483abade6f352263cf159052608a0c03833 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 19:01:10 +0100 Subject: [PATCH] Added an almost obscene amount of debugging and assertion code while tracking down a bug that turned out to be a free_iob() used where I needed a netdev_tx_complete(). This left the freed I/O buffer on the net device's TX list, with bad, bad consequences later. Also fixed the bug in question. --- src/drivers/net/ipoib.c | 22 ++++++++++++------ src/drivers/net/mlx_ipoib/mt25218.c | 36 +++++++++++++++++++++++++++-- src/net/infiniband.c | 22 ++++++++++++------ 3 files changed, 64 insertions(+), 16 deletions(-) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index 077912b3..f45012ee 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -47,22 +47,22 @@ extern struct ib_address_vector hack_ipoib_bcast_av; #define IPOIB_MTU 2048 /** Number of IPoIB data send work queue entries */ -#define IPOIB_DATA_NUM_SEND_WQES 4 +#define IPOIB_DATA_NUM_SEND_WQES 2 /** Number of IPoIB data receive work queue entries */ -#define IPOIB_DATA_NUM_RECV_WQES 4 +#define IPOIB_DATA_NUM_RECV_WQES 2 /** Number of IPoIB data completion entries */ -#define IPOIB_DATA_NUM_CQES 8 +#define IPOIB_DATA_NUM_CQES 32 /** Number of IPoIB metadata send work queue entries */ -#define IPOIB_META_NUM_SEND_WQES 4 +#define IPOIB_META_NUM_SEND_WQES 2 /** Number of IPoIB metadata receive work queue entries */ -#define IPOIB_META_NUM_RECV_WQES 4 +#define IPOIB_META_NUM_RECV_WQES 2 /** Number of IPoIB metadata completion entries */ -#define IPOIB_META_NUM_CQES 8 +#define IPOIB_META_NUM_CQES 32 /** An IPoIB queue set */ struct ipoib_queue_set { @@ -205,6 +205,14 @@ static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { } /* Strip off IPoIB header */ + int len = iob_len ( iobuf ); + DBG ( "WTF iob_len = %zd\n", len ); + if ( len < 0 ) { + DBG_HD ( iobuf, sizeof ( *iobuf ) ); + DBG ( "locking\n" ); + while ( 1 ) {} + } + iob_pull ( iobuf, sizeof ( *ipoib_hdr ) ); /* Hand off to network-layer protocol */ @@ -492,7 +500,7 @@ static int ipoib_transmit ( struct net_device *netdev, /* No path entry - get path record */ rc = ipoib_get_path_record ( ipoib, &ipoib_pshdr->peer.gid ); - free_iob ( iobuf ); + netdev_tx_complete ( netdev, iobuf ); return rc; } av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn ); diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index dc497add..8afee52f 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -398,6 +398,9 @@ static int arbel_create_cq ( struct ib_device *ibdev, goto err_sw2hw_cq; } + DBGC ( arbel, "Arbel %p CQN %#lx ring at [%p,%p)\n", + arbel, cq->cqn, arbel_cq->cqe, + ( ( ( void * ) arbel_cq->cqe ) + arbel_cq->cqe_size ) ); cq->dev_priv = arbel_cq; return 0; @@ -650,6 +653,12 @@ static int arbel_create_qp ( struct ib_device *ibdev, goto err_rtr2rts_qpee; } + DBGC ( arbel, "Arbel %p QPN %#lx send ring at [%p,%p)\n", + arbel, qp->qpn, arbel_qp->send.wqe, + ( ( (void *) arbel_qp->send.wqe ) + arbel_qp->send.wqe_size ) ); + DBGC ( arbel, "Arbel %p QPN %#lx receive ring at [%p,%p)\n", + arbel, qp->qpn, arbel_qp->recv.wqe, + ( ( (void *) arbel_qp->recv.wqe ) + arbel_qp->recv.wqe_size ) ); qp->dev_priv = arbel_qp; return 0; @@ -904,6 +913,7 @@ static int arbel_complete ( struct ib_device *ibdev, struct arbel_queue_pair *arbel_qp; struct arbel_send_work_queue *arbel_send_wq; struct arbel_recv_work_queue *arbel_recv_wq; + struct arbelprm_recv_wqe *recv_wqe; struct io_buffer *iobuf; ib_completer_t complete; unsigned int opcode; @@ -915,7 +925,6 @@ static int arbel_complete ( struct ib_device *ibdev, /* Parse completion */ memset ( &completion, 0, sizeof ( completion ) ); - completion.len = MLX_GET ( &cqe->normal, byte_cnt ); qpn = MLX_GET ( &cqe->normal, my_qpn ); is_send = MLX_GET ( &cqe->normal, s ); wqe_adr = ( MLX_GET ( &cqe->normal, wqe_adr ) << 6 ); @@ -946,10 +955,12 @@ static int arbel_complete ( struct ib_device *ibdev, arbel_send_wq = &arbel_qp->send; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / sizeof ( arbel_send_wq->wqe[0] ) ); + assert ( wqe_idx < qp->send.num_wqes ); } else { arbel_recv_wq = &arbel_qp->recv; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / sizeof ( arbel_recv_wq->wqe[0] ) ); + assert ( wqe_idx < qp->recv.num_wqes ); } /* Identify I/O buffer */ @@ -961,6 +972,27 @@ static int arbel_complete ( struct ib_device *ibdev, } wq->iobufs[wqe_idx] = NULL; + /* Fill in length for received packets */ + if ( ! is_send ) { + completion.len = MLX_GET ( &cqe->normal, byte_cnt ); + recv_wqe = &arbel_recv_wq->wqe[wqe_idx].recv; + assert ( MLX_GET ( &recv_wqe->data[0], local_address_l ) == + virt_to_bus ( iobuf->data ) ); + assert ( MLX_GET ( &recv_wqe->data[0], byte_count ) == + iob_tailroom ( iobuf ) ); + DBG ( "CPQ %lx QPN %lx WQE %x\n", cq->cqn, qp->qpn, wqe_idx ); + // DBG_HD ( iobuf, sizeof ( *iobuf ) ); + MLX_FILL_1 ( &recv_wqe->data[0], 0, byte_count, 0 ); + MLX_FILL_1 ( &recv_wqe->data[0], 1, + l_key, ARBEL_INVALID_LKEY ); + if ( completion.len > iob_tailroom ( iobuf ) ) { + DBGC ( arbel, "Arbel %p CQN %lx QPN %lx IDX %x " + "overlength received packet length %zd\n", + arbel, cq->cqn, qpn, wqe_idx, completion.len ); + return -EIO; + } + } + /* Pass off to caller's completion handler */ complete = ( is_send ? complete_send : complete_recv ); complete ( ibdev, qp, &completion, iobuf ); @@ -1252,7 +1284,7 @@ static int arbel_get_sm_lid ( struct arbel *arbel, return 0; } -static int arbel_get_pkey ( struct arbel *arbel, unsigned long *pkey ) { +static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) { struct ib_mad_pkey_table pkey_table; int rc; diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 9d38767f..ed186d18 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -64,8 +64,8 @@ struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, return NULL; } - DBGC ( ibdev, "IBDEV %p created completion queue %#lx\n", - ibdev, cq->cqn ); + DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) " + "with CQN %#lx\n", ibdev, num_cqes, cq, cq->dev_priv, cq->cqn ); return cq; } @@ -102,14 +102,16 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, struct ib_completion_queue *recv_cq, unsigned long qkey ) { struct ib_queue_pair *qp; + size_t total_size; int rc; DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev ); /* Allocate and initialise data structure */ - qp = zalloc ( sizeof ( *qp ) + - ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + - ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); + total_size = ( sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + + ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); + qp = zalloc ( total_size ); if ( ! qp ) return NULL; qp->qkey = qkey; @@ -134,8 +136,14 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, return NULL; } - DBGC ( ibdev, "IBDEV %p created queue pair %#lx\n", - ibdev, qp->qpn ); + DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n", + ibdev, qp, qp->dev_priv, qp->qpn ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n", + ibdev, qp->qpn, num_send_wqes, qp->send.iobufs, + qp->recv.iobufs ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n", + ibdev, qp->qpn, num_send_wqes, qp->recv.iobufs, + ( ( ( void * ) qp ) + total_size ) ); return qp; }