ipxe.git: src/net/infiniband.c
/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * You can also choose to distribute this program under the terms of
 * the Unmodified Binary Distribution Licence (as given in the file
 * COPYING.UBDL), provided that you have satisfied its requirements.
 */

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <byteswap.h>
#include <errno.h>
#include <assert.h>
#include <ipxe/list.h>
#include <ipxe/errortab.h>
#include <ipxe/if_arp.h>
#include <ipxe/netdevice.h>
#include <ipxe/iobuf.h>
#include <ipxe/process.h>
#include <ipxe/infiniband.h>
#include <ipxe/ib_mi.h>
#include <ipxe/ib_sma.h>

/** @file
 *
 * Infiniband protocol
 *
 */

/** List of Infiniband devices */
struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );

/** List of open Infiniband devices, in reverse order of opening */
static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );

/* Disambiguate the various possible EINPROGRESSes */
#define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
#define EINFO_EINPROGRESS_INIT __einfo_uniqify \
        ( EINFO_EINPROGRESS, 0x01, "Initialising" )
#define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
#define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
        ( EINFO_EINPROGRESS, 0x02, "Armed" )

/** Human-readable messages for the link statuses */
struct errortab infiniband_errors[] __errortab = {
        __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
        __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
};

/***************************************************************************
 *
 * Completion queues
 *
 ***************************************************************************
 */

/**
 * Create completion queue
 *
 * @v ibdev             Infiniband device
 * @v num_cqes          Number of completion queue entries
 * @v op                Completion queue operations
 * @ret cq              New completion queue
 */
struct ib_completion_queue *
ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
               struct ib_completion_queue_operations *op ) {
        struct ib_completion_queue *cq;
        int rc;

        DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );

        /* Allocate and initialise data structure */
        cq = zalloc ( sizeof ( *cq ) );
        if ( ! cq )
                goto err_alloc_cq;
        cq->ibdev = ibdev;
        list_add ( &cq->list, &ibdev->cqs );
        cq->num_cqes = num_cqes;
        INIT_LIST_HEAD ( &cq->work_queues );
        cq->op = op;

        /* Perform device-specific initialisation and get CQN */
        if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not initialise completion "
                       "queue: %s\n", ibdev, strerror ( rc ) );
                goto err_dev_create_cq;
        }

        DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
               "with CQN %#lx\n", ibdev, num_cqes, cq,
               ib_cq_get_drvdata ( cq ), cq->cqn );
        return cq;

        ibdev->op->destroy_cq ( ibdev, cq );
 err_dev_create_cq:
        list_del ( &cq->list );
        free ( cq );
 err_alloc_cq:
        return NULL;
}
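
/* Example: a minimal consumer-side sketch of creating and destroying a
 * completion queue.  The handler names, the entry count and "my_cq_op"
 * are hypothetical (a sketch of such a receive handler appears after
 * ib_complete_recv() below); ib_create_cq() and ib_destroy_cq() are the
 * functions defined in this file:
 *
 *      static struct ib_completion_queue_operations my_cq_op = {
 *              .complete_send = my_complete_send,
 *              .complete_recv = my_complete_recv,
 *      };
 *
 *      struct ib_completion_queue *cq;
 *
 *      cq = ib_create_cq ( ibdev, 8, &my_cq_op );
 *      if ( ! cq )
 *              return -ENOMEM;
 *      ...
 *      ib_destroy_cq ( ibdev, cq );
 */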

/**
 * Destroy completion queue
 *
 * @v ibdev             Infiniband device
 * @v cq                Completion queue
 */
void ib_destroy_cq ( struct ib_device *ibdev,
                     struct ib_completion_queue *cq ) {
        DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
               ibdev, cq->cqn );
        assert ( list_empty ( &cq->work_queues ) );
        ibdev->op->destroy_cq ( ibdev, cq );
        list_del ( &cq->list );
        free ( cq );
}

/**
 * Poll completion queue
 *
 * @v ibdev             Infiniband device
 * @v cq                Completion queue
 */
void ib_poll_cq ( struct ib_device *ibdev,
                  struct ib_completion_queue *cq ) {
        struct ib_work_queue *wq;

        /* Poll completion queue */
        ibdev->op->poll_cq ( ibdev, cq );

        /* Refill receive work queues */
        list_for_each_entry ( wq, &cq->work_queues, list ) {
                if ( ! wq->is_send )
                        ib_refill_recv ( ibdev, wq->qp );
        }
}

/***************************************************************************
 *
 * Work queues
 *
 ***************************************************************************
 */

/**
 * Create queue pair
 *
 * @v ibdev             Infiniband device
 * @v type              Queue pair type
 * @v num_send_wqes     Number of send work queue entries
 * @v send_cq           Send completion queue
 * @v num_recv_wqes     Number of receive work queue entries
 * @v recv_cq           Receive completion queue
 * @v op                Queue pair operations
 * @ret qp              Queue pair
 *
 * The queue pair will be left in the INIT state; you must call
 * ib_modify_qp() before it is ready to use for sending and receiving.
 */
struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
                                      enum ib_queue_pair_type type,
                                      unsigned int num_send_wqes,
                                      struct ib_completion_queue *send_cq,
                                      unsigned int num_recv_wqes,
                                      struct ib_completion_queue *recv_cq,
                                      struct ib_queue_pair_operations *op ) {
        struct ib_queue_pair *qp;
        size_t total_size;
        int rc;

        DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );

        /* Allocate and initialise data structure */
        total_size = ( sizeof ( *qp ) +
                       ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
                       ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
        qp = zalloc ( total_size );
        if ( ! qp )
                goto err_alloc_qp;
        qp->ibdev = ibdev;
        list_add ( &qp->list, &ibdev->qps );
        qp->type = type;
        qp->send.qp = qp;
        qp->send.is_send = 1;
        qp->send.cq = send_cq;
        list_add ( &qp->send.list, &send_cq->work_queues );
        qp->send.psn = ( random() & 0xffffffUL );
        qp->send.num_wqes = num_send_wqes;
        qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
        qp->recv.qp = qp;
        qp->recv.cq = recv_cq;
        list_add ( &qp->recv.list, &recv_cq->work_queues );
        qp->recv.psn = ( random() & 0xffffffUL );
        qp->recv.num_wqes = num_recv_wqes;
        qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
                            ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
        INIT_LIST_HEAD ( &qp->mgids );
        qp->op = op;

        /* Perform device-specific initialisation and get QPN */
        if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
                       "%s\n", ibdev, strerror ( rc ) );
                goto err_dev_create_qp;
        }
        DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
               ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
        DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
               ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
               qp->recv.iobufs );
        DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
               ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
               ( ( ( void * ) qp ) + total_size ) );

        /* Calculate externally-visible QPN */
        switch ( type ) {
        case IB_QPT_SMI:
                qp->ext_qpn = IB_QPN_SMI;
                break;
        case IB_QPT_GSI:
                qp->ext_qpn = IB_QPN_GSI;
                break;
        default:
                qp->ext_qpn = qp->qpn;
                break;
        }
        if ( qp->ext_qpn != qp->qpn ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
                       ibdev, qp->qpn, qp->ext_qpn );
        }

        return qp;

        ibdev->op->destroy_qp ( ibdev, qp );
 err_dev_create_qp:
        list_del ( &qp->send.list );
        list_del ( &qp->recv.list );
        list_del ( &qp->list );
        free ( qp );
 err_alloc_qp:
        return NULL;
}
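
/* Example: a sketch of bringing up an unreliable datagram queue pair on
 * an existing completion queue.  "my_cq", "my_qp_op", "my_qkey" and the
 * queue depths are hypothetical; as noted above, the queue pair is left
 * in the INIT state until ib_modify_qp() is called:
 *
 *      struct ib_queue_pair *qp;
 *      int rc;
 *
 *      qp = ib_create_qp ( ibdev, IB_QPT_UD, 4, my_cq, 8, my_cq,
 *                          &my_qp_op );
 *      if ( ! qp )
 *              return -ENOMEM;
 *      qp->qkey = my_qkey;
 *      if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
 *              ib_destroy_qp ( ibdev, qp );
 *              return rc;
 *      }
 */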

/**
 * Modify queue pair
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @ret rc              Return status code
 */
int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
        int rc;

        DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );

        if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
                       ibdev, qp->qpn, strerror ( rc ) );
                return rc;
        }

        return 0;
}

/**
 * Destroy queue pair
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 */
void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
        struct io_buffer *iobuf;
        unsigned int i;

        DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
               ibdev, qp->qpn );

        assert ( list_empty ( &qp->mgids ) );

        /* Perform device-specific destruction */
        ibdev->op->destroy_qp ( ibdev, qp );

        /* Complete any remaining I/O buffers with errors */
        for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
                if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
                        ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
        }
        for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
                if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
                        ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
                                           -ECANCELED );
                }
        }

        /* Remove work queues from completion queue */
        list_del ( &qp->send.list );
        list_del ( &qp->recv.list );

        /* Free QP */
        list_del ( &qp->list );
        free ( qp );
}

/**
 * Find queue pair by QPN
 *
 * @v ibdev             Infiniband device
 * @v qpn               Queue pair number
 * @ret qp              Queue pair, or NULL
 */
struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
                                        unsigned long qpn ) {
        struct ib_queue_pair *qp;

        list_for_each_entry ( qp, &ibdev->qps, list ) {
                if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
                        return qp;
        }
        return NULL;
}

/**
 * Find queue pair by multicast GID
 *
 * @v ibdev             Infiniband device
 * @v gid               Multicast GID
 * @ret qp              Queue pair, or NULL
 */
struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
                                         union ib_gid *gid ) {
        struct ib_queue_pair *qp;
        struct ib_multicast_gid *mgid;

        list_for_each_entry ( qp, &ibdev->qps, list ) {
                list_for_each_entry ( mgid, &qp->mgids, list ) {
                        if ( memcmp ( &mgid->gid, gid,
                                      sizeof ( mgid->gid ) ) == 0 ) {
                                return qp;
                        }
                }
        }
        return NULL;
}

/**
 * Find work queue belonging to completion queue
 *
 * @v cq                Completion queue
 * @v qpn               Queue pair number
 * @v is_send           Find send work queue (rather than receive)
 * @ret wq              Work queue, or NULL if not found
 */
struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
                                    unsigned long qpn, int is_send ) {
        struct ib_work_queue *wq;

        list_for_each_entry ( wq, &cq->work_queues, list ) {
                if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
                        return wq;
        }
        return NULL;
}
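
/* Example: a sketch of how a driver's poll_cq() method might use
 * ib_find_wq() to map a hardware completion back to its work queue.
 * "cqe_qpn" and "cqe_is_send" stand in for values parsed from a
 * hypothetical hardware completion entry:
 *
 *      struct ib_work_queue *wq;
 *      struct ib_queue_pair *qp;
 *
 *      wq = ib_find_wq ( cq, cqe_qpn, cqe_is_send );
 *      if ( ! wq ) {
 *              DBGC ( ibdev, "IBDEV %p CQN %#lx unknown QPN %#lx\n",
 *                     ibdev, cq->cqn, cqe_qpn );
 *              return;
 *      }
 *      qp = wq->qp;
 */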

/**
 * Post send work queue entry
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v dest              Destination address vector
 * @v iobuf             I/O buffer
 * @ret rc              Return status code
 */
int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                   struct ib_address_vector *dest,
                   struct io_buffer *iobuf ) {
        struct ib_address_vector dest_copy;
        int rc;

        /* Check queue fill level */
        if ( qp->send.fill >= qp->send.num_wqes ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
                       ibdev, qp->qpn );
                return -ENOBUFS;
        }

        /* Use default address vector if none specified */
        if ( ! dest )
                dest = &qp->av;

        /* Make modifiable copy of address vector */
        memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
        dest = &dest_copy;

        /* Fill in optional parameters in address vector */
        if ( ! dest->qkey )
                dest->qkey = qp->qkey;
        if ( ! dest->rate )
                dest->rate = IB_RATE_2_5;

        /* Post to hardware */
        if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
                       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
                return rc;
        }

        qp->send.fill++;
        return 0;
}
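
/* Example: a sketch of posting a datagram to a remote destination.  The
 * remote_* values and "iobuf" are hypothetical; on failure the caller
 * still owns the buffer, so this sketch simply discards it:
 *
 *      struct ib_address_vector dest;
 *      int rc;
 *
 *      memset ( &dest, 0, sizeof ( dest ) );
 *      dest.qpn = remote_qpn;
 *      dest.lid = remote_lid;
 *      dest.qkey = remote_qkey;
 *      if ( ( rc = ib_post_send ( ibdev, qp, &dest, iobuf ) ) != 0 ) {
 *              free_iob ( iobuf );
 *              return rc;
 *      }
 */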

/**
 * Post receive work queue entry
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v iobuf             I/O buffer
 * @ret rc              Return status code
 */
int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                   struct io_buffer *iobuf ) {
        int rc;

        /* Check packet length */
        if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
                       ibdev, qp->qpn, iob_tailroom ( iobuf ) );
                return -EINVAL;
        }

        /* Check queue fill level */
        if ( qp->recv.fill >= qp->recv.num_wqes ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
                       ibdev, qp->qpn );
                return -ENOBUFS;
        }

        /* Post to hardware */
        if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
                       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
                return rc;
        }

        qp->recv.fill++;
        return 0;
}

/**
 * Complete send work queue entry
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v iobuf             I/O buffer
 * @v rc                Completion status code
 */
void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                        struct io_buffer *iobuf, int rc ) {

        if ( qp->send.cq->op->complete_send ) {
                qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
        } else {
                free_iob ( iobuf );
        }
        qp->send.fill--;
}

/**
 * Complete receive work queue entry
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v dest              Destination address vector, or NULL
 * @v source            Source address vector, or NULL
 * @v iobuf             I/O buffer
 * @v rc                Completion status code
 */
void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                        struct ib_address_vector *dest,
                        struct ib_address_vector *source,
                        struct io_buffer *iobuf, int rc ) {

        if ( qp->recv.cq->op->complete_recv ) {
                qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
                                                 iobuf, rc );
        } else {
                free_iob ( iobuf );
        }
        qp->recv.fill--;
}
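
/* Example: a sketch of a receive completion handler matching the
 * complete_recv method invoked above.  "my_complete_recv" and
 * "my_handle_payload" are hypothetical; the parameter list mirrors the
 * call made by ib_complete_recv():
 *
 *      static void my_complete_recv ( struct ib_device *ibdev,
 *                                     struct ib_queue_pair *qp,
 *                                     struct ib_address_vector *dest,
 *                                     struct ib_address_vector *source,
 *                                     struct io_buffer *iobuf, int rc ) {
 *              if ( rc == 0 )
 *                      my_handle_payload ( iobuf->data, iob_len ( iobuf ) );
 *              free_iob ( iobuf );
 *      }
 */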

/**
 * Refill receive work queue
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 */
void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
        struct io_buffer *iobuf;
        int rc;

        /* Keep filling while unfilled entries remain */
        while ( qp->recv.fill < qp->recv.num_wqes ) {

                /* Allocate I/O buffer */
                iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
                if ( ! iobuf ) {
                        /* Non-fatal; we will refill on next attempt */
                        return;
                }

                /* Post I/O buffer */
                if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
                        DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
                               ibdev, strerror ( rc ) );
                        free_iob ( iobuf );
                        /* Give up */
                        return;
                }
        }
}
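
/* Example: a sketch of the queue pair operations used by the refill
 * logic above.  Plain alloc_iob() is assumed here, as a simple consumer
 * might use; a driver needing specially aligned or DMA-capable buffers
 * would substitute its own allocator:
 *
 *      static struct ib_queue_pair_operations my_qp_op = {
 *              .alloc_iob = alloc_iob,
 *      };
 *
 * With the queue pair created against "my_qp_op", a single call to
 * ib_refill_recv ( ibdev, qp ) keeps the receive ring topped up; it is
 * also invoked automatically from ib_poll_cq() above.
 */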

/***************************************************************************
 *
 * Link control
 *
 ***************************************************************************
 */

/**
 * Get link state
 *
 * @v ibdev             Infiniband device
 * @ret rc              Link status code
 */
int ib_link_rc ( struct ib_device *ibdev ) {
        switch ( ibdev->port_state ) {
        case IB_PORT_STATE_DOWN:        return -ENOTCONN;
        case IB_PORT_STATE_INIT:        return -EINPROGRESS_INIT;
        case IB_PORT_STATE_ARMED:       return -EINPROGRESS_ARMED;
        case IB_PORT_STATE_ACTIVE:      return 0;
        default:                        return -EINVAL;
        }
}
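
/* Example: a sketch of how a consumer that exposes a network device
 * (such as IPoIB) might propagate the Infiniband link status; "netdev"
 * is hypothetical and netdev_link_err() comes from the netdevice layer:
 *
 *      netdev_link_err ( netdev, ib_link_rc ( ibdev ) );
 *
 * A zero status marks the link as up, while the EINPROGRESS_* codes
 * defined at the top of this file report "Initialising"/"Armed" while
 * the subnet manager is still bringing the port up.
 */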

/**
 * Textual representation of Infiniband link state
 *
 * @v ibdev             Infiniband device
 * @ret link_text       Link state text
 */
static const char * ib_link_state_text ( struct ib_device *ibdev ) {
        switch ( ibdev->port_state ) {
        case IB_PORT_STATE_DOWN:        return "DOWN";
        case IB_PORT_STATE_INIT:        return "INIT";
        case IB_PORT_STATE_ARMED:       return "ARMED";
        case IB_PORT_STATE_ACTIVE:      return "ACTIVE";
        default:                        return "UNKNOWN";
        }
}

/**
 * Notify drivers of Infiniband device or link state change
 *
 * @v ibdev             Infiniband device
 */
static void ib_notify ( struct ib_device *ibdev ) {
        struct ib_driver *driver;

        for_each_table_entry ( driver, IB_DRIVERS )
                driver->notify ( ibdev );
}

/**
 * Notify of Infiniband link state change
 *
 * @v ibdev             Infiniband device
 */
void ib_link_state_changed ( struct ib_device *ibdev ) {

        DBGC ( ibdev, "IBDEV %p link state is %s\n",
               ibdev, ib_link_state_text ( ibdev ) );

        /* Notify drivers of link state change */
        ib_notify ( ibdev );
}

/**
 * Open port
 *
 * @v ibdev             Infiniband device
 * @ret rc              Return status code
 */
int ib_open ( struct ib_device *ibdev ) {
        int rc;

        /* Increment device open request counter */
        if ( ibdev->open_count++ > 0 ) {
                /* Device was already open; do nothing */
                return 0;
        }

        /* Open device */
        if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not open: %s\n",
                       ibdev, strerror ( rc ) );
                goto err_open;
        }

        /* Create subnet management interface */
        ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
        if ( ! ibdev->smi ) {
                DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
                rc = -ENOMEM;
                goto err_create_smi;
        }

        /* Create subnet management agent */
        if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
                       ibdev, strerror ( rc ) );
                goto err_create_sma;
        }

        /* Create general services interface */
        ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
        if ( ! ibdev->gsi ) {
                DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
                rc = -ENOMEM;
                goto err_create_gsi;
        }

        /* Add to head of open devices list */
        list_add ( &ibdev->open_list, &open_ib_devices );

        /* Notify drivers of device state change */
        ib_notify ( ibdev );

        assert ( ibdev->open_count == 1 );
        return 0;

        ib_destroy_mi ( ibdev, ibdev->gsi );
 err_create_gsi:
        ib_destroy_sma ( ibdev, ibdev->smi );
 err_create_sma:
        ib_destroy_mi ( ibdev, ibdev->smi );
 err_create_smi:
        ibdev->op->close ( ibdev );
 err_open:
        assert ( ibdev->open_count == 1 );
        ibdev->open_count = 0;
        return rc;
}

/**
 * Close port
 *
 * @v ibdev             Infiniband device
 */
void ib_close ( struct ib_device *ibdev ) {

        /* Decrement device open request counter */
        ibdev->open_count--;

        /* Close device if this was the last remaining requested opening */
        if ( ibdev->open_count == 0 ) {
                ib_notify ( ibdev );
                list_del ( &ibdev->open_list );
                ib_destroy_mi ( ibdev, ibdev->gsi );
                ib_destroy_sma ( ibdev, ibdev->smi );
                ib_destroy_mi ( ibdev, ibdev->smi );
                ibdev->op->close ( ibdev );
                ibdev->port_state = IB_PORT_STATE_DOWN;
        }
}
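
/* Example: a sketch of the usual open/use/close pattern.  Because
 * ib_open() is reference-counted, nested opens by multiple consumers
 * are safe and only the first opener touches the hardware:
 *
 *      if ( ( rc = ib_open ( ibdev ) ) != 0 )
 *              return rc;
 *      ... create queue pairs, exchange MADs, transfer data ...
 *      ib_close ( ibdev );
 */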

/***************************************************************************
 *
 * Multicast
 *
 ***************************************************************************
 */

/**
 * Attach to multicast group
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v gid               Multicast GID
 * @ret rc              Return status code
 *
 * Note that this function handles only the local device's attachment
 * to the multicast GID; it does not issue the relevant MADs to join
 * the multicast group on the subnet.
 */
int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                      union ib_gid *gid ) {
        struct ib_multicast_gid *mgid;
        int rc;

        /* Sanity check */
        assert ( qp != NULL );

        /* Add to software multicast GID list */
        mgid = zalloc ( sizeof ( *mgid ) );
        if ( ! mgid ) {
                rc = -ENOMEM;
                goto err_alloc_mgid;
        }
        memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
        list_add ( &mgid->list, &qp->mgids );

        /* Add to hardware multicast GID list */
        if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
                goto err_dev_mcast_attach;

        return 0;

 err_dev_mcast_attach:
        list_del ( &mgid->list );
        free ( mgid );
 err_alloc_mgid:
        return rc;
}

/**
 * Detach from multicast group
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v gid               Multicast GID
 */
void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                       union ib_gid *gid ) {
        struct ib_multicast_gid *mgid;

        /* Sanity check */
        assert ( qp != NULL );

        /* Remove from hardware multicast GID list */
        ibdev->op->mcast_detach ( ibdev, qp, gid );

        /* Remove from software multicast GID list */
        list_for_each_entry ( mgid, &qp->mgids, list ) {
                if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
                        list_del ( &mgid->list );
                        free ( mgid );
                        break;
                }
        }
}
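
/* Example: a sketch of attaching a queue pair to a multicast GID and
 * detaching again on shutdown.  "my_group_gid" is a hypothetical union
 * ib_gid already holding the group GID, and (as noted above) joining
 * the group on the subnet still requires separate MADs:
 *
 *      if ( ( rc = ib_mcast_attach ( ibdev, qp, &my_group_gid ) ) != 0 )
 *              return rc;
 *      ...
 *      ib_mcast_detach ( ibdev, qp, &my_group_gid );
 */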

/***************************************************************************
 *
 * Miscellaneous
 *
 ***************************************************************************
 */

/**
 * Count Infiniband HCA ports
 *
 * @v ibdev             Infiniband device
 * @ret num_ports       Number of ports
 */
int ib_count_ports ( struct ib_device *ibdev ) {
        struct ib_device *tmp;
        int num_ports = 0;

        /* Search for IB devices with the same physical device to
         * identify port count.
         */
        for_each_ibdev ( tmp ) {
                if ( tmp->dev == ibdev->dev )
                        num_ports++;
        }
        return num_ports;
}

/**
 * Set port information
 *
 * @v ibdev             Infiniband device
 * @v mad               Set port information MAD
 */
int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
        int rc;

        /* Adapters with embedded SMAs do not need to support this method */
        if ( ! ibdev->op->set_port_info ) {
                DBGC ( ibdev, "IBDEV %p does not support setting port "
                       "information\n", ibdev );
                return -ENOTSUP;
        }

        if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
                       ibdev, strerror ( rc ) );
                return rc;
        }

        return 0;
}

/**
 * Set partition key table
 *
 * @v ibdev             Infiniband device
 * @v mad               Set partition key table MAD
 */
int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
        int rc;

        /* Adapters with embedded SMAs do not need to support this method */
        if ( ! ibdev->op->set_pkey_table ) {
                DBGC ( ibdev, "IBDEV %p does not support setting partition "
                       "key table\n", ibdev );
                return -ENOTSUP;
        }

        if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not set partition key table: "
                       "%s\n", ibdev, strerror ( rc ) );
                return rc;
        }

        return 0;
}

/***************************************************************************
 *
 * Event queues
 *
 ***************************************************************************
 */

/**
 * Poll event queue
 *
 * @v ibdev             Infiniband device
 */
void ib_poll_eq ( struct ib_device *ibdev ) {
        struct ib_completion_queue *cq;

        /* Poll device's event queue */
        ibdev->op->poll_eq ( ibdev );

        /* Poll all completion queues */
        list_for_each_entry ( cq, &ibdev->cqs, list )
                ib_poll_cq ( ibdev, cq );
}

/**
 * Single-step the Infiniband event queue
 *
 * @v process           Infiniband event queue process
 */
static void ib_step ( struct process *process __unused ) {
        struct ib_device *ibdev;

        list_for_each_entry ( ibdev, &open_ib_devices, open_list )
                ib_poll_eq ( ibdev );
}

/** Infiniband event queue process */
PERMANENT_PROCESS ( ib_process, ib_step );

/***************************************************************************
 *
 * Infiniband device creation/destruction
 *
 ***************************************************************************
 */

/**
 * Allocate Infiniband device
 *
 * @v priv_size         Size of driver private data area
 * @ret ibdev           Infiniband device, or NULL
 */
struct ib_device * alloc_ibdev ( size_t priv_size ) {
        struct ib_device *ibdev;
        void *drv_priv;
        size_t total_len;

        total_len = ( sizeof ( *ibdev ) + priv_size );
        ibdev = zalloc ( total_len );
        if ( ibdev ) {
                drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
                ib_set_drvdata ( ibdev, drv_priv );
                INIT_LIST_HEAD ( &ibdev->list );
                INIT_LIST_HEAD ( &ibdev->open_list );
                INIT_LIST_HEAD ( &ibdev->cqs );
                INIT_LIST_HEAD ( &ibdev->qps );
                ibdev->port_state = IB_PORT_STATE_DOWN;
                ibdev->lid = IB_LID_NONE;
                ibdev->pkey = IB_PKEY_DEFAULT;
        }
        return ibdev;
}

/**
 * Register Infiniband device
 *
 * @v ibdev             Infiniband device
 * @ret rc              Return status code
 */
int register_ibdev ( struct ib_device *ibdev ) {
        struct ib_driver *driver;
        int rc;

        /* Add to device list */
        ibdev_get ( ibdev );
        list_add_tail ( &ibdev->list, &ib_devices );
        DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
               ibdev->dev->name );

        /* Probe device */
        for_each_table_entry ( driver, IB_DRIVERS ) {
                if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
                        DBGC ( ibdev, "IBDEV %p could not add %s device: %s\n",
                               ibdev, driver->name, strerror ( rc ) );
                        goto err_probe;
                }
        }

        return 0;

 err_probe:
        for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
                driver->remove ( ibdev );
        list_del ( &ibdev->list );
        ibdev_put ( ibdev );
        return rc;
}
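
/* Example: a sketch of the driver-side allocation and registration
 * sequence.  "struct my_priv", "my_ib_op" (a struct
 * ib_device_operations) and the PCI device "pci" are hypothetical:
 *
 *      struct ib_device *ibdev;
 *      int rc;
 *
 *      ibdev = alloc_ibdev ( sizeof ( struct my_priv ) );
 *      if ( ! ibdev )
 *              return -ENOMEM;
 *      ibdev->op = &my_ib_op;
 *      ibdev->dev = &pci->dev;
 *      ibdev->port = 1;
 *      if ( ( rc = register_ibdev ( ibdev ) ) != 0 ) {
 *              ibdev_put ( ibdev );
 *              return rc;
 *      }
 */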

/**
 * Unregister Infiniband device
 *
 * @v ibdev             Infiniband device
 */
void unregister_ibdev ( struct ib_device *ibdev ) {
        struct ib_driver *driver;

        /* Remove device */
        for_each_table_entry_reverse ( driver, IB_DRIVERS )
                driver->remove ( ibdev );

        /* Remove from device list */
        list_del ( &ibdev->list );
        ibdev_put ( ibdev );
        DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
}

/**
 * Find Infiniband device by GID
 *
 * @v gid               GID
 * @ret ibdev           Infiniband device, or NULL
 */
struct ib_device * find_ibdev ( union ib_gid *gid ) {
        struct ib_device *ibdev;

        for_each_ibdev ( ibdev ) {
                if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
                        return ibdev;
        }
        return NULL;
}

/**
 * Get most recently opened Infiniband device
 *
 * @ret ibdev           Most recently opened Infiniband device, or NULL
 */
struct ib_device * last_opened_ibdev ( void ) {
        struct ib_device *ibdev;

        ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
                                   open_list );
        if ( ! ibdev )
                return NULL;

        assert ( ibdev->open_count != 0 );
        return ibdev;
}

/* Drag in objects via register_ibdev() */
REQUIRING_SYMBOL ( register_ibdev );

/* Drag in Infiniband configuration */
REQUIRE_OBJECT ( config_infiniband );

/* Drag in IPoIB */
REQUIRE_OBJECT ( ipoib );