[infiniband] Parse MLID, rate, and SL from multicast membership record
[ipxe.git] / src / net / infiniband.c
1 /*
2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
18 *
19 * You can also choose to distribute this program under the terms of
20 * the Unmodified Binary Distribution Licence (as given in the file
21 * COPYING.UBDL), provided that you have satisfied its requirements.
22 */
23
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <byteswap.h>
32 #include <errno.h>
33 #include <assert.h>
34 #include <ipxe/list.h>
35 #include <ipxe/errortab.h>
36 #include <ipxe/if_arp.h>
37 #include <ipxe/netdevice.h>
38 #include <ipxe/iobuf.h>
39 #include <ipxe/process.h>
40 #include <ipxe/profile.h>
41 #include <ipxe/infiniband.h>
42 #include <ipxe/ib_mi.h>
43 #include <ipxe/ib_sma.h>
44
45 /** @file
46 *
47 * Infiniband protocol
48 *
49 */
50
51 /** List of Infiniband devices */
52 struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
53
54 /** List of open Infiniband devices, in reverse order of opening */
55 static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
56
57 /** Infiniband device index */
58 static unsigned int ibdev_index = 0;
59
60 /** Post send work queue entry profiler */
61 static struct profiler ib_post_send_profiler __profiler =
62 { .name = "ib.post_send" };
63
64 /** Post receive work queue entry profiler */
65 static struct profiler ib_post_recv_profiler __profiler =
66 { .name = "ib.post_recv" };
67
68 /* Disambiguate the various possible EINPROGRESSes */
69 #define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
70 #define EINFO_EINPROGRESS_INIT __einfo_uniqify \
71 ( EINFO_EINPROGRESS, 0x01, "Initialising" )
72 #define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
73 #define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
74 ( EINFO_EINPROGRESS, 0x02, "Armed" )
75
76 /** Human-readable message for the link statuses */
77 struct errortab infiniband_errors[] __errortab = {
78 __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
79 __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
80 };
81
82 /***************************************************************************
83 *
84 * Completion queues
85 *
86 ***************************************************************************
87 */
88
89 /**
90 * Create completion queue
91 *
92 * @v ibdev Infiniband device
93 * @v num_cqes Number of completion queue entries
94 * @v op Completion queue operations
95 * @ret cq New completion queue
96 */
97 struct ib_completion_queue *
98 ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
99 struct ib_completion_queue_operations *op ) {
100 struct ib_completion_queue *cq;
101 int rc;
102
103 DBGC ( ibdev, "IBDEV %s creating completion queue\n", ibdev->name );
104
105 /* Allocate and initialise data structure */
106 cq = zalloc ( sizeof ( *cq ) );
107 if ( ! cq )
108 goto err_alloc_cq;
109 cq->ibdev = ibdev;
110 list_add ( &cq->list, &ibdev->cqs );
111 cq->num_cqes = num_cqes;
112 INIT_LIST_HEAD ( &cq->work_queues );
113 cq->op = op;
114
115 /* Perform device-specific initialisation and get CQN */
116 if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
117 DBGC ( ibdev, "IBDEV %s could not initialise completion "
118 "queue: %s\n", ibdev->name, strerror ( rc ) );
119 goto err_dev_create_cq;
120 }
121
122 DBGC ( ibdev, "IBDEV %s created %d-entry completion queue %p (%p) "
123 "with CQN %#lx\n", ibdev->name, num_cqes, cq,
124 ib_cq_get_drvdata ( cq ), cq->cqn );
125 return cq;
126
127 ibdev->op->destroy_cq ( ibdev, cq );
128 err_dev_create_cq:
129 list_del ( &cq->list );
130 free ( cq );
131 err_alloc_cq:
132 return NULL;
133 }
134
135 /**
136 * Destroy completion queue
137 *
138 * @v ibdev Infiniband device
139 * @v cq Completion queue
140 */
141 void ib_destroy_cq ( struct ib_device *ibdev,
142 struct ib_completion_queue *cq ) {
143 DBGC ( ibdev, "IBDEV %s destroying completion queue %#lx\n",
144 ibdev->name, cq->cqn );
145 assert ( list_empty ( &cq->work_queues ) );
146 ibdev->op->destroy_cq ( ibdev, cq );
147 list_del ( &cq->list );
148 free ( cq );
149 }
150
151 /**
152 * Poll completion queue
153 *
154 * @v ibdev Infiniband device
155 * @v cq Completion queue
156 */
157 void ib_poll_cq ( struct ib_device *ibdev,
158 struct ib_completion_queue *cq ) {
159 struct ib_work_queue *wq;
160
161 /* Poll completion queue */
162 ibdev->op->poll_cq ( ibdev, cq );
163
164 /* Refill receive work queues */
165 list_for_each_entry ( wq, &cq->work_queues, list ) {
166 if ( ! wq->is_send )
167 ib_refill_recv ( ibdev, wq->qp );
168 }
169 }
170
171 /***************************************************************************
172 *
173 * Work queues
174 *
175 ***************************************************************************
176 */
177
178 /**
179 * Create queue pair
180 *
181 * @v ibdev Infiniband device
182 * @v type Queue pair type
183 * @v num_send_wqes Number of send work queue entries
184 * @v send_cq Send completion queue
185 * @v num_recv_wqes Number of receive work queue entries
186 * @v recv_cq Receive completion queue
187 * @v op Queue pair operations
188 * @ret qp Queue pair
189 *
190 * The queue pair will be left in the INIT state; you must call
191 * ib_modify_qp() before it is ready to use for sending and receiving.
192 */
193 struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
194 enum ib_queue_pair_type type,
195 unsigned int num_send_wqes,
196 struct ib_completion_queue *send_cq,
197 unsigned int num_recv_wqes,
198 struct ib_completion_queue *recv_cq,
199 struct ib_queue_pair_operations *op ) {
200 struct ib_queue_pair *qp;
201 size_t total_size;
202 int rc;
203
204 DBGC ( ibdev, "IBDEV %s creating queue pair\n", ibdev->name );
205
206 /* Allocate and initialise data structure */
207 total_size = ( sizeof ( *qp ) +
208 ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
209 ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
210 qp = zalloc ( total_size );
211 if ( ! qp )
212 goto err_alloc_qp;
213 qp->ibdev = ibdev;
214 list_add ( &qp->list, &ibdev->qps );
215 qp->type = type;
216 qp->send.qp = qp;
217 qp->send.is_send = 1;
218 qp->send.cq = send_cq;
219 list_add ( &qp->send.list, &send_cq->work_queues );
220 qp->send.psn = ( random() & 0xffffffUL );
221 qp->send.num_wqes = num_send_wqes;
222 qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
223 qp->recv.qp = qp;
224 qp->recv.cq = recv_cq;
225 list_add ( &qp->recv.list, &recv_cq->work_queues );
226 qp->recv.psn = ( random() & 0xffffffUL );
227 qp->recv.num_wqes = num_recv_wqes;
228 qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
229 ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
230 INIT_LIST_HEAD ( &qp->mgids );
231 qp->op = op;
232
233 /* Perform device-specific initialisation and get QPN */
234 if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
235 DBGC ( ibdev, "IBDEV %s could not initialise queue pair: "
236 "%s\n", ibdev->name, strerror ( rc ) );
237 goto err_dev_create_qp;
238 }
239 DBGC ( ibdev, "IBDEV %s created queue pair %p (%p) with QPN %#lx\n",
240 ibdev->name, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
241 DBGC ( ibdev, "IBDEV %s QPN %#lx has %d send entries at [%p,%p)\n",
242 ibdev->name, qp->qpn, num_send_wqes, qp->send.iobufs,
243 qp->recv.iobufs );
244 DBGC ( ibdev, "IBDEV %s QPN %#lx has %d receive entries at [%p,%p)\n",
245 ibdev->name, qp->qpn, num_recv_wqes, qp->recv.iobufs,
246 ( ( ( void * ) qp ) + total_size ) );
247
248 /* Calculate externally-visible QPN */
249 switch ( type ) {
250 case IB_QPT_SMI:
251 qp->ext_qpn = IB_QPN_SMI;
252 break;
253 case IB_QPT_GSI:
254 qp->ext_qpn = IB_QPN_GSI;
255 break;
256 default:
257 qp->ext_qpn = qp->qpn;
258 break;
259 }
260 if ( qp->ext_qpn != qp->qpn ) {
261 DBGC ( ibdev, "IBDEV %s QPN %#lx has external QPN %#lx\n",
262 ibdev->name, qp->qpn, qp->ext_qpn );
263 }
264
265 return qp;
266
267 ibdev->op->destroy_qp ( ibdev, qp );
268 err_dev_create_qp:
269 list_del ( &qp->send.list );
270 list_del ( &qp->recv.list );
271 list_del ( &qp->list );
272 free ( qp );
273 err_alloc_qp:
274 return NULL;
275 }
276
277 /**
278 * Modify queue pair
279 *
280 * @v ibdev Infiniband device
281 * @v qp Queue pair
282 * @ret rc Return status code
283 */
284 int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
285 int rc;
286
287 DBGC ( ibdev, "IBDEV %s modifying QPN %#lx\n", ibdev->name, qp->qpn );
288
289 if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
290 DBGC ( ibdev, "IBDEV %s could not modify QPN %#lx: %s\n",
291 ibdev->name, qp->qpn, strerror ( rc ) );
292 return rc;
293 }
294
295 return 0;
296 }
297
298 /**
299 * Destroy queue pair
300 *
301 * @v ibdev Infiniband device
302 * @v qp Queue pair
303 */
304 void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
305 struct io_buffer *iobuf;
306 unsigned int i;
307
308 DBGC ( ibdev, "IBDEV %s destroying QPN %#lx\n",
309 ibdev->name, qp->qpn );
310
311 assert ( list_empty ( &qp->mgids ) );
312
313 /* Perform device-specific destruction */
314 ibdev->op->destroy_qp ( ibdev, qp );
315
316 /* Complete any remaining I/O buffers with errors */
317 for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
318 if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
319 ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
320 }
321 for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
322 if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
323 ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
324 -ECANCELED );
325 }
326 }
327
328 /* Remove work queues from completion queue */
329 list_del ( &qp->send.list );
330 list_del ( &qp->recv.list );
331
332 /* Free QP */
333 list_del ( &qp->list );
334 free ( qp );
335 }
336
337 /**
338 * Find queue pair by QPN
339 *
340 * @v ibdev Infiniband device
341 * @v qpn Queue pair number
342 * @ret qp Queue pair, or NULL
343 */
344 struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
345 unsigned long qpn ) {
346 struct ib_queue_pair *qp;
347
348 list_for_each_entry ( qp, &ibdev->qps, list ) {
349 if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
350 return qp;
351 }
352 return NULL;
353 }
354
355 /**
356 * Find queue pair by multicast GID
357 *
358 * @v ibdev Infiniband device
359 * @v gid Multicast GID
360 * @ret qp Queue pair, or NULL
361 */
362 struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
363 union ib_gid *gid ) {
364 struct ib_queue_pair *qp;
365 struct ib_multicast_gid *mgid;
366
367 list_for_each_entry ( qp, &ibdev->qps, list ) {
368 list_for_each_entry ( mgid, &qp->mgids, list ) {
369 if ( memcmp ( &mgid->gid, gid,
370 sizeof ( mgid->gid ) ) == 0 ) {
371 return qp;
372 }
373 }
374 }
375 return NULL;
376 }
377
378 /**
379 * Find work queue belonging to completion queue
380 *
381 * @v cq Completion queue
382 * @v qpn Queue pair number
383 * @v is_send Find send work queue (rather than receive)
384 * @ret wq Work queue, or NULL if not found
385 */
386 struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
387 unsigned long qpn, int is_send ) {
388 struct ib_work_queue *wq;
389
390 list_for_each_entry ( wq, &cq->work_queues, list ) {
391 if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
392 return wq;
393 }
394 return NULL;
395 }
396
397 /**
398 * Post send work queue entry
399 *
400 * @v ibdev Infiniband device
401 * @v qp Queue pair
402 * @v dest Destination address vector
403 * @v iobuf I/O buffer
404 * @ret rc Return status code
405 */
406 int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
407 struct ib_address_vector *dest,
408 struct io_buffer *iobuf ) {
409 struct ib_address_vector dest_copy;
410 int rc;
411
412 /* Start profiling */
413 profile_start ( &ib_post_send_profiler );
414
415 /* Check queue fill level */
416 if ( qp->send.fill >= qp->send.num_wqes ) {
417 DBGC ( ibdev, "IBDEV %s QPN %#lx send queue full\n",
418 ibdev->name, qp->qpn );
419 return -ENOBUFS;
420 }
421
422 /* Use default address vector if none specified */
423 if ( ! dest )
424 dest = &qp->av;
425
426 /* Make modifiable copy of address vector */
427 memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
428 dest = &dest_copy;
429
430 /* Fill in optional parameters in address vector */
431 if ( ! dest->qkey )
432 dest->qkey = qp->qkey;
433 if ( ! dest->rate )
434 dest->rate = IB_RATE_2_5;
435
436 /* Post to hardware */
437 if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
438 DBGC ( ibdev, "IBDEV %s QPN %#lx could not post send WQE: "
439 "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
440 return rc;
441 }
442
443 /* Increase fill level */
444 qp->send.fill++;
445
446 /* Stop profiling */
447 profile_stop ( &ib_post_send_profiler );
448
449 return 0;
450 }
451
452 /**
453 * Post receive work queue entry
454 *
455 * @v ibdev Infiniband device
456 * @v qp Queue pair
457 * @v iobuf I/O buffer
458 * @ret rc Return status code
459 */
460 int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
461 struct io_buffer *iobuf ) {
462 int rc;
463
464 /* Start profiling */
465 profile_start ( &ib_post_recv_profiler );
466
467 /* Check packet length */
468 if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
469 DBGC ( ibdev, "IBDEV %s QPN %#lx wrong RX buffer size (%zd)\n",
470 ibdev->name, qp->qpn, iob_tailroom ( iobuf ) );
471 return -EINVAL;
472 }
473
474 /* Check queue fill level */
475 if ( qp->recv.fill >= qp->recv.num_wqes ) {
476 DBGC ( ibdev, "IBDEV %s QPN %#lx receive queue full\n",
477 ibdev->name, qp->qpn );
478 return -ENOBUFS;
479 }
480
481 /* Post to hardware */
482 if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
483 DBGC ( ibdev, "IBDEV %s QPN %#lx could not post receive WQE: "
484 "%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
485 return rc;
486 }
487
488 /* Increase fill level */
489 qp->recv.fill++;
490
491 /* Stop profiling */
492 profile_stop ( &ib_post_recv_profiler );
493
494 return 0;
495 }
496
497 /**
498 * Complete send work queue entry
499 *
500 * @v ibdev Infiniband device
501 * @v qp Queue pair
502 * @v iobuf I/O buffer
503 * @v rc Completion status code
504 */
505 void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
506 struct io_buffer *iobuf, int rc ) {
507
508 if ( qp->send.cq->op->complete_send ) {
509 qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
510 } else {
511 free_iob ( iobuf );
512 }
513 qp->send.fill--;
514 }
515
516 /**
517 * Complete receive work queue entry
518 *
519 * @v ibdev Infiniband device
520 * @v qp Queue pair
521 * @v dest Destination address vector, or NULL
522 * @v source Source address vector, or NULL
523 * @v iobuf I/O buffer
524 * @v rc Completion status code
525 */
526 void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
527 struct ib_address_vector *dest,
528 struct ib_address_vector *source,
529 struct io_buffer *iobuf, int rc ) {
530
531 if ( qp->recv.cq->op->complete_recv ) {
532 qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
533 iobuf, rc );
534 } else {
535 free_iob ( iobuf );
536 }
537 qp->recv.fill--;
538 }
539
540 /**
541 * Refill receive work queue
542 *
543 * @v ibdev Infiniband device
544 * @v qp Queue pair
545 */
546 void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
547 struct io_buffer *iobuf;
548 int rc;
549
550 /* Keep filling while unfilled entries remain */
551 while ( qp->recv.fill < qp->recv.num_wqes ) {
552
553 /* Allocate I/O buffer */
554 iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
555 if ( ! iobuf ) {
556 /* Non-fatal; we will refill on next attempt */
557 return;
558 }
559
560 /* Post I/O buffer */
561 if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
562 DBGC ( ibdev, "IBDEV %s could not refill: %s\n",
563 ibdev->name, strerror ( rc ) );
564 free_iob ( iobuf );
565 /* Give up */
566 return;
567 }
568 }
569 }
570
571 /***************************************************************************
572 *
573 * Link control
574 *
575 ***************************************************************************
576 */
577
578 /**
579 * Get link state
580 *
581 * @v ibdev Infiniband device
582 * @ret rc Link status code
583 */
584 int ib_link_rc ( struct ib_device *ibdev ) {
585 switch ( ibdev->port_state ) {
586 case IB_PORT_STATE_DOWN: return -ENOTCONN;
587 case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
588 case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
589 case IB_PORT_STATE_ACTIVE: return 0;
590 default: return -EINVAL;
591 }
592 }
593
594 /**
595 * Textual representation of Infiniband link state
596 *
597 * @v ibdev Infiniband device
598 * @ret link_text Link state text
599 */
600 static const char * ib_link_state_text ( struct ib_device *ibdev ) {
601 switch ( ibdev->port_state ) {
602 case IB_PORT_STATE_DOWN: return "DOWN";
603 case IB_PORT_STATE_INIT: return "INIT";
604 case IB_PORT_STATE_ARMED: return "ARMED";
605 case IB_PORT_STATE_ACTIVE: return "ACTIVE";
606 default: return "UNKNOWN";
607 }
608 }
609
610 /**
611 * Notify drivers of Infiniband device or link state change
612 *
613 * @v ibdev Infiniband device
614 */
615 static void ib_notify ( struct ib_device *ibdev ) {
616 struct ib_driver *driver;
617
618 for_each_table_entry ( driver, IB_DRIVERS )
619 driver->notify ( ibdev );
620 }
621
622 /**
623 * Notify of Infiniband link state change
624 *
625 * @v ibdev Infiniband device
626 */
627 void ib_link_state_changed ( struct ib_device *ibdev ) {
628
629 DBGC ( ibdev, "IBDEV %s link state is %s\n",
630 ibdev->name, ib_link_state_text ( ibdev ) );
631
632 /* Notify drivers of link state change */
633 ib_notify ( ibdev );
634 }
635
636 /**
637 * Open port
638 *
639 * @v ibdev Infiniband device
640 * @ret rc Return status code
641 */
642 int ib_open ( struct ib_device *ibdev ) {
643 int rc;
644
645 /* Increment device open request counter */
646 if ( ibdev->open_count++ > 0 ) {
647 /* Device was already open; do nothing */
648 return 0;
649 }
650
651 /* Open device */
652 if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
653 DBGC ( ibdev, "IBDEV %s could not open: %s\n",
654 ibdev->name, strerror ( rc ) );
655 goto err_open;
656 }
657
658 /* Create subnet management interface */
659 ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
660 if ( ! ibdev->smi ) {
661 DBGC ( ibdev, "IBDEV %s could not create SMI\n", ibdev->name );
662 rc = -ENOMEM;
663 goto err_create_smi;
664 }
665
666 /* Create subnet management agent */
667 if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
668 DBGC ( ibdev, "IBDEV %s could not create SMA: %s\n",
669 ibdev->name, strerror ( rc ) );
670 goto err_create_sma;
671 }
672
673 /* Create general services interface */
674 ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
675 if ( ! ibdev->gsi ) {
676 DBGC ( ibdev, "IBDEV %s could not create GSI\n", ibdev->name );
677 rc = -ENOMEM;
678 goto err_create_gsi;
679 }
680
681 /* Add to head of open devices list */
682 list_add ( &ibdev->open_list, &open_ib_devices );
683
684 /* Notify drivers of device state change */
685 ib_notify ( ibdev );
686
687 assert ( ibdev->open_count == 1 );
688 return 0;
689
690 ib_destroy_mi ( ibdev, ibdev->gsi );
691 err_create_gsi:
692 ib_destroy_sma ( ibdev, ibdev->smi );
693 err_create_sma:
694 ib_destroy_mi ( ibdev, ibdev->smi );
695 err_create_smi:
696 ibdev->op->close ( ibdev );
697 err_open:
698 assert ( ibdev->open_count == 1 );
699 ibdev->open_count = 0;
700 return rc;
701 }
702
703 /**
704 * Close port
705 *
706 * @v ibdev Infiniband device
707 */
708 void ib_close ( struct ib_device *ibdev ) {
709
710 /* Decrement device open request counter */
711 ibdev->open_count--;
712
713 /* Close device if this was the last remaining requested opening */
714 if ( ibdev->open_count == 0 ) {
715 ib_notify ( ibdev );
716 list_del ( &ibdev->open_list );
717 ib_destroy_mi ( ibdev, ibdev->gsi );
718 ib_destroy_sma ( ibdev, ibdev->smi );
719 ib_destroy_mi ( ibdev, ibdev->smi );
720 ibdev->op->close ( ibdev );
721 ibdev->port_state = IB_PORT_STATE_DOWN;
722 }
723 }
724
725 /***************************************************************************
726 *
727 * Multicast
728 *
729 ***************************************************************************
730 */
731
732 /**
733 * Attach to multicast group
734 *
735 * @v ibdev Infiniband device
736 * @v qp Queue pair
737 * @v gid Multicast GID
738 * @ret rc Return status code
739 *
740 * Note that this function handles only the local device's attachment
741 * to the multicast GID; it does not issue the relevant MADs to join
742 * the multicast group on the subnet.
743 */
744 int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
745 union ib_gid *gid ) {
746 struct ib_multicast_gid *mgid;
747 int rc;
748
749 /* Sanity check */
750 assert ( qp != NULL );
751
752 /* Add to software multicast GID list */
753 mgid = zalloc ( sizeof ( *mgid ) );
754 if ( ! mgid ) {
755 rc = -ENOMEM;
756 goto err_alloc_mgid;
757 }
758 memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
759 list_add ( &mgid->list, &qp->mgids );
760
761 /* Add to hardware multicast GID list */
762 if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
763 goto err_dev_mcast_attach;
764
765 return 0;
766
767 err_dev_mcast_attach:
768 list_del ( &mgid->list );
769 free ( mgid );
770 err_alloc_mgid:
771 return rc;
772 }
773
774 /**
775 * Detach from multicast group
776 *
777 * @v ibdev Infiniband device
778 * @v qp Queue pair
779 * @v gid Multicast GID
780 */
781 void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
782 union ib_gid *gid ) {
783 struct ib_multicast_gid *mgid;
784
785 /* Sanity check */
786 assert ( qp != NULL );
787
788 /* Remove from hardware multicast GID list */
789 ibdev->op->mcast_detach ( ibdev, qp, gid );
790
791 /* Remove from software multicast GID list */
792 list_for_each_entry ( mgid, &qp->mgids, list ) {
793 if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
794 list_del ( &mgid->list );
795 free ( mgid );
796 break;
797 }
798 }
799 }
800
801 /***************************************************************************
802 *
803 * Miscellaneous
804 *
805 ***************************************************************************
806 */
807
808 /**
809 * Count Infiniband HCA ports
810 *
811 * @v ibdev Infiniband device
812 * @ret num_ports Number of ports
813 */
814 int ib_count_ports ( struct ib_device *ibdev ) {
815 struct ib_device *tmp;
816 int num_ports = 0;
817
818 /* Search for IB devices with the same physical device to
819 * identify port count.
820 */
821 for_each_ibdev ( tmp ) {
822 if ( tmp->dev == ibdev->dev )
823 num_ports++;
824 }
825 return num_ports;
826 }
827
828 /**
829 * Set port information
830 *
831 * @v ibdev Infiniband device
832 * @v mad Set port information MAD
833 */
834 int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
835 int rc;
836
837 /* Adapters with embedded SMAs do not need to support this method */
838 if ( ! ibdev->op->set_port_info ) {
839 DBGC ( ibdev, "IBDEV %s does not support setting port "
840 "information\n", ibdev->name );
841 return -ENOTSUP;
842 }
843
844 if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
845 DBGC ( ibdev, "IBDEV %s could not set port information: %s\n",
846 ibdev->name, strerror ( rc ) );
847 return rc;
848 }
849
850 return 0;
851 };
852
853 /**
854 * Set partition key table
855 *
856 * @v ibdev Infiniband device
857 * @v mad Set partition key table MAD
858 */
859 int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
860 int rc;
861
862 /* Adapters with embedded SMAs do not need to support this method */
863 if ( ! ibdev->op->set_pkey_table ) {
864 DBGC ( ibdev, "IBDEV %s does not support setting partition "
865 "key table\n", ibdev->name );
866 return -ENOTSUP;
867 }
868
869 if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
870 DBGC ( ibdev, "IBDEV %s could not set partition key table: "
871 "%s\n", ibdev->name, strerror ( rc ) );
872 return rc;
873 }
874
875 return 0;
876 };
877
878 /***************************************************************************
879 *
880 * Event queues
881 *
882 ***************************************************************************
883 */
884
885 /**
886 * Poll event queue
887 *
888 * @v ibdev Infiniband device
889 */
890 void ib_poll_eq ( struct ib_device *ibdev ) {
891 struct ib_completion_queue *cq;
892
893 /* Poll device's event queue */
894 ibdev->op->poll_eq ( ibdev );
895
896 /* Poll all completion queues */
897 list_for_each_entry ( cq, &ibdev->cqs, list )
898 ib_poll_cq ( ibdev, cq );
899 }
900
901 /**
902 * Single-step the Infiniband event queue
903 *
904 * @v process Infiniband event queue process
905 */
906 static void ib_step ( struct process *process __unused ) {
907 struct ib_device *ibdev;
908
909 list_for_each_entry ( ibdev, &open_ib_devices, open_list )
910 ib_poll_eq ( ibdev );
911 }
912
913 /** Infiniband event queue process */
914 PERMANENT_PROCESS ( ib_process, ib_step );
915
916 /***************************************************************************
917 *
918 * Infiniband device creation/destruction
919 *
920 ***************************************************************************
921 */
922
923 /**
924 * Allocate Infiniband device
925 *
926 * @v priv_size Size of driver private data area
927 * @ret ibdev Infiniband device, or NULL
928 */
929 struct ib_device * alloc_ibdev ( size_t priv_size ) {
930 struct ib_device *ibdev;
931 void *drv_priv;
932 size_t total_len;
933
934 total_len = ( sizeof ( *ibdev ) + priv_size );
935 ibdev = zalloc ( total_len );
936 if ( ibdev ) {
937 drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
938 ib_set_drvdata ( ibdev, drv_priv );
939 INIT_LIST_HEAD ( &ibdev->list );
940 INIT_LIST_HEAD ( &ibdev->open_list );
941 INIT_LIST_HEAD ( &ibdev->cqs );
942 INIT_LIST_HEAD ( &ibdev->qps );
943 ibdev->port_state = IB_PORT_STATE_DOWN;
944 ibdev->lid = IB_LID_NONE;
945 ibdev->pkey = IB_PKEY_DEFAULT;
946 }
947 return ibdev;
948 }
949
950 /**
951 * Register Infiniband device
952 *
953 * @v ibdev Infiniband device
954 * @ret rc Return status code
955 */
956 int register_ibdev ( struct ib_device *ibdev ) {
957 struct ib_driver *driver;
958 int rc;
959
960 /* Record device index and create device name */
961 if ( ibdev->name[0] == '\0' ) {
962 snprintf ( ibdev->name, sizeof ( ibdev->name ), "inf%d",
963 ibdev_index );
964 }
965 ibdev->index = ++ibdev_index;
966
967 /* Add to device list */
968 ibdev_get ( ibdev );
969 list_add_tail ( &ibdev->list, &ib_devices );
970 DBGC ( ibdev, "IBDEV %s registered (phys %s)\n", ibdev->name,
971 ibdev->dev->name );
972
973 /* Probe device */
974 for_each_table_entry ( driver, IB_DRIVERS ) {
975 if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
976 DBGC ( ibdev, "IBDEV %s could not add %s device: %s\n",
977 ibdev->name, driver->name, strerror ( rc ) );
978 goto err_probe;
979 }
980 }
981
982 return 0;
983
984 err_probe:
985 for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
986 driver->remove ( ibdev );
987 list_del ( &ibdev->list );
988 ibdev_put ( ibdev );
989 return rc;
990 }
991
992 /**
993 * Unregister Infiniband device
994 *
995 * @v ibdev Infiniband device
996 */
997 void unregister_ibdev ( struct ib_device *ibdev ) {
998 struct ib_driver *driver;
999
1000 /* Remove device */
1001 for_each_table_entry_reverse ( driver, IB_DRIVERS )
1002 driver->remove ( ibdev );
1003
1004 /* Remove from device list */
1005 list_del ( &ibdev->list );
1006 ibdev_put ( ibdev );
1007 DBGC ( ibdev, "IBDEV %s unregistered\n", ibdev->name );
1008
1009 /* Reset device index if no devices remain */
1010 if ( list_empty ( &ib_devices ) )
1011 ibdev_index = 0;
1012 }
1013
1014 /**
1015 * Find Infiniband device by GID
1016 *
1017 * @v gid GID
1018 * @ret ibdev Infiniband device, or NULL
1019 */
1020 struct ib_device * find_ibdev ( union ib_gid *gid ) {
1021 struct ib_device *ibdev;
1022
1023 for_each_ibdev ( ibdev ) {
1024 if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
1025 return ibdev;
1026 }
1027 return NULL;
1028 }
1029
1030 /**
1031 * Get most recently opened Infiniband device
1032 *
1033 * @ret ibdev Most recently opened Infiniband device, or NULL
1034 */
1035 struct ib_device * last_opened_ibdev ( void ) {
1036 struct ib_device *ibdev;
1037
1038 ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
1039 open_list );
1040 if ( ! ibdev )
1041 return NULL;
1042
1043 assert ( ibdev->open_count != 0 );
1044 return ibdev;
1045 }
1046
1047 /* Drag in objects via register_ibdev().
 *
 * NOTE(review): presumably this names register_ibdev as the symbol on
 * whose behalf the objects below are required; confirm against the
 * REQUIRING_SYMBOL definition.
 */
1048 REQUIRING_SYMBOL ( register_ibdev );
1049
1050 /* Drag in Infiniband configuration (the config_infiniband object) */
1051 REQUIRE_OBJECT ( config_infiniband );
1052
1053 /* Drag in IPoIB (the ipoib object) */
1054 REQUIRE_OBJECT ( ipoib );