src/net/infiniband.c (ipxe.git)
/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * You can also choose to distribute this program under the terms of
 * the Unmodified Binary Distribution Licence (as given in the file
 * COPYING.UBDL), provided that you have satisfied its requirements.
 */

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <byteswap.h>
#include <errno.h>
#include <assert.h>
#include <ipxe/list.h>
#include <ipxe/errortab.h>
#include <ipxe/if_arp.h>
#include <ipxe/netdevice.h>
#include <ipxe/iobuf.h>
#include <ipxe/process.h>
#include <ipxe/profile.h>
#include <ipxe/infiniband.h>
#include <ipxe/ib_mi.h>
#include <ipxe/ib_sma.h>

/** @file
 *
 * Infiniband protocol
 *
 */

/** List of Infiniband devices */
struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );

/** List of open Infiniband devices, in reverse order of opening */
static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );

/** Post send work queue entry profiler */
static struct profiler ib_post_send_profiler __profiler =
        { .name = "ib.post_send" };

/** Post receive work queue entry profiler */
static struct profiler ib_post_recv_profiler __profiler =
        { .name = "ib.post_recv" };

/* Disambiguate the various possible EINPROGRESSes */
#define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
#define EINFO_EINPROGRESS_INIT __einfo_uniqify \
        ( EINFO_EINPROGRESS, 0x01, "Initialising" )
#define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
#define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
        ( EINFO_EINPROGRESS, 0x02, "Armed" )

/** Human-readable message for the link statuses */
struct errortab infiniband_errors[] __errortab = {
        __einfo_errortab ( EINFO_EINPROGRESS_INIT ),
        __einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
};

/***************************************************************************
 *
 * Completion queues
 *
 ***************************************************************************
 */

/**
 * Create completion queue
 *
 * @v ibdev             Infiniband device
 * @v num_cqes          Number of completion queue entries
 * @v op                Completion queue operations
 * @ret cq              New completion queue
 */
struct ib_completion_queue *
ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
               struct ib_completion_queue_operations *op ) {
        struct ib_completion_queue *cq;
        int rc;

        DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );

        /* Allocate and initialise data structure */
        cq = zalloc ( sizeof ( *cq ) );
        if ( ! cq )
                goto err_alloc_cq;
        cq->ibdev = ibdev;
        list_add ( &cq->list, &ibdev->cqs );
        cq->num_cqes = num_cqes;
        INIT_LIST_HEAD ( &cq->work_queues );
        cq->op = op;

        /* Perform device-specific initialisation and get CQN */
        if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not initialise completion "
                       "queue: %s\n", ibdev, strerror ( rc ) );
                goto err_dev_create_cq;
        }

        DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
               "with CQN %#lx\n", ibdev, num_cqes, cq,
               ib_cq_get_drvdata ( cq ), cq->cqn );
        return cq;

        ibdev->op->destroy_cq ( ibdev, cq );
 err_dev_create_cq:
        list_del ( &cq->list );
        free ( cq );
 err_alloc_cq:
        return NULL;
}

/**
 * Destroy completion queue
 *
 * @v ibdev             Infiniband device
 * @v cq                Completion queue
 */
void ib_destroy_cq ( struct ib_device *ibdev,
                     struct ib_completion_queue *cq ) {
        DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
               ibdev, cq->cqn );
        assert ( list_empty ( &cq->work_queues ) );
        ibdev->op->destroy_cq ( ibdev, cq );
        list_del ( &cq->list );
        free ( cq );
}

/**
 * Poll completion queue
 *
 * @v ibdev             Infiniband device
 * @v cq                Completion queue
 */
void ib_poll_cq ( struct ib_device *ibdev,
                  struct ib_completion_queue *cq ) {
        struct ib_work_queue *wq;

        /* Poll completion queue */
        ibdev->op->poll_cq ( ibdev, cq );

        /* Refill receive work queues */
        list_for_each_entry ( wq, &cq->work_queues, list ) {
                if ( ! wq->is_send )
                        ib_refill_recv ( ibdev, wq->qp );
        }
}
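/*
 * Illustrative sketch (not part of the original file): a consumer
 * normally supplies completion handlers via an
 * ib_completion_queue_operations table and passes it to
 * ib_create_cq().  The handler signatures mirror the calls made by
 * ib_complete_send() and ib_complete_recv() below; the "my_*" names
 * and the 32-entry queue depth are hypothetical.
 */
static void my_complete_send ( struct ib_device *ibdev __unused,
                               struct ib_queue_pair *qp __unused,
                               struct io_buffer *iobuf, int rc __unused ) {
        /* The transmit buffer is no longer owned by the hardware */
        free_iob ( iobuf );
}

static void my_complete_recv ( struct ib_device *ibdev __unused,
                               struct ib_queue_pair *qp __unused,
                               struct ib_address_vector *dest __unused,
                               struct ib_address_vector *source __unused,
                               struct io_buffer *iobuf, int rc ) {
        if ( rc == 0 ) {
                /* Process received payload here */
        }
        free_iob ( iobuf );
}

static struct ib_completion_queue_operations my_cq_op = {
        .complete_send = my_complete_send,
        .complete_recv = my_complete_recv,
};

static struct ib_completion_queue *
my_create_cq_example ( struct ib_device *ibdev ) {
        /* 32 entries is an arbitrary example depth */
        return ib_create_cq ( ibdev, 32, &my_cq_op );
}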

/***************************************************************************
 *
 * Work queues
 *
 ***************************************************************************
 */

/**
 * Create queue pair
 *
 * @v ibdev             Infiniband device
 * @v type              Queue pair type
 * @v num_send_wqes     Number of send work queue entries
 * @v send_cq           Send completion queue
 * @v num_recv_wqes     Number of receive work queue entries
 * @v recv_cq           Receive completion queue
 * @v op                Queue pair operations
 * @ret qp              Queue pair
 *
 * The queue pair will be left in the INIT state; you must call
 * ib_modify_qp() before it is ready to use for sending and receiving.
 */
struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
                                      enum ib_queue_pair_type type,
                                      unsigned int num_send_wqes,
                                      struct ib_completion_queue *send_cq,
                                      unsigned int num_recv_wqes,
                                      struct ib_completion_queue *recv_cq,
                                      struct ib_queue_pair_operations *op ) {
        struct ib_queue_pair *qp;
        size_t total_size;
        int rc;

        DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );

        /* Allocate and initialise data structure */
        total_size = ( sizeof ( *qp ) +
                       ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
                       ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
        qp = zalloc ( total_size );
        if ( ! qp )
                goto err_alloc_qp;
        qp->ibdev = ibdev;
        list_add ( &qp->list, &ibdev->qps );
        qp->type = type;
        qp->send.qp = qp;
        qp->send.is_send = 1;
        qp->send.cq = send_cq;
        list_add ( &qp->send.list, &send_cq->work_queues );
        qp->send.psn = ( random() & 0xffffffUL );
        qp->send.num_wqes = num_send_wqes;
        qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
        qp->recv.qp = qp;
        qp->recv.cq = recv_cq;
        list_add ( &qp->recv.list, &recv_cq->work_queues );
        qp->recv.psn = ( random() & 0xffffffUL );
        qp->recv.num_wqes = num_recv_wqes;
        qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
                            ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
        INIT_LIST_HEAD ( &qp->mgids );
        qp->op = op;

        /* Perform device-specific initialisation and get QPN */
        if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
                       "%s\n", ibdev, strerror ( rc ) );
                goto err_dev_create_qp;
        }
        DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
               ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
        DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
               ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
               qp->recv.iobufs );
        DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
               ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
               ( ( ( void * ) qp ) + total_size ) );

        /* Calculate externally-visible QPN */
        switch ( type ) {
        case IB_QPT_SMI:
                qp->ext_qpn = IB_QPN_SMI;
                break;
        case IB_QPT_GSI:
                qp->ext_qpn = IB_QPN_GSI;
                break;
        default:
                qp->ext_qpn = qp->qpn;
                break;
        }
        if ( qp->ext_qpn != qp->qpn ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
                       ibdev, qp->qpn, qp->ext_qpn );
        }

        return qp;

        ibdev->op->destroy_qp ( ibdev, qp );
 err_dev_create_qp:
        list_del ( &qp->send.list );
        list_del ( &qp->recv.list );
        list_del ( &qp->list );
        free ( qp );
 err_alloc_qp:
        return NULL;
}

/**
 * Modify queue pair
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @ret rc              Return status code
 */
int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
        int rc;

        DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );

        if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
                       ibdev, qp->qpn, strerror ( rc ) );
                return rc;
        }

        return 0;
}
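/*
 * Illustrative sketch (not part of the original file): creating an
 * unreliable datagram queue pair over an existing completion queue and
 * bringing it up with ib_modify_qp().  The queue depths and the
 * "my_create_qp_example" wrapper are hypothetical; a suitable
 * ib_queue_pair_operations table is sketched later, after
 * ib_refill_recv().
 */
static struct ib_queue_pair *
my_create_qp_example ( struct ib_device *ibdev,
                       struct ib_completion_queue *cq,
                       struct ib_queue_pair_operations *qp_op ) {
        struct ib_queue_pair *qp;

        /* Create a UD queue pair with 8 send and 8 receive WQEs,
         * sharing one completion queue for both directions.
         */
        qp = ib_create_qp ( ibdev, IB_QPT_UD, 8, cq, 8, cq, qp_op );
        if ( ! qp )
                return NULL;

        /* The queue pair is left in the INIT state; transition it
         * before attempting to send or receive.
         */
        if ( ib_modify_qp ( ibdev, qp ) != 0 ) {
                ib_destroy_qp ( ibdev, qp );
                return NULL;
        }

        /* Pre-populate the receive ring */
        ib_refill_recv ( ibdev, qp );

        return qp;
}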

/**
 * Destroy queue pair
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 */
void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
        struct io_buffer *iobuf;
        unsigned int i;

        DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
               ibdev, qp->qpn );

        assert ( list_empty ( &qp->mgids ) );

        /* Perform device-specific destruction */
        ibdev->op->destroy_qp ( ibdev, qp );

        /* Complete any remaining I/O buffers with errors */
        for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
                if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
                        ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
        }
        for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
                if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
                        ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
                                           -ECANCELED );
                }
        }

        /* Remove work queues from completion queue */
        list_del ( &qp->send.list );
        list_del ( &qp->recv.list );

        /* Free QP */
        list_del ( &qp->list );
        free ( qp );
}

/**
 * Find queue pair by QPN
 *
 * @v ibdev             Infiniband device
 * @v qpn               Queue pair number
 * @ret qp              Queue pair, or NULL
 */
struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
                                        unsigned long qpn ) {
        struct ib_queue_pair *qp;

        list_for_each_entry ( qp, &ibdev->qps, list ) {
                if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
                        return qp;
        }
        return NULL;
}

/**
 * Find queue pair by multicast GID
 *
 * @v ibdev             Infiniband device
 * @v gid               Multicast GID
 * @ret qp              Queue pair, or NULL
 */
struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
                                         union ib_gid *gid ) {
        struct ib_queue_pair *qp;
        struct ib_multicast_gid *mgid;

        list_for_each_entry ( qp, &ibdev->qps, list ) {
                list_for_each_entry ( mgid, &qp->mgids, list ) {
                        if ( memcmp ( &mgid->gid, gid,
                                      sizeof ( mgid->gid ) ) == 0 ) {
                                return qp;
                        }
                }
        }
        return NULL;
}

/**
 * Find work queue belonging to completion queue
 *
 * @v cq                Completion queue
 * @v qpn               Queue pair number
 * @v is_send           Find send work queue (rather than receive)
 * @ret wq              Work queue, or NULL if not found
 */
struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
                                    unsigned long qpn, int is_send ) {
        struct ib_work_queue *wq;

        list_for_each_entry ( wq, &cq->work_queues, list ) {
                if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
                        return wq;
        }
        return NULL;
}

/**
 * Post send work queue entry
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v dest              Destination address vector
 * @v iobuf             I/O buffer
 * @ret rc              Return status code
 */
int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                   struct ib_address_vector *dest,
                   struct io_buffer *iobuf ) {
        struct ib_address_vector dest_copy;
        int rc;

        /* Start profiling */
        profile_start ( &ib_post_send_profiler );

        /* Check queue fill level */
        if ( qp->send.fill >= qp->send.num_wqes ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
                       ibdev, qp->qpn );
                return -ENOBUFS;
        }

        /* Use default address vector if none specified */
        if ( ! dest )
                dest = &qp->av;

        /* Make modifiable copy of address vector */
        memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
        dest = &dest_copy;

        /* Fill in optional parameters in address vector */
        if ( ! dest->qkey )
                dest->qkey = qp->qkey;
        if ( ! dest->rate )
                dest->rate = IB_RATE_2_5;

        /* Post to hardware */
        if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
                       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
                return rc;
        }

        /* Increase fill level */
        qp->send.fill++;

        /* Stop profiling */
        profile_stop ( &ib_post_send_profiler );

        return 0;
}
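/*
 * Illustrative sketch (not part of the original file): transmitting a
 * datagram by filling in an address vector and posting an I/O buffer.
 * The LID, QPN and payload values are placeholders, and the lid/qpn
 * field names are assumed from ipxe/infiniband.h; qkey and rate may be
 * left as zero, in which case ib_post_send() substitutes the queue
 * pair's defaults as shown above.
 */
static int my_post_send_example ( struct ib_device *ibdev,
                                  struct ib_queue_pair *qp ) {
        struct ib_address_vector dest;
        struct io_buffer *iobuf;
        int rc;

        /* Allocate a transmit buffer and append some payload */
        iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
        if ( ! iobuf )
                return -ENOMEM;
        memset ( iob_put ( iobuf, 64 ), 0xaa, 64 );

        /* Describe the destination (placeholder values) */
        memset ( &dest, 0, sizeof ( dest ) );
        dest.lid = 0x0001;
        dest.qpn = 0x000123;

        /* On success the buffer is owned by the queue pair until the
         * send completion handler runs; on failure we still own it.
         */
        if ( ( rc = ib_post_send ( ibdev, qp, &dest, iobuf ) ) != 0 ) {
                free_iob ( iobuf );
                return rc;
        }

        return 0;
}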

/**
 * Post receive work queue entry
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v iobuf             I/O buffer
 * @ret rc              Return status code
 */
int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                   struct io_buffer *iobuf ) {
        int rc;

        /* Start profiling */
        profile_start ( &ib_post_recv_profiler );

        /* Check packet length */
        if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
                       ibdev, qp->qpn, iob_tailroom ( iobuf ) );
                return -EINVAL;
        }

        /* Check queue fill level */
        if ( qp->recv.fill >= qp->recv.num_wqes ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
                       ibdev, qp->qpn );
                return -ENOBUFS;
        }

        /* Post to hardware */
        if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
                       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
                return rc;
        }

        /* Increase fill level */
        qp->recv.fill++;

        /* Stop profiling */
        profile_stop ( &ib_post_recv_profiler );

        return 0;
}

/**
 * Complete send work queue entry
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v iobuf             I/O buffer
 * @v rc                Completion status code
 */
void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                        struct io_buffer *iobuf, int rc ) {

        if ( qp->send.cq->op->complete_send ) {
                qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
        } else {
                free_iob ( iobuf );
        }
        qp->send.fill--;
}

/**
 * Complete receive work queue entry
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v dest              Destination address vector, or NULL
 * @v source            Source address vector, or NULL
 * @v iobuf             I/O buffer
 * @v rc                Completion status code
 */
void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                        struct ib_address_vector *dest,
                        struct ib_address_vector *source,
                        struct io_buffer *iobuf, int rc ) {

        if ( qp->recv.cq->op->complete_recv ) {
                qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
                                                 iobuf, rc );
        } else {
                free_iob ( iobuf );
        }
        qp->recv.fill--;
}

/**
 * Refill receive work queue
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 */
void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
        struct io_buffer *iobuf;
        int rc;

        /* Keep filling while unfilled entries remain */
        while ( qp->recv.fill < qp->recv.num_wqes ) {

                /* Allocate I/O buffer */
                iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
                if ( ! iobuf ) {
                        /* Non-fatal; we will refill on next attempt */
                        return;
                }

                /* Post I/O buffer */
                if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
                        DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
                               ibdev, strerror ( rc ) );
                        free_iob ( iobuf );
                        /* Give up */
                        return;
                }
        }
}
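/*
 * Illustrative sketch (not part of the original file): the refill path
 * above allocates buffers through the queue pair's alloc_iob method,
 * so a consumer provides an ib_queue_pair_operations table such as the
 * hypothetical one below (usable with the ib_create_qp() sketch after
 * ib_modify_qp()).  Plain alloc_iob() is the simplest choice; a
 * protocol needing reserved headroom would substitute its own
 * allocator here.
 */
static struct io_buffer * my_alloc_iob ( size_t len ) {
        return alloc_iob ( len );
}

static struct ib_queue_pair_operations my_qp_op = {
        .alloc_iob = my_alloc_iob,
};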

/***************************************************************************
 *
 * Link control
 *
 ***************************************************************************
 */

/**
 * Get link state
 *
 * @v ibdev             Infiniband device
 * @ret rc              Link status code
 */
int ib_link_rc ( struct ib_device *ibdev ) {
        switch ( ibdev->port_state ) {
        case IB_PORT_STATE_DOWN:        return -ENOTCONN;
        case IB_PORT_STATE_INIT:        return -EINPROGRESS_INIT;
        case IB_PORT_STATE_ARMED:       return -EINPROGRESS_ARMED;
        case IB_PORT_STATE_ACTIVE:      return 0;
        default:                        return -EINVAL;
        }
}

/**
 * Textual representation of Infiniband link state
 *
 * @v ibdev             Infiniband device
 * @ret link_text       Link state text
 */
static const char * ib_link_state_text ( struct ib_device *ibdev ) {
        switch ( ibdev->port_state ) {
        case IB_PORT_STATE_DOWN:        return "DOWN";
        case IB_PORT_STATE_INIT:        return "INIT";
        case IB_PORT_STATE_ARMED:       return "ARMED";
        case IB_PORT_STATE_ACTIVE:      return "ACTIVE";
        default:                        return "UNKNOWN";
        }
}

/**
 * Notify drivers of Infiniband device or link state change
 *
 * @v ibdev             Infiniband device
 */
static void ib_notify ( struct ib_device *ibdev ) {
        struct ib_driver *driver;

        for_each_table_entry ( driver, IB_DRIVERS )
                driver->notify ( ibdev );
}

/**
 * Notify of Infiniband link state change
 *
 * @v ibdev             Infiniband device
 */
void ib_link_state_changed ( struct ib_device *ibdev ) {

        DBGC ( ibdev, "IBDEV %p link state is %s\n",
               ibdev, ib_link_state_text ( ibdev ) );

        /* Notify drivers of link state change */
        ib_notify ( ibdev );
}

/**
 * Open port
 *
 * @v ibdev             Infiniband device
 * @ret rc              Return status code
 */
int ib_open ( struct ib_device *ibdev ) {
        int rc;

        /* Increment device open request counter */
        if ( ibdev->open_count++ > 0 ) {
                /* Device was already open; do nothing */
                return 0;
        }

        /* Open device */
        if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not open: %s\n",
                       ibdev, strerror ( rc ) );
                goto err_open;
        }

        /* Create subnet management interface */
        ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
        if ( ! ibdev->smi ) {
                DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
                rc = -ENOMEM;
                goto err_create_smi;
        }

        /* Create subnet management agent */
        if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
                       ibdev, strerror ( rc ) );
                goto err_create_sma;
        }

        /* Create general services interface */
        ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
        if ( ! ibdev->gsi ) {
                DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
                rc = -ENOMEM;
                goto err_create_gsi;
        }

        /* Add to head of open devices list */
        list_add ( &ibdev->open_list, &open_ib_devices );

        /* Notify drivers of device state change */
        ib_notify ( ibdev );

        assert ( ibdev->open_count == 1 );
        return 0;

        ib_destroy_mi ( ibdev, ibdev->gsi );
 err_create_gsi:
        ib_destroy_sma ( ibdev, ibdev->smi );
 err_create_sma:
        ib_destroy_mi ( ibdev, ibdev->smi );
 err_create_smi:
        ibdev->op->close ( ibdev );
 err_open:
        assert ( ibdev->open_count == 1 );
        ibdev->open_count = 0;
        return rc;
}

/**
 * Close port
 *
 * @v ibdev             Infiniband device
 */
void ib_close ( struct ib_device *ibdev ) {

        /* Decrement device open request counter */
        ibdev->open_count--;

        /* Close device if this was the last remaining requested opening */
        if ( ibdev->open_count == 0 ) {
                ib_notify ( ibdev );
                list_del ( &ibdev->open_list );
                ib_destroy_mi ( ibdev, ibdev->gsi );
                ib_destroy_sma ( ibdev, ibdev->smi );
                ib_destroy_mi ( ibdev, ibdev->smi );
                ibdev->op->close ( ibdev );
                ibdev->port_state = IB_PORT_STATE_DOWN;
        }
}
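/*
 * Illustrative sketch (not part of the original file): a typical
 * consumer opens the port, polls until the subnet manager brings the
 * link to ACTIVE (or a hypothetical timeout budget expires), and
 * closes the port when finished.  mdelay() is assumed to be available
 * via <unistd.h>, as elsewhere in iPXE.
 */
static int my_open_example ( struct ib_device *ibdev ) {
        unsigned int retries;
        int rc;

        if ( ( rc = ib_open ( ibdev ) ) != 0 )
                return rc;

        /* Wait up to roughly five seconds for link-up */
        for ( retries = 0 ; retries < 50 ; retries++ ) {
                ib_poll_eq ( ibdev );
                if ( ( rc = ib_link_rc ( ibdev ) ) == 0 )
                        break;
                mdelay ( 100 );
        }
        if ( rc != 0 ) {
                ib_close ( ibdev );
                return rc;
        }

        /* ... use the device ... */

        ib_close ( ibdev );
        return 0;
}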

/***************************************************************************
 *
 * Multicast
 *
 ***************************************************************************
 */

/**
 * Attach to multicast group
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v gid               Multicast GID
 * @ret rc              Return status code
 *
 * Note that this function handles only the local device's attachment
 * to the multicast GID; it does not issue the relevant MADs to join
 * the multicast group on the subnet.
 */
int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                      union ib_gid *gid ) {
        struct ib_multicast_gid *mgid;
        int rc;

        /* Sanity check */
        assert ( qp != NULL );

        /* Add to software multicast GID list */
        mgid = zalloc ( sizeof ( *mgid ) );
        if ( ! mgid ) {
                rc = -ENOMEM;
                goto err_alloc_mgid;
        }
        memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
        list_add ( &mgid->list, &qp->mgids );

        /* Add to hardware multicast GID list */
        if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
                goto err_dev_mcast_attach;

        return 0;

 err_dev_mcast_attach:
        list_del ( &mgid->list );
        free ( mgid );
 err_alloc_mgid:
        return rc;
}

/**
 * Detach from multicast group
 *
 * @v ibdev             Infiniband device
 * @v qp                Queue pair
 * @v gid               Multicast GID
 */
void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
                       union ib_gid *gid ) {
        struct ib_multicast_gid *mgid;

        /* Sanity check */
        assert ( qp != NULL );

        /* Remove from hardware multicast GID list */
        ibdev->op->mcast_detach ( ibdev, qp, gid );

        /* Remove from software multicast GID list */
        list_for_each_entry ( mgid, &qp->mgids, list ) {
                if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
                        list_del ( &mgid->list );
                        free ( mgid );
                        break;
                }
        }
}
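/*
 * Illustrative sketch (not part of the original file): attaching a
 * queue pair to a multicast GID.  The GID bytes are placeholders and
 * the bytes[] view of union ib_gid is assumed from ipxe/infiniband.h;
 * as noted above, a real consumer (such as IPoIB) must additionally
 * send the MADs that join the group on the subnet.
 */
static int my_mcast_example ( struct ib_device *ibdev,
                              struct ib_queue_pair *qp ) {
        union ib_gid mgid;
        int rc;

        memset ( &mgid, 0, sizeof ( mgid ) );
        mgid.bytes[0] = 0xff;   /* multicast prefix (placeholder) */

        if ( ( rc = ib_mcast_attach ( ibdev, qp, &mgid ) ) != 0 )
                return rc;

        /* ... receive multicast traffic ... */

        ib_mcast_detach ( ibdev, qp, &mgid );
        return 0;
}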

/***************************************************************************
 *
 * Miscellaneous
 *
 ***************************************************************************
 */

/**
 * Count Infiniband HCA ports
 *
 * @v ibdev             Infiniband device
 * @ret num_ports       Number of ports
 */
int ib_count_ports ( struct ib_device *ibdev ) {
        struct ib_device *tmp;
        int num_ports = 0;

        /* Search for IB devices with the same physical device to
         * identify port count.
         */
        for_each_ibdev ( tmp ) {
                if ( tmp->dev == ibdev->dev )
                        num_ports++;
        }
        return num_ports;
}

/**
 * Set port information
 *
 * @v ibdev             Infiniband device
 * @v mad               Set port information MAD
 */
int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
        int rc;

        /* Adapters with embedded SMAs do not need to support this method */
        if ( ! ibdev->op->set_port_info ) {
                DBGC ( ibdev, "IBDEV %p does not support setting port "
                       "information\n", ibdev );
                return -ENOTSUP;
        }

        if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
                       ibdev, strerror ( rc ) );
                return rc;
        }

        return 0;
};

/**
 * Set partition key table
 *
 * @v ibdev             Infiniband device
 * @v mad               Set partition key table MAD
 */
int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
        int rc;

        /* Adapters with embedded SMAs do not need to support this method */
        if ( ! ibdev->op->set_pkey_table ) {
                DBGC ( ibdev, "IBDEV %p does not support setting partition "
                       "key table\n", ibdev );
                return -ENOTSUP;
        }

        if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
                DBGC ( ibdev, "IBDEV %p could not set partition key table: "
                       "%s\n", ibdev, strerror ( rc ) );
                return rc;
        }

        return 0;
};

/***************************************************************************
 *
 * Event queues
 *
 ***************************************************************************
 */

/**
 * Poll event queue
 *
 * @v ibdev             Infiniband device
 */
void ib_poll_eq ( struct ib_device *ibdev ) {
        struct ib_completion_queue *cq;

        /* Poll device's event queue */
        ibdev->op->poll_eq ( ibdev );

        /* Poll all completion queues */
        list_for_each_entry ( cq, &ibdev->cqs, list )
                ib_poll_cq ( ibdev, cq );
}

/**
 * Single-step the Infiniband event queue
 *
 * @v process           Infiniband event queue process
 */
static void ib_step ( struct process *process __unused ) {
        struct ib_device *ibdev;

        list_for_each_entry ( ibdev, &open_ib_devices, open_list )
                ib_poll_eq ( ibdev );
}

/** Infiniband event queue process */
PERMANENT_PROCESS ( ib_process, ib_step );

/***************************************************************************
 *
 * Infiniband device creation/destruction
 *
 ***************************************************************************
 */

/**
 * Allocate Infiniband device
 *
 * @v priv_size         Size of driver private data area
 * @ret ibdev           Infiniband device, or NULL
 */
struct ib_device * alloc_ibdev ( size_t priv_size ) {
        struct ib_device *ibdev;
        void *drv_priv;
        size_t total_len;

        total_len = ( sizeof ( *ibdev ) + priv_size );
        ibdev = zalloc ( total_len );
        if ( ibdev ) {
                drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
                ib_set_drvdata ( ibdev, drv_priv );
                INIT_LIST_HEAD ( &ibdev->list );
                INIT_LIST_HEAD ( &ibdev->open_list );
                INIT_LIST_HEAD ( &ibdev->cqs );
                INIT_LIST_HEAD ( &ibdev->qps );
                ibdev->port_state = IB_PORT_STATE_DOWN;
                ibdev->lid = IB_LID_NONE;
                ibdev->pkey = IB_PKEY_DEFAULT;
        }
        return ibdev;
}

/**
 * Register Infiniband device
 *
 * @v ibdev             Infiniband device
 * @ret rc              Return status code
 */
int register_ibdev ( struct ib_device *ibdev ) {
        struct ib_driver *driver;
        int rc;

        /* Add to device list */
        ibdev_get ( ibdev );
        list_add_tail ( &ibdev->list, &ib_devices );
        DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
               ibdev->dev->name );

        /* Probe device */
        for_each_table_entry ( driver, IB_DRIVERS ) {
                if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
                        DBGC ( ibdev, "IBDEV %p could not add %s device: %s\n",
                               ibdev, driver->name, strerror ( rc ) );
                        goto err_probe;
                }
        }

        return 0;

 err_probe:
        for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
                driver->remove ( ibdev );
        list_del ( &ibdev->list );
        ibdev_put ( ibdev );
        return rc;
}

/**
 * Unregister Infiniband device
 *
 * @v ibdev             Infiniband device
 */
void unregister_ibdev ( struct ib_device *ibdev ) {
        struct ib_driver *driver;

        /* Remove device */
        for_each_table_entry_reverse ( driver, IB_DRIVERS )
                driver->remove ( ibdev );

        /* Remove from device list */
        list_del ( &ibdev->list );
        ibdev_put ( ibdev );
        DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
}
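/*
 * Illustrative sketch (not part of the original file): the shape of a
 * hardware driver's probe and remove paths.  "struct my_nic",
 * "my_ib_op" and the single-port assumption are hypothetical; the
 * essential sequence is alloc_ibdev() with room for private data,
 * filling in op/dev/port, and register_ibdev(), balanced by
 * unregister_ibdev() and ibdev_put() on removal.
 */
struct my_nic {
        /* Driver-private state would live here */
        unsigned int dummy;
};

static int my_probe_example ( struct device *dev,
                              struct ib_device_operations *my_ib_op ) {
        struct ib_device *ibdev;
        struct my_nic *nic;
        int rc;

        /* Allocate Infiniband device with space for private data */
        ibdev = alloc_ibdev ( sizeof ( *nic ) );
        if ( ! ibdev )
                return -ENOMEM;
        nic = ib_get_drvdata ( ibdev );
        nic->dummy = 0; /* private area is pre-zeroed by alloc_ibdev() */

        /* Fill in device particulars */
        ibdev->op = my_ib_op;
        ibdev->dev = dev;
        ibdev->port = 1;

        /* Register with the Infiniband core */
        if ( ( rc = register_ibdev ( ibdev ) ) != 0 ) {
                ibdev_put ( ibdev );
                return rc;
        }

        return 0;
}

static void my_remove_example ( struct ib_device *ibdev ) {
        unregister_ibdev ( ibdev );
        ibdev_put ( ibdev );
}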

/**
 * Find Infiniband device by GID
 *
 * @v gid               GID
 * @ret ibdev           Infiniband device, or NULL
 */
struct ib_device * find_ibdev ( union ib_gid *gid ) {
        struct ib_device *ibdev;

        for_each_ibdev ( ibdev ) {
                if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
                        return ibdev;
        }
        return NULL;
}

/**
 * Get most recently opened Infiniband device
 *
 * @ret ibdev           Most recently opened Infiniband device, or NULL
 */
struct ib_device * last_opened_ibdev ( void ) {
        struct ib_device *ibdev;

        ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
                                   open_list );
        if ( ! ibdev )
                return NULL;

        assert ( ibdev->open_count != 0 );
        return ibdev;
}
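/*
 * Illustrative sketch (not part of the original file): choosing a
 * device.  A consumer that knows a port GID can recover the owning
 * device with find_ibdev(); one with no better information can fall
 * back to the most recently opened device.  The wrapper name is
 * hypothetical.
 */
static struct ib_device * my_choose_ibdev ( union ib_gid *gid ) {
        struct ib_device *ibdev;

        if ( gid && ( ( ibdev = find_ibdev ( gid ) ) != NULL ) )
                return ibdev;
        return last_opened_ibdev();
}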

/* Drag in objects via register_ibdev() */
REQUIRING_SYMBOL ( register_ibdev );

/* Drag in Infiniband configuration */
REQUIRE_OBJECT ( config_infiniband );

/* Drag in IPoIB */
REQUIRE_OBJECT ( ipoib );