[infiniband] Allow for the creation of multicast groups
[ipxe.git] / src / net / infiniband / ib_mi.c
1 /*
2 * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
18 *
19 * You can also choose to distribute this program under the terms of
20 * the Unmodified Binary Distribution Licence (as given in the file
21 * COPYING.UBDL), provided that you have satisfied its requirements.
22 */
23
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <errno.h>
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <byteswap.h>
33 #include <ipxe/infiniband.h>
34 #include <ipxe/iobuf.h>
35 #include <ipxe/ib_mi.h>
36
37 /**
38 * @file
39 *
40 * Infiniband management interfaces
41 *
42 */
43
/** Number of send work queue entries on a management interface
 *
 * This is a policy decision.
 */
#define IB_MI_NUM_SEND_WQES 4

/** Number of receive work queue entries on a management interface
 *
 * This is a policy decision.
 */
#define IB_MI_NUM_RECV_WQES 2

/** Number of completion queue entries on a management interface
 *
 * This is a policy decision.
 */
#define IB_MI_NUM_CQES 8

/** TID magic signature
 *
 * The four characters 'i', 'P', 'X', 'E' packed into one 32-bit
 * value, most significant byte first.  Used as the high word of
 * every TID generated by this file.
 */
#define IB_MI_TID_MAGIC							\
	( ( 'i' << 24 ) | ( 'P' << 16 ) | ( 'X' << 8 ) | 'E' )

/** Counter supplying the low word of the next generated TID
 *
 * Pre-incremented by ib_mi_send() each time it assigns a TID.
 */
static unsigned int next_tid;
67
68 /**
69 * Handle received MAD
70 *
71 * @v ibdev Infiniband device
72 * @v mi Management interface
73 * @v mad Received MAD
74 * @v av Source address vector
75 * @ret rc Return status code
76 */
77 static int ib_mi_handle ( struct ib_device *ibdev,
78 struct ib_mad_interface *mi,
79 union ib_mad *mad,
80 struct ib_address_vector *av ) {
81 struct ib_mad_hdr *hdr = &mad->hdr;
82 struct ib_mad_transaction *madx;
83 struct ib_mad_agent *agent;
84
85 /* Look for a matching transaction by TID */
86 list_for_each_entry ( madx, &mi->madx, list ) {
87 if ( memcmp ( &hdr->tid, &madx->mad.hdr.tid,
88 sizeof ( hdr->tid ) ) != 0 )
89 continue;
90 /* Found a matching transaction */
91 madx->op->complete ( ibdev, mi, madx, 0, mad, av );
92 return 0;
93 }
94
95 /* If there is no matching transaction, look for a listening agent */
96 for_each_table_entry ( agent, IB_MAD_AGENTS ) {
97 if ( ( ( agent->mgmt_class & IB_MGMT_CLASS_MASK ) !=
98 ( hdr->mgmt_class & IB_MGMT_CLASS_MASK ) ) ||
99 ( agent->class_version != hdr->class_version ) ||
100 ( agent->attr_id != hdr->attr_id ) )
101 continue;
102 /* Found a matching agent */
103 agent->handle ( ibdev, mi, mad, av );
104 return 0;
105 }
106
107 /* Otherwise, ignore it */
108 DBGC ( mi, "MI %p RX TID %08x%08x ignored\n",
109 mi, ntohl ( hdr->tid.high ), ntohl ( hdr->tid.low ) );
110 return -ENOTSUP;
111 }
112
113 /**
114 * Complete receive via management interface
115 *
116 *
117 * @v ibdev Infiniband device
118 * @v qp Queue pair
119 * @v dest Destination address vector
120 * @v source Source address vector
121 * @v iobuf I/O buffer
122 * @v rc Completion status code
123 */
124 static void ib_mi_complete_recv ( struct ib_device *ibdev,
125 struct ib_queue_pair *qp,
126 struct ib_address_vector *dest __unused,
127 struct ib_address_vector *source,
128 struct io_buffer *iobuf, int rc ) {
129 struct ib_mad_interface *mi = ib_qp_get_ownerdata ( qp );
130 union ib_mad *mad;
131 struct ib_mad_hdr *hdr;
132
133 /* Ignore errors */
134 if ( rc != 0 ) {
135 DBGC ( mi, "MI %p RX error: %s\n", mi, strerror ( rc ) );
136 goto out;
137 }
138
139 /* Sanity checks */
140 if ( iob_len ( iobuf ) != sizeof ( *mad ) ) {
141 DBGC ( mi, "MI %p RX bad size (%zd bytes)\n",
142 mi, iob_len ( iobuf ) );
143 DBGC_HDA ( mi, 0, iobuf->data, iob_len ( iobuf ) );
144 goto out;
145 }
146 mad = iobuf->data;
147 hdr = &mad->hdr;
148 if ( hdr->base_version != IB_MGMT_BASE_VERSION ) {
149 DBGC ( mi, "MI %p RX unsupported base version %x\n",
150 mi, hdr->base_version );
151 DBGC_HDA ( mi, 0, mad, sizeof ( *mad ) );
152 goto out;
153 }
154 DBGC ( mi, "MI %p RX TID %08x%08x (%02x,%02x,%02x,%04x) status "
155 "%04x\n", mi, ntohl ( hdr->tid.high ), ntohl ( hdr->tid.low ),
156 hdr->mgmt_class, hdr->class_version, hdr->method,
157 ntohs ( hdr->attr_id ), ntohs ( hdr->status ) );
158 DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) );
159
160 /* Handle MAD */
161 if ( ( rc = ib_mi_handle ( ibdev, mi, mad, source ) ) != 0 )
162 goto out;
163
164 out:
165 free_iob ( iobuf );
166 }
167
168 /** Management interface completion operations */
169 static struct ib_completion_queue_operations ib_mi_completion_ops = {
170 .complete_recv = ib_mi_complete_recv,
171 };
172
173 /** Management interface queue pair operations */
174 static struct ib_queue_pair_operations ib_mi_queue_pair_ops = {
175 .alloc_iob = alloc_iob,
176 };
177
178 /**
179 * Transmit MAD
180 *
181 * @v ibdev Infiniband device
182 * @v mi Management interface
183 * @v mad MAD
184 * @v av Destination address vector
185 * @ret rc Return status code
186 */
187 int ib_mi_send ( struct ib_device *ibdev, struct ib_mad_interface *mi,
188 union ib_mad *mad, struct ib_address_vector *av ) {
189 struct ib_mad_hdr *hdr = &mad->hdr;
190 struct io_buffer *iobuf;
191 int rc;
192
193 /* Set common fields */
194 hdr->base_version = IB_MGMT_BASE_VERSION;
195 if ( ( hdr->tid.high == 0 ) && ( hdr->tid.low == 0 ) ) {
196 hdr->tid.high = htonl ( IB_MI_TID_MAGIC );
197 hdr->tid.low = htonl ( ++next_tid );
198 }
199 DBGC ( mi, "MI %p TX TID %08x%08x (%02x,%02x,%02x,%04x) status "
200 "%04x\n", mi, ntohl ( hdr->tid.high ), ntohl ( hdr->tid.low ),
201 hdr->mgmt_class, hdr->class_version, hdr->method,
202 ntohs ( hdr->attr_id ), ntohs ( hdr->status ) );
203 DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) );
204
205 /* Construct directed route portion of response, if necessary */
206 if ( hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ) {
207 struct ib_mad_smp *smp = &mad->smp;
208 unsigned int hop_pointer;
209 unsigned int hop_count;
210
211 smp->mad_hdr.status |= htons ( IB_SMP_STATUS_D_INBOUND );
212 hop_pointer = smp->mad_hdr.class_specific.smp.hop_pointer;
213 hop_count = smp->mad_hdr.class_specific.smp.hop_count;
214 assert ( hop_count == hop_pointer );
215 if ( hop_pointer < ( sizeof ( smp->return_path.hops ) /
216 sizeof ( smp->return_path.hops[0] ) ) ) {
217 smp->return_path.hops[hop_pointer] = ibdev->port;
218 } else {
219 DBGC ( mi, "MI %p TX TID %08x%08x invalid hop pointer "
220 "%d\n", mi, ntohl ( hdr->tid.high ),
221 ntohl ( hdr->tid.low ), hop_pointer );
222 return -EINVAL;
223 }
224 }
225
226 /* Construct I/O buffer */
227 iobuf = alloc_iob ( sizeof ( *mad ) );
228 if ( ! iobuf ) {
229 DBGC ( mi, "MI %p could not allocate buffer for TID "
230 "%08x%08x\n",
231 mi, ntohl ( hdr->tid.high ), ntohl ( hdr->tid.low ) );
232 return -ENOMEM;
233 }
234 memcpy ( iob_put ( iobuf, sizeof ( *mad ) ), mad, sizeof ( *mad ) );
235
236 /* Send I/O buffer */
237 if ( ( rc = ib_post_send ( ibdev, mi->qp, av, iobuf ) ) != 0 ) {
238 DBGC ( mi, "MI %p TX TID %08x%08x failed: %s\n",
239 mi, ntohl ( hdr->tid.high ), ntohl ( hdr->tid.low ),
240 strerror ( rc ) );
241 free_iob ( iobuf );
242 return rc;
243 }
244
245 return 0;
246 }
247
248 /**
249 * Handle management transaction timer expiry
250 *
251 * @v timer Retry timer
252 * @v expired Failure indicator
253 */
254 static void ib_mi_timer_expired ( struct retry_timer *timer, int expired ) {
255 struct ib_mad_transaction *madx =
256 container_of ( timer, struct ib_mad_transaction, timer );
257 struct ib_mad_interface *mi = madx->mi;
258 struct ib_device *ibdev = mi->ibdev;
259 struct ib_mad_hdr *hdr = &madx->mad.hdr;
260
261 /* Abandon transaction if we have tried too many times */
262 if ( expired ) {
263 DBGC ( mi, "MI %p abandoning TID %08x%08x\n",
264 mi, ntohl ( hdr->tid.high ), ntohl ( hdr->tid.low ) );
265 madx->op->complete ( ibdev, mi, madx, -ETIMEDOUT, NULL, NULL );
266 return;
267 }
268
269 /* Restart retransmission timer */
270 start_timer ( timer );
271
272 /* Resend MAD */
273 ib_mi_send ( ibdev, mi, &madx->mad, &madx->av );
274 }
275
276 /**
277 * Create management transaction
278 *
279 * @v ibdev Infiniband device
280 * @v mi Management interface
281 * @v mad MAD to send
282 * @v av Destination address, or NULL to use SM's GSI
283 * @v op Management transaction operations
284 * @ret madx Management transaction, or NULL
285 */
286 struct ib_mad_transaction *
287 ib_create_madx ( struct ib_device *ibdev, struct ib_mad_interface *mi,
288 union ib_mad *mad, struct ib_address_vector *av,
289 struct ib_mad_transaction_operations *op ) {
290 struct ib_mad_transaction *madx;
291
292 /* Allocate and initialise structure */
293 madx = zalloc ( sizeof ( *madx ) );
294 if ( ! madx )
295 return NULL;
296 timer_init ( &madx->timer, ib_mi_timer_expired, NULL );
297 madx->mi = mi;
298 madx->op = op;
299
300 /* Determine address vector */
301 if ( av ) {
302 memcpy ( &madx->av, av, sizeof ( madx->av ) );
303 } else {
304 madx->av.lid = ibdev->sm_lid;
305 madx->av.sl = ibdev->sm_sl;
306 madx->av.qpn = IB_QPN_GSI;
307 madx->av.qkey = IB_QKEY_GSI;
308 }
309
310 /* Copy MAD */
311 memcpy ( &madx->mad, mad, sizeof ( madx->mad ) );
312
313 /* Add to list and start timer to send initial MAD */
314 list_add ( &madx->list, &mi->madx );
315 start_timer_nodelay ( &madx->timer );
316
317 return madx;
318 }
319
320 /**
321 * Destroy management transaction
322 *
323 * @v ibdev Infiniband device
324 * @v mi Management interface
325 * @v madx Management transaction
326 */
327 void ib_destroy_madx ( struct ib_device *ibdev __unused,
328 struct ib_mad_interface *mi __unused,
329 struct ib_mad_transaction *madx ) {
330
331 /* Stop timer and remove from list */
332 stop_timer ( &madx->timer );
333 list_del ( &madx->list );
334
335 /* Free transaction */
336 free ( madx );
337 }
338
339 /**
340 * Create management interface
341 *
342 * @v ibdev Infiniband device
343 * @v type Queue pair type
344 * @ret mi Management agent, or NULL
345 */
346 struct ib_mad_interface * ib_create_mi ( struct ib_device *ibdev,
347 enum ib_queue_pair_type type ) {
348 struct ib_mad_interface *mi;
349 int rc;
350
351 /* Allocate and initialise fields */
352 mi = zalloc ( sizeof ( *mi ) );
353 if ( ! mi )
354 goto err_alloc;
355 mi->ibdev = ibdev;
356 INIT_LIST_HEAD ( &mi->madx );
357
358 /* Create completion queue */
359 mi->cq = ib_create_cq ( ibdev, IB_MI_NUM_CQES, &ib_mi_completion_ops );
360 if ( ! mi->cq ) {
361 DBGC ( mi, "MI %p could not allocate completion queue\n", mi );
362 goto err_create_cq;
363 }
364
365 /* Create queue pair */
366 mi->qp = ib_create_qp ( ibdev, type, IB_MI_NUM_SEND_WQES, mi->cq,
367 IB_MI_NUM_RECV_WQES, mi->cq,
368 &ib_mi_queue_pair_ops );
369 if ( ! mi->qp ) {
370 DBGC ( mi, "MI %p could not allocate queue pair\n", mi );
371 goto err_create_qp;
372 }
373 ib_qp_set_ownerdata ( mi->qp, mi );
374 DBGC ( mi, "MI %p (%s) running on QPN %#lx\n",
375 mi, ( ( type == IB_QPT_SMI ) ? "SMI" : "GSI" ), mi->qp->qpn );
376
377 /* Set queue key */
378 mi->qp->qkey = ( ( type == IB_QPT_SMI ) ? IB_QKEY_SMI : IB_QKEY_GSI );
379 if ( ( rc = ib_modify_qp ( ibdev, mi->qp ) ) != 0 ) {
380 DBGC ( mi, "MI %p could not set queue key: %s\n",
381 mi, strerror ( rc ) );
382 goto err_modify_qp;
383 }
384
385 /* Fill receive ring */
386 ib_refill_recv ( ibdev, mi->qp );
387 return mi;
388
389 err_modify_qp:
390 ib_destroy_qp ( ibdev, mi->qp );
391 err_create_qp:
392 ib_destroy_cq ( ibdev, mi->cq );
393 err_create_cq:
394 free ( mi );
395 err_alloc:
396 return NULL;
397 }
398
399 /**
400 * Destroy management interface
401 *
402 * @v mi Management interface
403 */
404 void ib_destroy_mi ( struct ib_device *ibdev, struct ib_mad_interface *mi ) {
405 struct ib_mad_transaction *madx;
406 struct ib_mad_transaction *tmp;
407
408 /* Flush any outstanding requests */
409 list_for_each_entry_safe ( madx, tmp, &mi->madx, list ) {
410 DBGC ( mi, "MI %p destroyed while TID %08x%08x in progress\n",
411 mi, ntohl ( madx->mad.hdr.tid.high ),
412 ntohl ( madx->mad.hdr.tid.low ) );
413 madx->op->complete ( ibdev, mi, madx, -ECANCELED, NULL, NULL );
414 }
415
416 ib_destroy_qp ( ibdev, mi->qp );
417 ib_destroy_cq ( ibdev, mi->cq );
418 free ( mi );
419 }