[infiniband] Use connection's local ID as debug message identifier
[ipxe.git] / src / net / infiniband / ib_cm.c
1 /*
2 * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
18 *
19 * You can also choose to distribute this program under the terms of
20 * the Unmodified Binary Distribution Licence (as given in the file
21 * COPYING.UBDL), provided that you have satisfied its requirements.
22 */
23
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <byteswap.h>
30 #include <errno.h>
31 #include <assert.h>
32 #include <ipxe/infiniband.h>
33 #include <ipxe/ib_mi.h>
34 #include <ipxe/ib_pathrec.h>
35 #include <ipxe/ib_cm.h>
36
37 /**
38 * @file
39 *
40 * Infiniband communication management
41 *
42 */
43
/** All connections currently known to the communication manager */
static LIST_HEAD ( ib_cm_conns );
46
47 /**
48 * Find connection by local communication ID
49 *
50 * @v local_id Local communication ID
51 * @ret conn Connection, or NULL
52 */
53 static struct ib_connection * ib_cm_find ( uint32_t local_id ) {
54 struct ib_connection *conn;
55
56 list_for_each_entry ( conn, &ib_cm_conns, list ) {
57 if ( conn->local_id == local_id )
58 return conn;
59 }
60 return NULL;
61 }
62
63 /**
64 * Send "ready to use" response
65 *
66 * @v ibdev Infiniband device
67 * @v mi Management interface
68 * @v av Address vector
69 * @v local_id Local communication ID
70 * @v remote_id Remote communication ID
71 * @ret rc Return status code
72 */
73 static int ib_cm_send_rtu ( struct ib_device *ibdev,
74 struct ib_mad_interface *mi,
75 struct ib_address_vector *av,
76 uint32_t local_id, uint32_t remote_id ) {
77 union ib_mad mad;
78 struct ib_cm_ready_to_use *rtu = &mad.cm.cm_data.ready_to_use;
79 int rc;
80
81 /* Construct "ready to use" response */
82 memset ( &mad, 0, sizeof ( mad ) );
83 mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
84 mad.hdr.class_version = IB_CM_CLASS_VERSION;
85 mad.hdr.method = IB_MGMT_METHOD_SEND;
86 mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
87 rtu->local_id = htonl ( local_id );
88 rtu->remote_id = htonl ( remote_id );
89 if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ) {
90 DBGC ( local_id, "CM %08x could not send RTU: %s\n",
91 local_id, strerror ( rc ) );
92 return rc;
93 }
94
95 return 0;
96 }
97
98 /**
99 * Handle duplicate connection replies
100 *
101 * @v ibdev Infiniband device
102 * @v mi Management interface
103 * @v mad Received MAD
104 * @v av Source address vector
105 * @ret rc Return status code
106 *
107 * If a "ready to use" MAD is lost, the peer may resend the connection
108 * reply. We have to respond to these with duplicate "ready to use"
109 * MADs, otherwise the peer may time out and drop the connection.
110 */
111 static void ib_cm_recv_rep ( struct ib_device *ibdev,
112 struct ib_mad_interface *mi,
113 union ib_mad *mad,
114 struct ib_address_vector *av ) {
115 struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
116 struct ib_connection *conn;
117 uint32_t local_id = ntohl ( rep->remote_id );
118 int rc;
119
120 /* Identify connection */
121 conn = ib_cm_find ( local_id );
122 if ( conn ) {
123 /* Try to send "ready to use" reply */
124 if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
125 conn->remote_id ) ) != 0 ) {
126 /* Ignore errors; the remote end will retry */
127 }
128 } else {
129 DBGC ( local_id, "CM %08x unexpected REP\n", local_id );
130 }
131 }
132
133 /**
134 * Send reply to disconnection request
135 *
136 * @v ibdev Infiniband device
137 * @v mi Management interface
138 * @v av Address vector
139 * @v local_id Local communication ID
140 * @v remote_id Remote communication ID
141 * @ret rc Return status code
142 */
143 static int ib_cm_send_drep ( struct ib_device *ibdev,
144 struct ib_mad_interface *mi,
145 struct ib_address_vector *av,
146 uint32_t local_id, uint32_t remote_id ) {
147 union ib_mad mad;
148 struct ib_cm_disconnect_reply *drep = &mad.cm.cm_data.disconnect_reply;
149 int rc;
150
151 /* Construct reply to disconnection request */
152 memset ( &mad, 0, sizeof ( mad ) );
153 mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
154 mad.hdr.class_version = IB_CM_CLASS_VERSION;
155 mad.hdr.method = IB_MGMT_METHOD_SEND;
156 mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY );
157 drep->local_id = htonl ( local_id );
158 drep->remote_id = htonl ( remote_id );
159 if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ) {
160 DBGC ( local_id, "CM %08x could not send DREP: %s\n",
161 local_id, strerror ( rc ) );
162 return rc;
163 }
164
165 return 0;
166 }
167
168 /**
169 * Handle disconnection requests
170 *
171 * @v ibdev Infiniband device
172 * @v mi Management interface
173 * @v mad Received MAD
174 * @v av Source address vector
175 * @ret rc Return status code
176 */
177 static void ib_cm_recv_dreq ( struct ib_device *ibdev,
178 struct ib_mad_interface *mi,
179 union ib_mad *mad,
180 struct ib_address_vector *av ) {
181 struct ib_cm_disconnect_request *dreq =
182 &mad->cm.cm_data.disconnect_request;
183 struct ib_connection *conn;
184 uint32_t local_id = ntohl ( dreq->remote_id );
185 uint32_t remote_id = ntohl ( dreq->local_id );
186 int rc;
187
188 /* Identify connection */
189 conn = ib_cm_find ( local_id );
190 if ( conn ) {
191 /* Notify upper layer */
192 conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN,
193 &dreq->private_data,
194 sizeof ( dreq->private_data ) );
195 } else {
196 DBGC ( local_id, "CM %08x unexpected DREQ\n", local_id );
197 }
198
199 /* Send reply */
200 if ( ( rc = ib_cm_send_drep ( ibdev, mi, av, local_id,
201 remote_id ) ) != 0 ) {
202 /* Ignore errors; the remote end will retry */
203 }
204 };
205
206 /** Communication management agents */
207 struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
208 {
209 .mgmt_class = IB_MGMT_CLASS_CM,
210 .class_version = IB_CM_CLASS_VERSION,
211 .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
212 .handle = ib_cm_recv_rep,
213 },
214 {
215 .mgmt_class = IB_MGMT_CLASS_CM,
216 .class_version = IB_CM_CLASS_VERSION,
217 .attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ),
218 .handle = ib_cm_recv_dreq,
219 },
220 };
221
222 /**
223 * Convert connection rejection reason to return status code
224 *
225 * @v reason Rejection reason (in network byte order)
226 * @ret rc Return status code
227 */
228 static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
229 switch ( reason ) {
230 case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) :
231 return -ENODEV;
232 case htons ( IB_CM_REJECT_STALE_CONN ) :
233 return -EALREADY;
234 case htons ( IB_CM_REJECT_CONSUMER ) :
235 return -ENOTTY;
236 default:
237 return -EPERM;
238 }
239 }
240
241 /**
242 * Handle connection request transaction completion
243 *
244 * @v ibdev Infiniband device
245 * @v mi Management interface
246 * @v madx Management transaction
247 * @v rc Status code
248 * @v mad Received MAD (or NULL on error)
249 * @v av Source address vector (or NULL on error)
250 */
251 static void ib_cm_req_complete ( struct ib_device *ibdev,
252 struct ib_mad_interface *mi,
253 struct ib_mad_transaction *madx,
254 int rc, union ib_mad *mad,
255 struct ib_address_vector *av ) {
256 struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
257 struct ib_queue_pair *qp = conn->qp;
258 struct ib_cm_common *common = &mad->cm.cm_data.common;
259 struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
260 struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject;
261 uint32_t local_id = conn->local_id;
262 void *private_data = NULL;
263 size_t private_data_len = 0;
264
265 /* Report failures */
266 if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
267 rc = -EIO;
268 if ( rc != 0 ) {
269 DBGC ( local_id, "CM %08x connection request failed: %s\n",
270 local_id, strerror ( rc ) );
271 goto out;
272 }
273
274 /* Record remote communication ID */
275 conn->remote_id = ntohl ( common->local_id );
276
277 /* Handle response */
278 switch ( mad->hdr.attr_id ) {
279
280 case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
281 /* Extract fields */
282 qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 );
283 qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 );
284 private_data = &rep->private_data;
285 private_data_len = sizeof ( rep->private_data );
286 DBGC ( local_id, "CM %08x connected to QPN %#lx PSN %#x\n",
287 local_id, qp->av.qpn, qp->send.psn );
288
289 /* Modify queue pair */
290 if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
291 DBGC ( local_id, "CM %08x could not modify queue "
292 "pair: %s\n", local_id, strerror ( rc ) );
293 goto out;
294 }
295
296 /* Send "ready to use" reply */
297 if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
298 conn->remote_id ) ) != 0 ) {
299 /* Treat as non-fatal */
300 rc = 0;
301 }
302 break;
303
304 case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
305 /* Extract fields */
306 DBGC ( local_id, "CM %08x connection rejected (reason %d)\n",
307 local_id, ntohs ( rej->reason ) );
308 /* Private data is valid only for a Consumer Reject */
309 if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
310 private_data = &rej->private_data;
311 private_data_len = sizeof ( rej->private_data );
312 }
313 rc = ib_cm_rejection_reason_to_rc ( rej->reason );
314 break;
315
316 default:
317 DBGC ( local_id, "CM %08x unexpected response (attribute "
318 "%04x)\n", local_id, ntohs ( mad->hdr.attr_id ) );
319 rc = -ENOTSUP;
320 break;
321 }
322
323 out:
324 /* Destroy the completed transaction */
325 ib_destroy_madx ( ibdev, ibdev->gsi, madx );
326 conn->madx = NULL;
327
328 /* Hand off to the upper completion handler */
329 conn->op->changed ( ibdev, qp, conn, rc, private_data,
330 private_data_len );
331 }
332
333 /** Connection request operations */
334 static struct ib_mad_transaction_operations ib_cm_req_op = {
335 .complete = ib_cm_req_complete,
336 };
337
338 /**
339 * Handle connection path transaction completion
340 *
341 * @v ibdev Infiniband device
342 * @v path Path
343 * @v rc Status code
344 * @v av Address vector, or NULL on error
345 */
346 static void ib_cm_path_complete ( struct ib_device *ibdev,
347 struct ib_path *path, int rc,
348 struct ib_address_vector *av ) {
349 struct ib_connection *conn = ib_path_get_ownerdata ( path );
350 struct ib_queue_pair *qp = conn->qp;
351 union ib_mad mad;
352 struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request;
353 uint32_t local_id = conn->local_id;
354 size_t private_data_len;
355
356 /* Report failures */
357 if ( rc != 0 ) {
358 DBGC ( local_id, "CM %08x path lookup failed: %s\n",
359 local_id, strerror ( rc ) );
360 conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
361 goto out;
362 }
363
364 /* Update queue pair peer path */
365 memcpy ( &qp->av, av, sizeof ( qp->av ) );
366
367 /* Construct connection request */
368 memset ( &mad, 0, sizeof ( mad ) );
369 mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
370 mad.hdr.class_version = IB_CM_CLASS_VERSION;
371 mad.hdr.method = IB_MGMT_METHOD_SEND;
372 mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
373 req->local_id = htonl ( conn->local_id );
374 memcpy ( &req->service_id, &conn->service_id,
375 sizeof ( req->service_id ) );
376 memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) );
377 req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) | 1 );
378 req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
379 req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
380 htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
381 ( 0 << 0 ) );
382 req->starting_psn__local_timeout__retry_count =
383 htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
384 ( 0x07 << 0 ) );
385 req->pkey = htons ( ibdev->pkey );
386 req->payload_mtu__rdc_exists__rnr_retry =
387 ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
388 req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) );
389 req->primary.local_lid = htons ( ibdev->lid );
390 req->primary.remote_lid = htons ( conn->qp->av.lid );
391 memcpy ( &req->primary.local_gid, &ibdev->gid,
392 sizeof ( req->primary.local_gid ) );
393 memcpy ( &req->primary.remote_gid, &conn->qp->av.gid,
394 sizeof ( req->primary.remote_gid ) );
395 req->primary.flow_label__rate =
396 htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
397 req->primary.hop_limit = 0;
398 req->primary.sl__subnet_local =
399 ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
400 req->primary.local_ack_timeout = ( 0x13 << 3 );
401 private_data_len = conn->private_data_len;
402 if ( private_data_len > sizeof ( req->private_data ) )
403 private_data_len = sizeof ( req->private_data );
404 memcpy ( &req->private_data, &conn->private_data, private_data_len );
405
406 /* Create connection request */
407 av->qpn = IB_QPN_GSI;
408 av->qkey = IB_QKEY_GSI;
409 conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
410 &ib_cm_req_op );
411 if ( ! conn->madx ) {
412 DBGC ( local_id, "CM %08x could not create connection "
413 "request\n", local_id );
414 conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
415 goto out;
416 }
417 ib_madx_set_ownerdata ( conn->madx, conn );
418
419 out:
420 /* Destroy the completed transaction */
421 ib_destroy_path ( ibdev, path );
422 conn->path = NULL;
423 }
424
425 /** Connection path operations */
426 static struct ib_path_operations ib_cm_path_op = {
427 .complete = ib_cm_path_complete,
428 };
429
430 /**
431 * Create connection to remote QP
432 *
433 * @v ibdev Infiniband device
434 * @v qp Queue pair
435 * @v dgid Target GID
436 * @v service_id Target service ID
437 * @v private_data Connection request private data
438 * @v private_data_len Length of connection request private data
439 * @v op Connection operations
440 * @ret conn Connection
441 */
442 struct ib_connection *
443 ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
444 union ib_gid *dgid, union ib_guid *service_id,
445 void *private_data, size_t private_data_len,
446 struct ib_connection_operations *op ) {
447 struct ib_connection *conn;
448 uint32_t local_id;
449
450 /* Allocate and initialise request */
451 conn = zalloc ( sizeof ( *conn ) + private_data_len );
452 if ( ! conn )
453 goto err_alloc_conn;
454 conn->ibdev = ibdev;
455 conn->qp = qp;
456 memset ( &qp->av, 0, sizeof ( qp->av ) );
457 qp->av.gid_present = 1;
458 memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
459 conn->local_id = local_id = random();
460 memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
461 conn->op = op;
462 conn->private_data_len = private_data_len;
463 memcpy ( &conn->private_data, private_data, private_data_len );
464
465 /* Create path */
466 conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
467 if ( ! conn->path )
468 goto err_create_path;
469 ib_path_set_ownerdata ( conn->path, conn );
470
471 /* Add to list of connections */
472 list_add ( &conn->list, &ib_cm_conns );
473
474 DBGC ( local_id, "CM %08x created for IBDEV %s QPN %#lx\n",
475 local_id, ibdev->name, qp->qpn );
476 DBGC ( local_id, "CM %08x connecting to " IB_GID_FMT " "
477 IB_GUID_FMT "\n", local_id, IB_GID_ARGS ( dgid ),
478 IB_GUID_ARGS ( service_id ) );
479
480 return conn;
481
482 ib_destroy_path ( ibdev, conn->path );
483 err_create_path:
484 free ( conn );
485 err_alloc_conn:
486 return NULL;
487 }
488
489 /**
490 * Destroy connection to remote QP
491 *
492 * @v ibdev Infiniband device
493 * @v qp Queue pair
494 * @v conn Connection
495 */
496 void ib_destroy_conn ( struct ib_device *ibdev,
497 struct ib_queue_pair *qp __unused,
498 struct ib_connection *conn ) {
499
500 list_del ( &conn->list );
501 if ( conn->madx )
502 ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
503 if ( conn->path )
504 ib_destroy_path ( ibdev, conn->path );
505 free ( conn );
506 }