/*
 * QEMU PAPR Storage Class Memory Interfaces
 *
 * Copyright (c) 2019-2020, IBM Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24 #include "qemu/osdep.h"
25 #include "qemu/cutils.h"
26 #include "qapi/error.h"
27 #include "hw/ppc/spapr_drc.h"
28 #include "hw/ppc/spapr_nvdimm.h"
29 #include "hw/mem/nvdimm.h"
30 #include "qemu/nvdimm-utils.h"
31 #include "hw/ppc/fdt.h"
32 #include "qemu/range.h"
33 #include "hw/ppc/spapr_numa.h"
34 #include "block/thread-pool.h"
35 #include "migration/vmstate.h"
36 #include "qemu/pmem.h"
37 #include "hw/qdev-properties.h"
39 /* DIMM health bitmap bitmap indicators. Taken from kernel's papr_scm.c */
40 /* SCM device is unable to persist memory contents */
41 #define PAPR_PMEM_UNARMED PPC_BIT(0)
44 * The nvdimm size should be aligned to SCM block size.
45 * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE
46 * in order to have SCM regions not to overlap with dimm memory regions.
47 * The SCM devices can have variable block sizes. For now, fixing the
48 * block size to the minimum value.
50 #define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE
52 /* Have an explicit check for alignment */
53 QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE
% SPAPR_MEMORY_BLOCK_SIZE
);
55 #define TYPE_SPAPR_NVDIMM "spapr-nvdimm"
56 OBJECT_DECLARE_TYPE(SpaprNVDIMMDevice
, SPAPRNVDIMMClass
, SPAPR_NVDIMM
)
58 struct SPAPRNVDIMMClass
{
60 NVDIMMClass parent_class
;
63 void (*realize
)(NVDIMMDevice
*dimm
, Error
**errp
);
64 void (*unrealize
)(NVDIMMDevice
*dimm
, Error
**errp
);
67 bool spapr_nvdimm_validate(HotplugHandler
*hotplug_dev
, NVDIMMDevice
*nvdimm
,
68 uint64_t size
, Error
**errp
)
70 const MachineClass
*mc
= MACHINE_GET_CLASS(hotplug_dev
);
71 const MachineState
*ms
= MACHINE(hotplug_dev
);
72 PCDIMMDevice
*dimm
= PC_DIMM(nvdimm
);
73 MemoryRegion
*mr
= host_memory_backend_get_memory(dimm
->hostmem
);
74 g_autofree
char *uuidstr
= NULL
;
78 if (!mc
->nvdimm_supported
) {
79 error_setg(errp
, "NVDIMM hotplug not supported for this machine");
83 if (!ms
->nvdimms_state
->is_enabled
) {
84 error_setg(errp
, "nvdimm device found but 'nvdimm=off' was set");
88 if (object_property_get_int(OBJECT(nvdimm
), NVDIMM_LABEL_SIZE_PROP
,
90 error_setg(errp
, "PAPR requires NVDIMM devices to have label-size set");
94 if (size
% SPAPR_MINIMUM_SCM_BLOCK_SIZE
) {
95 error_setg(errp
, "PAPR requires NVDIMM memory size (excluding label)"
96 " to be a multiple of %" PRIu64
"MB",
97 SPAPR_MINIMUM_SCM_BLOCK_SIZE
/ MiB
);
101 uuidstr
= object_property_get_str(OBJECT(nvdimm
), NVDIMM_UUID_PROP
,
103 ret
= qemu_uuid_parse(uuidstr
, &uuid
);
106 if (qemu_uuid_is_null(&uuid
)) {
107 error_setg(errp
, "NVDIMM device requires the uuid to be set");
111 if (object_dynamic_cast(OBJECT(nvdimm
), TYPE_SPAPR_NVDIMM
) &&
112 (memory_region_get_fd(mr
) < 0)) {
113 error_setg(errp
, "spapr-nvdimm device requires the "
114 "memdev %s to be of memory-backend-file type",
115 object_get_canonical_path_component(OBJECT(dimm
->hostmem
)));
123 void spapr_add_nvdimm(DeviceState
*dev
, uint64_t slot
)
126 bool hotplugged
= spapr_drc_hotplugged(dev
);
128 drc
= spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM
, slot
);
132 * pc_dimm_get_free_slot() provided a free slot at pre-plug. The
133 * corresponding DRC is thus assumed to be attachable.
135 spapr_drc_attach(drc
, dev
);
138 spapr_hotplug_req_add_by_index(drc
);
142 static int spapr_dt_nvdimm(SpaprMachineState
*spapr
, void *fdt
,
143 int parent_offset
, NVDIMMDevice
*nvdimm
)
149 uint32_t node
= object_property_get_uint(OBJECT(nvdimm
), PC_DIMM_NODE_PROP
,
151 uint64_t slot
= object_property_get_uint(OBJECT(nvdimm
), PC_DIMM_SLOT_PROP
,
153 uint64_t lsize
= nvdimm
->label_size
;
154 uint64_t size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
157 drc
= spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM
, slot
);
160 drc_idx
= spapr_drc_index(drc
);
162 buf
= g_strdup_printf("ibm,pmemory@%x", drc_idx
);
163 child_offset
= fdt_add_subnode(fdt
, parent_offset
, buf
);
168 _FDT((fdt_setprop_cell(fdt
, child_offset
, "reg", drc_idx
)));
169 _FDT((fdt_setprop_string(fdt
, child_offset
, "compatible", "ibm,pmemory")));
170 _FDT((fdt_setprop_string(fdt
, child_offset
, "device_type", "ibm,pmemory")));
172 spapr_numa_write_associativity_dt(spapr
, fdt
, child_offset
, node
);
174 buf
= qemu_uuid_unparse_strdup(&nvdimm
->uuid
);
175 _FDT((fdt_setprop_string(fdt
, child_offset
, "ibm,unit-guid", buf
)));
178 _FDT((fdt_setprop_cell(fdt
, child_offset
, "ibm,my-drc-index", drc_idx
)));
180 _FDT((fdt_setprop_u64(fdt
, child_offset
, "ibm,block-size",
181 SPAPR_MINIMUM_SCM_BLOCK_SIZE
)));
182 _FDT((fdt_setprop_u64(fdt
, child_offset
, "ibm,number-of-blocks",
183 size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
)));
184 _FDT((fdt_setprop_cell(fdt
, child_offset
, "ibm,metadata-size", lsize
)));
186 _FDT((fdt_setprop_string(fdt
, child_offset
, "ibm,pmem-application",
187 "operating-system")));
188 _FDT(fdt_setprop(fdt
, child_offset
, "ibm,cache-flush-required", NULL
, 0));
190 if (object_dynamic_cast(OBJECT(nvdimm
), TYPE_SPAPR_NVDIMM
)) {
191 bool is_pmem
= false, pmem_override
= false;
192 PCDIMMDevice
*dimm
= PC_DIMM(nvdimm
);
193 HostMemoryBackend
*hostmem
= dimm
->hostmem
;
195 is_pmem
= object_property_get_bool(OBJECT(hostmem
), "pmem", NULL
);
196 pmem_override
= object_property_get_bool(OBJECT(nvdimm
),
197 "pmem-override", NULL
);
198 if (!is_pmem
|| pmem_override
) {
199 _FDT(fdt_setprop(fdt
, child_offset
, "ibm,hcall-flush-required",
207 int spapr_pmem_dt_populate(SpaprDrc
*drc
, SpaprMachineState
*spapr
,
208 void *fdt
, int *fdt_start_offset
, Error
**errp
)
210 NVDIMMDevice
*nvdimm
= NVDIMM(drc
->dev
);
212 *fdt_start_offset
= spapr_dt_nvdimm(spapr
, fdt
, 0, nvdimm
);
217 void spapr_dt_persistent_memory(SpaprMachineState
*spapr
, void *fdt
)
219 int offset
= fdt_subnode_offset(fdt
, 0, "ibm,persistent-memory");
220 GSList
*iter
, *nvdimms
= nvdimm_get_device_list();
223 offset
= fdt_add_subnode(fdt
, 0, "ibm,persistent-memory");
225 _FDT((fdt_setprop_cell(fdt
, offset
, "#address-cells", 0x1)));
226 _FDT((fdt_setprop_cell(fdt
, offset
, "#size-cells", 0x0)));
227 _FDT((fdt_setprop_string(fdt
, offset
, "device_type",
228 "ibm,persistent-memory")));
231 /* Create DT entries for cold plugged NVDIMM devices */
232 for (iter
= nvdimms
; iter
; iter
= iter
->next
) {
233 NVDIMMDevice
*nvdimm
= iter
->data
;
235 spapr_dt_nvdimm(spapr
, fdt
, offset
, nvdimm
);
237 g_slist_free(nvdimms
);
242 static target_ulong
h_scm_read_metadata(PowerPCCPU
*cpu
,
243 SpaprMachineState
*spapr
,
247 uint32_t drc_index
= args
[0];
248 uint64_t offset
= args
[1];
249 uint64_t len
= args
[2];
250 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
251 NVDIMMDevice
*nvdimm
;
254 uint8_t buf
[8] = { 0 };
256 if (!drc
|| !drc
->dev
||
257 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
261 if (len
!= 1 && len
!= 2 &&
262 len
!= 4 && len
!= 8) {
266 nvdimm
= NVDIMM(drc
->dev
);
267 if ((offset
+ len
< offset
) ||
268 (nvdimm
->label_size
< len
+ offset
)) {
272 ddc
= NVDIMM_GET_CLASS(nvdimm
);
273 ddc
->read_label_data(nvdimm
, buf
, len
, offset
);
280 data
= lduw_be_p(buf
);
283 data
= ldl_be_p(buf
);
286 data
= ldq_be_p(buf
);
289 g_assert_not_reached();
297 static target_ulong
h_scm_write_metadata(PowerPCCPU
*cpu
,
298 SpaprMachineState
*spapr
,
302 uint32_t drc_index
= args
[0];
303 uint64_t offset
= args
[1];
304 uint64_t data
= args
[2];
305 uint64_t len
= args
[3];
306 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
307 NVDIMMDevice
*nvdimm
;
309 uint8_t buf
[8] = { 0 };
311 if (!drc
|| !drc
->dev
||
312 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
316 if (len
!= 1 && len
!= 2 &&
317 len
!= 4 && len
!= 8) {
321 nvdimm
= NVDIMM(drc
->dev
);
322 if ((offset
+ len
< offset
) ||
323 (nvdimm
->label_size
< len
+ offset
)) {
329 if (data
& 0xffffffffffffff00) {
335 if (data
& 0xffffffffffff0000) {
341 if (data
& 0xffffffff00000000) {
350 g_assert_not_reached();
353 ddc
= NVDIMM_GET_CLASS(nvdimm
);
354 ddc
->write_label_data(nvdimm
, buf
, len
, offset
);
359 static target_ulong
h_scm_bind_mem(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
360 target_ulong opcode
, target_ulong
*args
)
362 uint32_t drc_index
= args
[0];
363 uint64_t starting_idx
= args
[1];
364 uint64_t no_of_scm_blocks_to_bind
= args
[2];
365 uint64_t target_logical_mem_addr
= args
[3];
366 uint64_t continue_token
= args
[4];
368 uint64_t total_no_of_scm_blocks
;
369 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
371 NVDIMMDevice
*nvdimm
;
373 if (!drc
|| !drc
->dev
||
374 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
379 * Currently continue token should be zero qemu has already bound
380 * everything and this hcall doesnt return H_BUSY.
382 if (continue_token
> 0) {
386 /* Currently qemu assigns the address. */
387 if (target_logical_mem_addr
!= 0xffffffffffffffff) {
391 nvdimm
= NVDIMM(drc
->dev
);
393 size
= object_property_get_uint(OBJECT(nvdimm
),
394 PC_DIMM_SIZE_PROP
, &error_abort
);
396 total_no_of_scm_blocks
= size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
398 if (starting_idx
> total_no_of_scm_blocks
) {
402 if (((starting_idx
+ no_of_scm_blocks_to_bind
) < starting_idx
) ||
403 ((starting_idx
+ no_of_scm_blocks_to_bind
) > total_no_of_scm_blocks
)) {
407 addr
= object_property_get_uint(OBJECT(nvdimm
),
408 PC_DIMM_ADDR_PROP
, &error_abort
);
410 addr
+= starting_idx
* SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
412 /* Already bound, Return target logical address in R5 */
414 args
[2] = no_of_scm_blocks_to_bind
;
419 typedef struct SpaprNVDIMMDeviceFlushState
{
420 uint64_t continue_token
;
424 QLIST_ENTRY(SpaprNVDIMMDeviceFlushState
) node
;
425 } SpaprNVDIMMDeviceFlushState
;
427 typedef struct SpaprNVDIMMDevice SpaprNVDIMMDevice
;
428 struct SpaprNVDIMMDevice
{
430 NVDIMMDevice parent_obj
;
432 bool hcall_flush_required
;
433 uint64_t nvdimm_flush_token
;
434 QLIST_HEAD(, SpaprNVDIMMDeviceFlushState
) pending_nvdimm_flush_states
;
435 QLIST_HEAD(, SpaprNVDIMMDeviceFlushState
) completed_nvdimm_flush_states
;
440 * The 'on' value for this property forced the qemu to enable the hcall
441 * flush for the nvdimm device even if the backend is a pmem
446 static int flush_worker_cb(void *opaque
)
448 SpaprNVDIMMDeviceFlushState
*state
= opaque
;
449 SpaprDrc
*drc
= spapr_drc_by_index(state
->drcidx
);
450 PCDIMMDevice
*dimm
= PC_DIMM(drc
->dev
);
451 HostMemoryBackend
*backend
= MEMORY_BACKEND(dimm
->hostmem
);
452 int backend_fd
= memory_region_get_fd(&backend
->mr
);
454 if (object_property_get_bool(OBJECT(backend
), "pmem", NULL
)) {
455 MemoryRegion
*mr
= host_memory_backend_get_memory(dimm
->hostmem
);
456 void *ptr
= memory_region_get_ram_ptr(mr
);
457 size_t size
= object_property_get_uint(OBJECT(dimm
), PC_DIMM_SIZE_PROP
,
460 /* flush pmem backend */
461 pmem_persist(ptr
, size
);
463 /* flush raw backing image */
464 if (qemu_fdatasync(backend_fd
) < 0) {
465 error_report("papr_scm: Could not sync nvdimm to backend file: %s",
474 static void spapr_nvdimm_flush_completion_cb(void *opaque
, int hcall_ret
)
476 SpaprNVDIMMDeviceFlushState
*state
= opaque
;
477 SpaprDrc
*drc
= spapr_drc_by_index(state
->drcidx
);
478 SpaprNVDIMMDevice
*s_nvdimm
= SPAPR_NVDIMM(drc
->dev
);
480 state
->hcall_ret
= hcall_ret
;
481 QLIST_REMOVE(state
, node
);
482 QLIST_INSERT_HEAD(&s_nvdimm
->completed_nvdimm_flush_states
, state
, node
);
485 static int spapr_nvdimm_flush_post_load(void *opaque
, int version_id
)
487 SpaprNVDIMMDevice
*s_nvdimm
= (SpaprNVDIMMDevice
*)opaque
;
488 SpaprNVDIMMDeviceFlushState
*state
;
489 ThreadPool
*pool
= aio_get_thread_pool(qemu_get_aio_context());
490 HostMemoryBackend
*backend
= MEMORY_BACKEND(PC_DIMM(s_nvdimm
)->hostmem
);
491 bool is_pmem
= object_property_get_bool(OBJECT(backend
), "pmem", NULL
);
492 bool pmem_override
= object_property_get_bool(OBJECT(s_nvdimm
),
493 "pmem-override", NULL
);
494 bool dest_hcall_flush_required
= pmem_override
|| !is_pmem
;
496 if (!s_nvdimm
->hcall_flush_required
&& dest_hcall_flush_required
) {
497 error_report("The file backend for the spapr-nvdimm device %s at "
498 "source is a pmem, use pmem=on and pmem-override=off to "
499 "continue.", DEVICE(s_nvdimm
)->id
);
502 if (s_nvdimm
->hcall_flush_required
&& !dest_hcall_flush_required
) {
503 error_report("The guest expects hcall-flush support for the "
504 "spapr-nvdimm device %s, use pmem_override=on to "
505 "continue.", DEVICE(s_nvdimm
)->id
);
509 QLIST_FOREACH(state
, &s_nvdimm
->pending_nvdimm_flush_states
, node
) {
510 thread_pool_submit_aio(pool
, flush_worker_cb
, state
,
511 spapr_nvdimm_flush_completion_cb
, state
);
517 static const VMStateDescription vmstate_spapr_nvdimm_flush_state
= {
518 .name
= "spapr_nvdimm_flush_state",
520 .minimum_version_id
= 1,
521 .fields
= (VMStateField
[]) {
522 VMSTATE_UINT64(continue_token
, SpaprNVDIMMDeviceFlushState
),
523 VMSTATE_INT64(hcall_ret
, SpaprNVDIMMDeviceFlushState
),
524 VMSTATE_UINT32(drcidx
, SpaprNVDIMMDeviceFlushState
),
525 VMSTATE_END_OF_LIST()
529 const VMStateDescription vmstate_spapr_nvdimm_states
= {
530 .name
= "spapr_nvdimm_states",
532 .minimum_version_id
= 1,
533 .post_load
= spapr_nvdimm_flush_post_load
,
534 .fields
= (VMStateField
[]) {
535 VMSTATE_BOOL(hcall_flush_required
, SpaprNVDIMMDevice
),
536 VMSTATE_UINT64(nvdimm_flush_token
, SpaprNVDIMMDevice
),
537 VMSTATE_QLIST_V(completed_nvdimm_flush_states
, SpaprNVDIMMDevice
, 1,
538 vmstate_spapr_nvdimm_flush_state
,
539 SpaprNVDIMMDeviceFlushState
, node
),
540 VMSTATE_QLIST_V(pending_nvdimm_flush_states
, SpaprNVDIMMDevice
, 1,
541 vmstate_spapr_nvdimm_flush_state
,
542 SpaprNVDIMMDeviceFlushState
, node
),
543 VMSTATE_END_OF_LIST()
548 * Assign a token and reserve it for the new flush state.
550 static SpaprNVDIMMDeviceFlushState
*spapr_nvdimm_init_new_flush_state(
551 SpaprNVDIMMDevice
*spapr_nvdimm
)
553 SpaprNVDIMMDeviceFlushState
*state
;
555 state
= g_malloc0(sizeof(*state
));
557 spapr_nvdimm
->nvdimm_flush_token
++;
558 /* Token zero is presumed as no job pending. Assert on overflow to zero */
559 g_assert(spapr_nvdimm
->nvdimm_flush_token
!= 0);
561 state
->continue_token
= spapr_nvdimm
->nvdimm_flush_token
;
563 QLIST_INSERT_HEAD(&spapr_nvdimm
->pending_nvdimm_flush_states
, state
, node
);
569 * spapr_nvdimm_finish_flushes
570 * Waits for all pending flush requests to complete
571 * their execution and free the states
573 void spapr_nvdimm_finish_flushes(void)
575 SpaprNVDIMMDeviceFlushState
*state
, *next
;
576 GSList
*list
, *nvdimms
;
579 * Called on reset path, the main loop thread which calls
580 * the pending BHs has gotten out running in the reset path,
581 * finally reaching here. Other code path being guest
582 * h_client_architecture_support, thats early boot up.
584 nvdimms
= nvdimm_get_device_list();
585 for (list
= nvdimms
; list
; list
= list
->next
) {
586 NVDIMMDevice
*nvdimm
= list
->data
;
587 if (object_dynamic_cast(OBJECT(nvdimm
), TYPE_SPAPR_NVDIMM
)) {
588 SpaprNVDIMMDevice
*s_nvdimm
= SPAPR_NVDIMM(nvdimm
);
589 while (!QLIST_EMPTY(&s_nvdimm
->pending_nvdimm_flush_states
)) {
590 aio_poll(qemu_get_aio_context(), true);
593 QLIST_FOREACH_SAFE(state
, &s_nvdimm
->completed_nvdimm_flush_states
,
595 QLIST_REMOVE(state
, node
);
600 g_slist_free(nvdimms
);
604 * spapr_nvdimm_get_flush_status
605 * Fetches the status of the hcall worker and returns
606 * H_LONG_BUSY_ORDER_10_MSEC if the worker is still running.
608 static int spapr_nvdimm_get_flush_status(SpaprNVDIMMDevice
*s_nvdimm
,
611 SpaprNVDIMMDeviceFlushState
*state
, *node
;
613 QLIST_FOREACH(state
, &s_nvdimm
->pending_nvdimm_flush_states
, node
) {
614 if (state
->continue_token
== token
) {
615 return H_LONG_BUSY_ORDER_10_MSEC
;
619 QLIST_FOREACH_SAFE(state
, &s_nvdimm
->completed_nvdimm_flush_states
,
621 if (state
->continue_token
== token
) {
622 int ret
= state
->hcall_ret
;
623 QLIST_REMOVE(state
, node
);
629 /* If not found in complete list too, invalid token */
635 * Input: drc_index, continue-token
636 * Out: continue-token
637 * Return Value: H_SUCCESS, H_Parameter, H_P2, H_LONG_BUSY_ORDER_10_MSEC,
640 * Given a DRC Index Flush the data to backend NVDIMM device. The hcall returns
641 * H_LONG_BUSY_ORDER_10_MSEC when the flush takes longer time and the hcall
642 * needs to be issued multiple times in order to be completely serviced. The
643 * continue-token from the output to be passed in the argument list of
644 * subsequent hcalls until the hcall is completely serviced at which point
645 * H_SUCCESS or other error is returned.
647 static target_ulong
h_scm_flush(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
648 target_ulong opcode
, target_ulong
*args
)
651 uint32_t drc_index
= args
[0];
652 uint64_t continue_token
= args
[1];
653 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
655 HostMemoryBackend
*backend
= NULL
;
656 SpaprNVDIMMDeviceFlushState
*state
;
657 ThreadPool
*pool
= aio_get_thread_pool(qemu_get_aio_context());
660 if (!drc
|| !drc
->dev
||
661 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
665 dimm
= PC_DIMM(drc
->dev
);
666 if (!object_dynamic_cast(OBJECT(dimm
), TYPE_SPAPR_NVDIMM
)) {
669 if (continue_token
== 0) {
670 bool is_pmem
= false, pmem_override
= false;
671 backend
= MEMORY_BACKEND(dimm
->hostmem
);
672 fd
= memory_region_get_fd(&backend
->mr
);
675 return H_UNSUPPORTED
;
678 is_pmem
= object_property_get_bool(OBJECT(backend
), "pmem", NULL
);
679 pmem_override
= object_property_get_bool(OBJECT(dimm
),
680 "pmem-override", NULL
);
681 if (is_pmem
&& !pmem_override
) {
682 return H_UNSUPPORTED
;
685 state
= spapr_nvdimm_init_new_flush_state(SPAPR_NVDIMM(dimm
));
690 state
->drcidx
= drc_index
;
692 thread_pool_submit_aio(pool
, flush_worker_cb
, state
,
693 spapr_nvdimm_flush_completion_cb
, state
);
695 continue_token
= state
->continue_token
;
698 ret
= spapr_nvdimm_get_flush_status(SPAPR_NVDIMM(dimm
), continue_token
);
699 if (H_IS_LONG_BUSY(ret
)) {
700 args
[0] = continue_token
;
706 static target_ulong
h_scm_unbind_mem(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
707 target_ulong opcode
, target_ulong
*args
)
709 uint32_t drc_index
= args
[0];
710 uint64_t starting_scm_logical_addr
= args
[1];
711 uint64_t no_of_scm_blocks_to_unbind
= args
[2];
712 uint64_t continue_token
= args
[3];
713 uint64_t size_to_unbind
;
714 Range blockrange
= range_empty
;
715 Range nvdimmrange
= range_empty
;
716 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
717 NVDIMMDevice
*nvdimm
;
720 if (!drc
|| !drc
->dev
||
721 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
725 /* continue_token should be zero as this hcall doesn't return H_BUSY. */
726 if (continue_token
> 0) {
730 /* Check if starting_scm_logical_addr is block aligned */
731 if (!QEMU_IS_ALIGNED(starting_scm_logical_addr
,
732 SPAPR_MINIMUM_SCM_BLOCK_SIZE
)) {
736 size_to_unbind
= no_of_scm_blocks_to_unbind
* SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
737 if (no_of_scm_blocks_to_unbind
== 0 || no_of_scm_blocks_to_unbind
!=
738 size_to_unbind
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
) {
742 nvdimm
= NVDIMM(drc
->dev
);
743 size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
745 addr
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_ADDR_PROP
,
748 range_init_nofail(&nvdimmrange
, addr
, size
);
749 range_init_nofail(&blockrange
, starting_scm_logical_addr
, size_to_unbind
);
751 if (!range_contains_range(&nvdimmrange
, &blockrange
)) {
755 args
[1] = no_of_scm_blocks_to_unbind
;
757 /* let unplug take care of actual unbind */
761 #define H_UNBIND_SCOPE_ALL 0x1
762 #define H_UNBIND_SCOPE_DRC 0x2
764 static target_ulong
h_scm_unbind_all(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
765 target_ulong opcode
, target_ulong
*args
)
767 uint64_t target_scope
= args
[0];
768 uint32_t drc_index
= args
[1];
769 uint64_t continue_token
= args
[2];
770 NVDIMMDevice
*nvdimm
;
772 uint64_t no_of_scm_blocks_unbound
= 0;
774 /* continue_token should be zero as this hcall doesn't return H_BUSY. */
775 if (continue_token
> 0) {
779 if (target_scope
== H_UNBIND_SCOPE_DRC
) {
780 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
782 if (!drc
|| !drc
->dev
||
783 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
787 nvdimm
= NVDIMM(drc
->dev
);
788 size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
791 no_of_scm_blocks_unbound
= size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
792 } else if (target_scope
== H_UNBIND_SCOPE_ALL
) {
793 GSList
*list
, *nvdimms
;
795 nvdimms
= nvdimm_get_device_list();
796 for (list
= nvdimms
; list
; list
= list
->next
) {
798 size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
801 no_of_scm_blocks_unbound
+= size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
803 g_slist_free(nvdimms
);
808 args
[1] = no_of_scm_blocks_unbound
;
810 /* let unplug take care of actual unbind */
814 static target_ulong
h_scm_health(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
815 target_ulong opcode
, target_ulong
*args
)
818 NVDIMMDevice
*nvdimm
;
819 uint64_t hbitmap
= 0;
820 uint32_t drc_index
= args
[0];
821 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
822 const uint64_t hbitmap_mask
= PAPR_PMEM_UNARMED
;
825 /* Ensure that the drc is valid & is valid PMEM dimm and is plugged in */
826 if (!drc
|| !drc
->dev
||
827 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
831 nvdimm
= NVDIMM(drc
->dev
);
833 /* Update if the nvdimm is unarmed and send its status via health bitmaps */
834 if (object_property_get_bool(OBJECT(nvdimm
), NVDIMM_UNARMED_PROP
, NULL
)) {
835 hbitmap
|= PAPR_PMEM_UNARMED
;
838 /* Update the out args with health bitmap/mask */
840 args
[1] = hbitmap_mask
;
845 static void spapr_scm_register_types(void)
847 /* qemu/scm specific hcalls */
848 spapr_register_hypercall(H_SCM_READ_METADATA
, h_scm_read_metadata
);
849 spapr_register_hypercall(H_SCM_WRITE_METADATA
, h_scm_write_metadata
);
850 spapr_register_hypercall(H_SCM_BIND_MEM
, h_scm_bind_mem
);
851 spapr_register_hypercall(H_SCM_UNBIND_MEM
, h_scm_unbind_mem
);
852 spapr_register_hypercall(H_SCM_UNBIND_ALL
, h_scm_unbind_all
);
853 spapr_register_hypercall(H_SCM_HEALTH
, h_scm_health
);
854 spapr_register_hypercall(H_SCM_FLUSH
, h_scm_flush
);
857 type_init(spapr_scm_register_types
)
859 static void spapr_nvdimm_realize(NVDIMMDevice
*dimm
, Error
**errp
)
861 SpaprNVDIMMDevice
*s_nvdimm
= SPAPR_NVDIMM(dimm
);
862 HostMemoryBackend
*backend
= MEMORY_BACKEND(PC_DIMM(dimm
)->hostmem
);
863 bool is_pmem
= object_property_get_bool(OBJECT(backend
), "pmem", NULL
);
864 bool pmem_override
= object_property_get_bool(OBJECT(dimm
), "pmem-override",
866 if (!is_pmem
|| pmem_override
) {
867 s_nvdimm
->hcall_flush_required
= true;
870 vmstate_register(NULL
, VMSTATE_INSTANCE_ID_ANY
,
871 &vmstate_spapr_nvdimm_states
, dimm
);
874 static void spapr_nvdimm_unrealize(NVDIMMDevice
*dimm
)
876 vmstate_unregister(NULL
, &vmstate_spapr_nvdimm_states
, dimm
);
879 static Property spapr_nvdimm_properties
[] = {
880 #ifdef CONFIG_LIBPMEM
881 DEFINE_PROP_BOOL("pmem-override", SpaprNVDIMMDevice
, pmem_override
, false),
883 DEFINE_PROP_END_OF_LIST(),
886 static void spapr_nvdimm_class_init(ObjectClass
*oc
, void *data
)
888 DeviceClass
*dc
= DEVICE_CLASS(oc
);
889 NVDIMMClass
*nvc
= NVDIMM_CLASS(oc
);
891 nvc
->realize
= spapr_nvdimm_realize
;
892 nvc
->unrealize
= spapr_nvdimm_unrealize
;
894 device_class_set_props(dc
, spapr_nvdimm_properties
);
897 static void spapr_nvdimm_init(Object
*obj
)
899 SpaprNVDIMMDevice
*s_nvdimm
= SPAPR_NVDIMM(obj
);
901 s_nvdimm
->hcall_flush_required
= false;
902 QLIST_INIT(&s_nvdimm
->pending_nvdimm_flush_states
);
903 QLIST_INIT(&s_nvdimm
->completed_nvdimm_flush_states
);
906 static TypeInfo spapr_nvdimm_info
= {
907 .name
= TYPE_SPAPR_NVDIMM
,
908 .parent
= TYPE_NVDIMM
,
909 .class_init
= spapr_nvdimm_class_init
,
910 .class_size
= sizeof(SPAPRNVDIMMClass
),
911 .instance_size
= sizeof(SpaprNVDIMMDevice
),
912 .instance_init
= spapr_nvdimm_init
,
915 static void spapr_nvdimm_register_types(void)
917 type_register_static(&spapr_nvdimm_info
);
920 type_init(spapr_nvdimm_register_types
)