/* Bookkeeping for a set of pages that are to be pinned. */
struct page_array {
	/* Addresses of the pages that need to be pinned. */
	dma_addr_t		*pa_iova;
	/* Filled with the pages once they have been pinned. */
	struct page		**pa_page;
	/* How many pages from @pa_iova are pinned. */
	int			pa_nr;
};
/* One chain (segment) of CCWs that belongs to a channel program. */
struct ccwchain {
	/* Linkage into the channel program's list of chains. */
	struct list_head	next;
	/* The CCWs that make up this chain. */
	struct ccw1		*ch_ccw;
	/* Guest physical address of the current chain. */
	u64			ch_iova;
	/* Count of the valid ccws in chain. */
	int			ch_len;
	/* Pinned pages for the original data, one page_array per CCW. */
	struct page_array	*ch_pa;
};
/* * page_array_alloc() - alloc memory for page array * @pa: page_array on which to perform the operation * @len: number of pages that should be pinned * * Attempt to allocate memory for page array. * * Usage of page_array: * We expect (pa_nr == 0) and (pa_iova == NULL), any field in * this structure will be filled in by this function. * * Returns: * 0 if page array is allocated * -EINVAL if pa->pa_nr is not initially zero, pa->pa_iova is not NULL, * or @len is zero * -ENOMEM if alloc failed
*/ staticint page_array_alloc(struct page_array *pa, unsignedint len)
{ if (pa->pa_nr || pa->pa_iova) return -EINVAL;
if (len == 0) return -EINVAL;
pa->pa_nr = len;
pa->pa_iova = kcalloc(len, sizeof(*pa->pa_iova), GFP_KERNEL); if (!pa->pa_iova) return -ENOMEM;
/* * page_array_unpin() - Unpin user pages in memory * @pa: page_array on which to perform the operation * @vdev: the vfio device to perform the operation * @pa_nr: number of user pages to unpin * @unaligned: were pages unaligned on the pin request * * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0, * otherwise only clear pa->pa_nr
*/ staticvoid page_array_unpin(struct page_array *pa, struct vfio_device *vdev, int pa_nr, bool unaligned)
{ int unpinned = 0, npage = 1;
/* * page_array_pin() - Pin user pages in memory * @pa: page_array on which to perform the operation * @vdev: the vfio device to perform pin operations * @unaligned: are pages not aligned to a 4K boundary? * * Returns number of pages pinned upon success. * If the pin request partially succeeds, or fails completely, * all pages are left unpinned and a negative error value is returned. * * Requests to pin "aligned" pages can be coalesced into a single * vfio_pin_pages request for the sake of efficiency, based on the * expectation of 4K page requests. Unaligned requests are probably * dealing with 2K "pages", and cannot be coalesced without * reworking this logic to incorporate that math.
*/ staticint page_array_pin(struct page_array *pa, struct vfio_device *vdev, bool unaligned)
{ int pinned = 0, npage = 1; int ret = 0;
for (i = 0; i < pa->pa_nr; i++) {
pfn = pa->pa_iova[i] >> PAGE_SHIFT; if (pfn >= iova_pfn_start && pfn <= iova_pfn_end) returntrue;
}
returnfalse;
}

/*
 * page_array_idal_create_words() - create the list of IDAL words for a
 *                                  page_array
 * @pa: page_array supplying the pages (pa_page) and the original
 *      addresses (pa_iova); pa_nr entries are processed
 * @idaws: output array, receives one IDAW per entry of @pa
 *
 * IDAL words (except the first one) rely on the memory being 4K
 * aligned. If a user virtual address is 4K aligned, then its
 * corresponding kernel physical address will also be 4K aligned. Thus
 * there will be no problem here to simply use the phys to create an
 * idaw.
 */
static inline void page_array_idal_create_words(struct page_array *pa,
						dma64_t *idaws)
{
	int i;

	for (i = 0; i < pa->pa_nr; i++) {
		/* Start from the address that belongs to the page itself. */
		idaws[i] = virt_to_dma64(page_to_virt(pa->pa_page[i]));

		/* Incorporate any offset from each starting address */
		idaws[i] = dma64_add(idaws[i], pa->pa_iova[i] & ~PAGE_MASK);
	}
}
/*
 * ccw_does_data_transfer()
 *
 * @ccw: the CCW to examine
 *
 * Determine whether a CCW will move any data, such that the guest pages
 * would need to be pinned before performing the I/O.
 *
 * Returns 1 if yes, 0 if no.
 */
static inline int ccw_does_data_transfer(struct ccw1 *ccw)
{
	/* If the count field is zero, then no data will be transferred */
	if (ccw->count == 0)
		return 0;

	/* If the command is a NOP, then no data will be transferred */
	if (ccw_is_noop(ccw))
		return 0;

	/* If the skip flag is off, then data will be transferred */
	if (!ccw_is_skip(ccw))
		return 1;

	/*
	 * If the skip flag is on, it is only meaningful if the command
	 * code is a read, read backward, sense, or sense ID. In those
	 * cases, no data will be transferred.
	 */
	if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
		return 0;

	if (ccw_is_sense(ccw))
		return 0;

	/* The skip flag is on, but it is ignored for this command code. */
	return 1;
}
/*
 * is_cpa_within_range()
 *
 * @cpa: channel program address being questioned
 * @head: address of the beginning of a CCW chain
 * @len: number of CCWs within the chain
 *
 * Determine whether the address of a CCW (whether a new chain,
 * or the target of a TIC) falls within a range (including the end points).
 *
 * Returns 1 if yes, 0 if no.
 */
static inline int is_cpa_within_range(dma32_t cpa, u32 head, int len)
{
	/* Address of the last CCW of the chain; the range is inclusive. */
	u32 tail = head + (len - 1) * sizeof(struct ccw1);
	u32 gcpa = dma32_to_u32(cpa);

	return head <= gcpa && gcpa <= tail;
}
staticinlineint is_tic_within_range(struct ccw1 *ccw, u32 head, int len)
{ if (!ccw_is_tic(ccw)) return 0;
/*
 * ccwchain_cda_free() - free resource for a ccw that allocated memory
 *                       for its cda
 * @chain: chain that contains the ccw
 * @idx: index of the ccw within @chain->ch_ccw
 *
 * TIC ccws are skipped: nothing is freed for them here.
 */
static void ccwchain_cda_free(struct ccwchain *chain, int idx)
{
	struct ccw1 *ccw = &chain->ch_ccw[idx];

	if (ccw_is_tic(ccw))
		return;

	kfree(dma32_to_virt(ccw->cda));
}
/** * ccwchain_calc_length - calculate the length of the ccw chain. * @iova: guest physical address of the target ccw chain * @cp: channel_program on which to perform the operation * * This is the chain length not considering any TICs. * You need to do a new round for each TIC target. * * The program is also validated for absence of not yet supported * indirect data addressing scenarios. * * Returns: the length of the ccw chain or -errno.
*/ staticint ccwchain_calc_length(u64 iova, struct channel_program *cp)
{ struct ccw1 *ccw = cp->guest_cp; int cnt = 0;
do {
cnt++;
/* * We want to keep counting if the current CCW has the * command-chaining flag enabled, or if it is a TIC CCW * that loops back into the current chain. The latter * is used for device orientation, where the CCW PRIOR to * the TIC can either jump to the TIC or a CCW immediately * after the TIC, depending on the results of its operation.
*/ if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt)) break;
gcda = dma32_to_u32(cda); /* Copy 2K (the most we support today) of possible CCWs */
ret = vfio_dma_rw(vdev, gcda, cp->guest_cp, CCWCHAIN_LEN_MAX * sizeof(struct ccw1), false); if (ret) return ret;
/* Convert any Format-0 CCWs to Format-1 */ if (!cp->orb.cmd.fmt)
convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);
/* Count the CCWs in the current chain */
len = ccwchain_calc_length(gcda, cp); if (len < 0) return len;
/* Need alloc a new chain for this one. */
chain = ccwchain_alloc(cp, len); if (!chain) return -ENOMEM;
chain->ch_len = len;
chain->ch_iova = gcda;
/* Copy the actual CCWs into the new chain */
memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));
/* Loop for tics on this new chain. */
ret = ccwchain_loop_tic(chain, cp);
if (ret)
ccwchain_free(chain);
return ret;
}
/* Loop for TICs. */ staticint ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
{ struct ccw1 *tic; int i, ret;
for (i = 0; i < chain->ch_len; i++) {
tic = &chain->ch_ccw[i];
if (!ccw_is_tic(tic)) continue;
/* May transfer to an existing chain. */ if (tic_target_chain_exists(tic, cp)) continue;
/* Build a ccwchain for the next segment */
ret = ccwchain_handle_ccw(tic->cda, cp); if (ret) return ret;
}
if (ccw_is_idal(ccw)) { /* Copy IDAL from guest */
ret = vfio_dma_rw(vdev, dma32_to_u32(ccw->cda), idaws, idal_len, false); if (ret) {
kfree(idaws); return ERR_PTR(ret);
}
} else { /* Fabricate an IDAL based off CCW data address */ if (cp->orb.cmd.c64) {
idaws[0] = u64_to_dma64(dma32_to_u32(ccw->cda)); for (i = 1; i < idaw_nr; i++) {
idaws[i] = dma64_add(idaws[i - 1], idaw_size);
idaws[i] = dma64_and(idaws[i], idaw_mask);
}
} else {
idaws_f1 = (dma32_t *)idaws;
idaws_f1[0] = ccw->cda; for (i = 1; i < idaw_nr; i++) {
idaws_f1[i] = dma32_add(idaws_f1[i - 1], idaw_size);
idaws_f1[i] = dma32_and(idaws_f1[i], idaw_mask);
}
}
}
return idaws;
}
/*
 * ccw_count_idaws() - Calculate the number of IDAWs needed to transfer
 * a specified amount of data
 *
 * @ccw: The Channel Command Word being translated
 * @cp: Channel Program being processed
 *
 * The ORB is examined, since it specifies what IDAWs could actually be
 * used by any CCW in the channel program, regardless of whether or not
 * the CCW actually does. An ORB that does not specify Format-2-IDAW
 * Control could still contain a CCW with an IDAL, which would be
 * Format-1 and thus only move 2K with each IDAW. Thus all CCWs within
 * the channel program must follow the same size requirements.
 *
 * A CCW whose count field is zero is treated as transferring one byte.
 *
 * Returns the number of IDAWs, or the non-zero return value of
 * vfio_dma_rw() if the first IDAW could not be read.
 */
static int ccw_count_idaws(struct ccw1 *ccw, struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	/*
	 * Zeroed up front: a Format-1 read below only fills part of this
	 * variable, and the remainder must not be left indeterminate.
	 */
	u64 iova = 0;
	int size = cp->orb.cmd.c64 ? sizeof(u64) : sizeof(u32);
	int ret;
	int bytes = 1;

	if (ccw->count)
		bytes = ccw->count;

	if (ccw_is_idal(ccw)) {
		/* Read first IDAW to check its starting address. */
		/* All subsequent IDAWs will be 2K- or 4K-aligned. */
		ret = vfio_dma_rw(vdev, dma32_to_u32(ccw->cda), &iova, size, false);
		if (ret)
			return ret;

		/*
		 * Format-1 IDAWs only occupy the first 32 bits,
		 * and bit 0 is always off.
		 */
		if (!cp->orb.cmd.c64)
			iova = iova >> 32;
	} else {
		iova = dma32_to_u32(ccw->cda);
	}

	/*
	 * Format-1 IDAWs operate on 2K each, and so does the 2K variant
	 * of Format-2 IDAWs.
	 */
	if (!cp->orb.cmd.c64 || cp->orb.cmd.i2k)
		return idal_2k_nr_words((void *)iova, bytes);

	/* The 'usual' case is 4K Format-2 IDAWs */
	return idal_nr_words((void *)iova, bytes);
}
staticint ccwchain_fetch_ccw(struct ccw1 *ccw, struct page_array *pa, struct channel_program *cp)
{ struct vfio_device *vdev =
&container_of(cp, struct vfio_ccw_private, cp)->vdev;
dma64_t *idaws;
dma32_t *idaws_f1; int ret; int idaw_nr; int i;
/* Calculate size of IDAL */
idaw_nr = ccw_count_idaws(ccw, cp); if (idaw_nr < 0) return idaw_nr;
/* Allocate an IDAL from host storage */
idaws = get_guest_idal(ccw, cp, idaw_nr); if (IS_ERR(idaws)) {
ret = PTR_ERR(idaws); goto out_init;
}
/* * Allocate an array of pages to pin/translate. * The number of pages is actually the count of the idaws * required for the data transfer, since we only support * 4K IDAWs today.
*/
ret = page_array_alloc(pa, idaw_nr); if (ret < 0) goto out_free_idaws;
/* * Copy guest IDAWs into page_array, in case the memory they * occupy is not contiguous.
*/
idaws_f1 = (dma32_t *)idaws; for (i = 0; i < idaw_nr; i++) { if (cp->orb.cmd.c64)
pa->pa_iova[i] = dma64_to_u64(idaws[i]); else
pa->pa_iova[i] = dma32_to_u32(idaws_f1[i]);
}
if (ccw_does_data_transfer(ccw)) {
ret = page_array_pin(pa, vdev, idal_is_2k(cp)); if (ret < 0) goto out_unpin;
} else {
pa->pa_nr = 0;
}
/*
 * Fetch one ccw.
 * To reduce memory copy, we'll pin the cda page in memory,
 * and to get rid of the cda 2G limitation of ccw1, we'll translate
 * direct ccws to idal ccws.
 *
 * @ccw: the ccw to fetch
 * @pa: page_array that belongs to @ccw (not used for a TIC)
 * @cp: channel program that @ccw is part of
 *
 * A TIC is handed to ccwchain_fetch_tic(), every other ccw to
 * ccwchain_fetch_ccw(); the result of that call is returned unchanged.
 */
static int ccwchain_fetch_one(struct ccw1 *ccw, struct page_array *pa,
			      struct channel_program *cp)
{
	if (ccw_is_tic(ccw))
		return ccwchain_fetch_tic(ccw, cp);

	return ccwchain_fetch_ccw(ccw, pa, cp);
}
/**
 * cp_init() - allocate ccwchains for a channel program.
 * @cp: channel_program on which to perform the operation
 * @orb: control block for the channel program from the guest
 *
 * This creates one or more ccwchain(s), and copies the raw data of
 * the target channel program, starting at @orb->cmd.cpa, to the new
 * ccwchain(s). A copy of @orb is stored in @cp->orb so that the
 * chain-building and translation code, which reads @cp->orb, works on
 * the ORB of this request.
 *
 * Limitations:
 * 1. Supports idal(c64) ccw chaining.
 * 2. Supports 4k idaw.
 *
 * Returns:
 *   %0 on success and a negative error value on failure
 *   (-EBUSY if @cp is already initialized).
 */
int cp_init(struct channel_program *cp, union orb *orb)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	/* custom ratelimit used to avoid flood during guest IPL */
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1);
	int ret;

	/* this is an error in the caller */
	if (cp->initialized)
		return -EBUSY;

	/*
	 * We only support prefetching the channel program. We assume all channel
	 * programs executed by supported guests likewise support prefetching.
	 * Executing a channel program that does not specify prefetching will
	 * typically not cause an error, but a warning is issued to help identify
	 * the problem if something does break.
	 */
	if (!orb->cmd.pfch && __ratelimit(&ratelimit_state))
		dev_warn(
			vdev->dev,
			"Prefetching channel program even though prefetch not specified in ORB");

	/*
	 * Keep our own copy of the guest ORB. ccwchain_handle_ccw() and the
	 * IDAW helpers consult cp->orb (cmd.fmt, cmd.c64, cmd.i2k), so it has
	 * to describe this request before any chain is built.
	 */
	cp->orb = *orb;

	/*
	 * NOTE(review): cp->ccwchain_list is not set up here; this assumes it
	 * is already a valid, empty list head when cp_init() runs. Confirm
	 * against the code that prepares @cp.
	 */

	/* Build a ccwchain for the first CCW segment */
	ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
	if (!ret)
		cp->initialized = true;

	return ret;
}
/**
 * cp_free() - free resources for channel program.
 * @cp: channel_program on which to perform the operation
 *
 * Unpins the memory pages and releases the memory held by @cp. @cp must
 * have been set up by an earlier cp_init() call; anything else is
 * undefined behavior. Calling this on a @cp that is not marked
 * initialized does nothing.
 */
void cp_free(struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	struct ccwchain *chain, *next_chain;
	int idx;

	if (!cp->initialized)
		return;

	/* Mark the program as gone before tearing the chains down. */
	cp->initialized = false;

	list_for_each_entry_safe(chain, next_chain, &cp->ccwchain_list, next) {
		/* Release the per-CCW resources first ... */
		idx = 0;
		while (idx < chain->ch_len) {
			page_array_unpin_free(&chain->ch_pa[idx], vdev,
					      idal_is_2k(cp));
			ccwchain_cda_free(chain, idx);
			idx++;
		}

		/* ... and then the chain itself. */
		ccwchain_free(chain);
	}
}
/**
 * cp_prefetch() - translate a guest physical address channel program to
 *                 a real-device runnable channel program.
 * @cp: channel_program on which to perform the operation
 *
 * Translates the guest-physical-address channel program and stores the
 * result in the ccwchain list. @cp must have been initialized by an
 * earlier cp_init() call; otherwise the behavior is undefined.
 *
 * For each chain of the channel program:
 * - On entry ch_len holds the count of CCWs to be translated.
 * - On exit ch_len is adjusted to the count of successfully translated
 *   CCWs, so that cp_free() finds in ch_len how many CCWs it has to
 *   free in that chain.
 *
 * The S/390 CCW Translation APIs (prefixed by 'cp_') are helpers for
 * translating ccw chains inside the kernel. They accept a channel
 * program issued by a virtual machine and translate it into a channel
 * program that a real device can run.
 *
 * These APIs copy the ccws into kernel-space buffers and replace the
 * guest physical addresses with the corresponding host physical
 * addresses. A channel I/O device driver can then issue the translated
 * channel program to a real device to perform the I/O operation.
 *
 * The interfaces only support translation of channel programs that
 * were generated and formatted by a guest. This is what allows users
 * such as VFIO to pass a channel I/O mediated device through to QEMU.
 *
 * Direct ccw chaining is supported by translating direct ccws to idal
 * ccws.
 *
 * Returns:
 *   %0 on success and a negative error value on failure.
 */
int cp_prefetch(struct channel_program *cp)
{
	struct ccwchain *chain;
	int count, pos, rc;

	/* this is an error in the caller */
	if (!cp->initialized)
		return -EINVAL;

	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		count = chain->ch_len;

		for (pos = 0; pos < count; pos++) {
			rc = ccwchain_fetch_one(&chain->ch_ccw[pos],
						&chain->ch_pa[pos], cp);
			if (!rc)
				continue;

			/*
			 * Only cleanup the chain elements that were actually
			 * translated: trim the failing chain to what was
			 * done, and mark every chain after it as empty.
			 */
			chain->ch_len = pos;
			list_for_each_entry_continue(chain, &cp->ccwchain_list, next)
				chain->ch_len = 0;

			return rc;
		}
	}

	return 0;
}
/** * cp_get_orb() - get the orb of the channel program * @cp: channel_program on which to perform the operation * @sch: subchannel the operation will be performed against * * This function returns the address of the updated orb of the channel * program. Channel I/O device drivers could use this orb to issue a * ssch.
*/ union orb *cp_get_orb(struct channel_program *cp, struct subchannel *sch)
{ union orb *orb; struct ccwchain *chain; struct ccw1 *cpa;
/* this is an error in the caller */ if (!cp->initialized) return NULL;
/* * Everything built by vfio-ccw is a Format-2 IDAL. * If the input was a Format-1 IDAL, indicate that * 2K Format-2 IDAWs were created here.
*/ if (!orb->cmd.c64)
orb->cmd.i2k = 1;
orb->cmd.c64 = 1;
/**
 * cp_update_scsw() - update scsw for a channel program.
 * @cp: channel_program on which to perform the operation
 * @scsw: I/O results of the channel program and also the target to be
 *        updated
 *
 * @scsw holds the I/O results of the channel program that @cp points
 * to. The address kept in @scsw->cpa, however, is a host physical
 * address, which means nothing to the guest that is waiting for the
 * I/O results.
 *
 * This function rewrites @scsw->cpa to its corresponding guest physical
 * address. If no chain contains the address, it is left as it was.
 * Nothing is done when @cp is not initialized.
 */
void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
{
	struct ccwchain *chain;
	dma32_t host_cpa = scsw->cmd.cpa;
	u32 chain_start;

	if (!cp->initialized)
		return;

	/*
	 * LATER:
	 * For now, only update the cmd.cpa part. We may need to deal with
	 * other portions of the schib as well, even if we don't return them
	 * in the ioctl directly. Path status changes etc.
	 */
	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		chain_start = dma32_to_u32(virt_to_dma32(chain->ch_ccw));

		/*
		 * On successful execution, cpa points just beyond the end
		 * of the chain, hence the range is one CCW longer.
		 */
		if (!is_cpa_within_range(host_cpa, chain_start, chain->ch_len + 1))
			continue;

		/*
		 * (cpa - chain_start) is the offset of the host physical
		 * ccw from the head of its chain. Adding that offset to
		 * the guest physical chain head yields the guest cpa:
		 * cpa = chain->ch_iova + (cpa - chain_start)
		 */
		scsw->cmd.cpa = dma32_add(host_cpa, chain->ch_iova - chain_start);
		return;
	}

	/* No chain matched: store the address back unchanged. */
	scsw->cmd.cpa = host_cpa;
}
/**
 * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
 * @cp: channel_program on which to perform the operation
 * @iova: the iova to check
 * @length: the length to check from @iova
 *
 * If the @iova is currently pinned for the ccw chain, return true;
 * else return false. A @cp that is not initialized has nothing pinned,
 * so false is returned for it.
 */
bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length)
{
	struct ccwchain *chain;
	int i;

	if (!cp->initialized)
		return false;

	/* Every CCW of every chain has its own page_array to look at. */
	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		for (i = 0; i < chain->ch_len; i++) {
			if (page_array_iova_pinned(&chain->ch_pa[i], iova, length))
				return true;
		}
	}

	return false;
}
Messung V0.5
¤ Dauer der Verarbeitung: 0.30 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.