/*
 * Multiply a 64-bit count by a 64-bit tick 'period' in units of seconds >> 64
 * and add the fractional second part of the reference time.
 *
 * The result is a 128-bit value, the top 64 bits of which are seconds, and
 * the low 64 bits are (seconds >> 64).
 */
static uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta,
					uint64_t period, uint8_t shift,
					uint64_t frac_sec)
{
	/* Full-width product, pre-scaled by 'shift' (compiler __int128 extension). */
	unsigned __int128 total = ((unsigned __int128)delta * period) >> shift;

	/* Fold in the fractional-second part of the reference time. */
	total += frac_sec;

	/* Split: top 64 bits out through *res_hi, bottom 64 bits returned. */
	*res_hi = (uint64_t)(total >> 64);
	return (uint64_t)total;
}
#ifdef CONFIG_X86 /* * We'd expect the hypervisor to know this and to report the clock * status as VMCLOCK_STATUS_UNRELIABLE. But be paranoid.
*/ if (check_tsc_unstable()) return -EINVAL; #endif
while (1) {
seq = le32_to_cpu(st->clk->seq_count) & ~1ULL;
/* * This pairs with a write barrier in the hypervisor * which populates this structure.
*/
virt_rmb();
if (st->clk->clock_status == VMCLOCK_STATUS_UNRELIABLE) return -EINVAL;
/* * When invoked for gettimex64(), fill in the pre/post system * times. The simple case is when system time is based on the * same counter as st->cs_id, in which case all three times * will be derived from the *same* counter value. * * If the system isn't using the same counter, then the value * from ktime_get_snapshot() will still be used as pre_ts, and * ptp_read_system_postts() is called to populate postts after * calling get_cycles(). * * The conversion to timespec64 happens further down, outside * the seq_count loop.
*/ if (sts) {
ktime_get_snapshot(&systime_snapshot); if (systime_snapshot.cs_id == st->cs_id) {
cycle = systime_snapshot.cycles;
} else {
cycle = get_cycles();
ptp_read_system_postts(sts);
}
} else {
cycle = get_cycles();
}
if (!tai_adjust(st->clk, &tspec->tv_sec)) return -EINVAL;
/* * This pairs with a write barrier in the hypervisor * which populates this structure.
*/
virt_rmb(); if (seq == le32_to_cpu(st->clk->seq_count)) break;
if (ktime_after(ktime_get(), deadline)) return -ETIMEDOUT;
}
if (system_counter) {
system_counter->cycles = cycle;
system_counter->cs_id = st->cs_id;
}
if (sts) {
sts->pre_ts = ktime_to_timespec64(systime_snapshot.real); if (systime_snapshot.cs_id == st->cs_id)
sts->post_ts = sts->pre_ts;
}
return 0;
}
#ifdef SUPPORT_KVMCLOCK /* * In the case where the system is using the KVM clock for timekeeping, convert * the TSC value into a KVM clock time in order to return a paired reading that * get_device_system_crosststamp() can cope with.
*/ static int vmclock_get_crosststamp_kvmclock(struct vmclock_state *st, struct ptp_system_timestamp *sts, struct system_counterval_t *system_counter, struct timespec64 *tspec)
{ struct pvclock_vcpu_time_info *pvti = this_cpu_pvti(); unsigned int pvti_ver; int ret;
preempt_disable_notrace();
do {
pvti_ver = pvclock_read_begin(pvti);
ret = vmclock_get_crosststamp(st, sts, system_counter, tspec); if (ret) break;
		/* * This retry should never really happen; if the TSC is * stable and reliable enough across vCPUS that it is sane * for the hypervisor to expose a VMCLOCK device which uses * it as the reference counter, then the KVM clock should be * in 'master clock mode' and basically never changed. But * the KVM clock is a fickle and often broken thing, so do * it "properly" just in case.
*/
} while (pvclock_read_retry(pvti, pvti_ver));
#ifdef SUPPORT_KVMCLOCK if (READ_ONCE(st->sys_cs_id) == CSID_X86_KVM_CLK)
ret = vmclock_get_crosststamp_kvmclock(st, NULL, system_counter,
&tspec); else #endif
ret = vmclock_get_crosststamp(st, NULL, system_counter, &tspec);
if (!ret)
*device_time = timespec64_to_ktime(tspec);
return ret;
}
static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp, struct system_device_crosststamp *xtstamp)
{ struct vmclock_state *st = container_of(ptp, struct vmclock_state,
ptp_clock_info); int ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn, st,
NULL, xtstamp); #ifdef SUPPORT_KVMCLOCK /* * On x86, the KVM clock may be used for the system time. We can * actually convert a TSC reading to that, and return a paired * timestamp that get_device_system_crosststamp() *can* handle.
*/ if (ret == -ENODEV) { struct system_time_snapshot systime_snapshot;
/* * This should never happen as this function is only called when * has_acpi_companion(dev) is true, but the logic is sufficiently * complex that Coverity can't see the tautology.
*/ if (!adev) return -ENODEV;
status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
vmclock_acpi_resources, st); if (ACPI_FAILURE(status) || resource_type(&st->res) != IORESOURCE_MEM) {
dev_err(dev, "failed to get resources\n"); return -ENODEV;
}
ret = ida_alloc(&vmclock_ida, GFP_KERNEL); if (ret < 0) return ret;
st->index = ret;
ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st); if (ret) return ret;
st->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "vmclock%d", st->index); if (!st->name) return -ENOMEM;
st->miscdev.minor = MISC_DYNAMIC_MINOR;
ret = devm_add_action_or_reset(&pdev->dev, vmclock_remove, st); if (ret) return ret;
/* * If the structure is big enough, it can be mapped to userspace. * Theoretically a guest OS even using larger pages could still * use 4KiB PTEs to map smaller MMIO regions like this, but let's * cross that bridge if/when we come to it.
*/ if (le32_to_cpu(st->clk->size) >= PAGE_SIZE) {
st->miscdev.fops = &vmclock_miscdev_fops;
st->miscdev.name = st->name;
ret = misc_register(&st->miscdev); if (ret) return ret;
}
/* If there is valid clock information, register a PTP clock */ if (VMCLOCK_FIELD_PRESENT(st->clk, time_frac_sec)) { /* Can return a silent NULL, or an error. */
st->ptp_clock = vmclock_ptp_register(dev, st); if (IS_ERR(st->ptp_clock)) {
ret = PTR_ERR(st->ptp_clock);
st->ptp_clock = NULL; return ret;
}
}
if (!st->miscdev.minor && !st->ptp_clock) { /* Neither miscdev nor PTP registered */
dev_info(dev, "vmclock: Neither miscdev nor PTP available; not registering\n"); return -ENODEV;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.