Skip to content

Commit b5ef686

Browse files
committed
kvm-ioctls: add dirty log ring support for all architectures
Implement dirty log ring interface with `enable_dirty_log_ring` and `dirty_log_ring_iter` methods. Enable `VmFd` `enable_cap` and ioctl imports on all architectures. Add memory fences in iterator for proper synchronization on weak memory consistency architectures. Signed-off-by: David Kleymann <[email protected]>
1 parent bd3260e commit b5ef686

File tree

7 files changed

+560
-10
lines changed

7 files changed

+560
-10
lines changed

kvm-bindings/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,7 @@ pub use self::arm64::*;
3939
mod riscv64;
4040
#[cfg(target_arch = "riscv64")]
4141
pub use self::riscv64::*;
42+
43+
// Linux defines these based on _BITUL macros and bindgen fails to generate
// them, so they are defined manually here.
/// Dirty-ring entry flag: the entry contains a dirty guest frame number.
pub const KVM_DIRTY_GFN_F_DIRTY: u32 = 0b1;
/// Dirty-ring entry flag: the entry has been harvested and may be reset.
pub const KVM_DIRTY_GFN_F_RESET: u32 = 0b10;

kvm-ioctls/CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,27 @@
22

33
## Upcoming Release
44

5+
### Fixed
6+
7+
- Fixed `VmFd::enable_cap` so that it is available on all architectures
8+
9+
### Added
10+
11+
- Added `KvmDirtyLogRing` structure to mmap the dirty log ring.
12+
- Added `KVM_DIRTY_GFN_F_DIRTY` and `KVM_DIRTY_GFN_F_RESET` bitflags.
13+
- Added `KvmDirtyLogRing` iterator type for accessing dirty log entries.
14+
- Added `dirty_log_ring` field to `VcpuFd` to access per-vCPU dirty rings.
15+
- Inserted Acquire/Release fences in the `KvmDirtyLogRing` iterator's `next` method for architectures with weak memory consistency.
16+
- Added `DirtyLogRingInfo` struct and `dirty_log_ring_info` field to `VmFd` to
17+
track dirty ring configuration.
18+
- Added `enable_dirty_log_ring` function on `VmFd` to check corresponding
19+
capabilities and enable KVM's dirty log ring.
20+
- Added `VcpuFd::dirty_log_ring_iter()` to iterate over dirty guest frame numbers.
21+
- Added `VmFd::reset_dirty_rings()` to reset all dirty rings for the VM.
22+
- Added `VcpuExit::DirtyRingFull` for `KVM_EXIT_DIRTY_RING_FULL`.
23+
24+
- Plumbed through `KVM_CAP_DIRTY_LOG_RING` as the `DirtyLogRing` cap.
25+
526
## v0.24.0
627

728
### Added

kvm-ioctls/src/cap.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,4 +169,5 @@ pub enum Cap {
169169
NestedState = KVM_CAP_NESTED_STATE,
170170
#[cfg(target_arch = "x86_64")]
171171
X2ApicApi = KVM_CAP_X2APIC_API,
172+
DirtyLogRing = KVM_CAP_DIRTY_LOG_RING,
172173
}

kvm-ioctls/src/ioctls/mod.rs

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@
88
use std::mem::size_of;
99
use std::os::unix::io::AsRawFd;
1010
use std::ptr::{NonNull, null_mut};
11+
use std::sync::atomic::{Ordering, fence};
1112

1213
use kvm_bindings::{
13-
KVM_COALESCED_MMIO_PAGE_OFFSET, kvm_coalesced_mmio, kvm_coalesced_mmio_ring, kvm_run,
14+
KVM_COALESCED_MMIO_PAGE_OFFSET, KVM_DIRTY_GFN_F_DIRTY, KVM_DIRTY_GFN_F_RESET,
15+
KVM_DIRTY_LOG_PAGE_OFFSET, kvm_coalesced_mmio, kvm_coalesced_mmio_ring, kvm_dirty_gfn, kvm_run,
1416
};
1517
use vmm_sys_util::errno;
1618

@@ -29,6 +31,121 @@ pub mod vm;
2931
/// is otherwise a direct mapping to Result.
3032
pub type Result<T> = std::result::Result<T, errno::Error>;
3133

34+
/// A wrapper around the KVM dirty log ring page.
///
/// The ring is a memory-mapped, power-of-two-sized array of `kvm_dirty_gfn`
/// slots shared with the kernel; it is created by `mmap_from_fd` and
/// unmapped on drop.
#[derive(Debug)]
pub(crate) struct KvmDirtyLogRing {
    /// Next potentially dirty guest frame number slot index; monotonically
    /// increasing, reduced modulo the ring size via `mask`.
    next_dirty: u64,
    /// Memory-mapped array of dirty guest frame number entries
    /// (owned by this struct; unmapped in `Drop`).
    gfns: NonNull<kvm_dirty_gfn>,
    /// Ring size mask (size - 1) for efficient modulo operations; valid
    /// because the slot count is checked to be a power of two at creation.
    mask: u64,
    /// `true` if we need to use Acquire/Release memory ordering (fences)
    /// when reading/writing ring slots on weakly ordered architectures.
    use_acq_rel: bool,
}
46+
47+
impl KvmDirtyLogRing {
48+
/// Maps the KVM dirty log ring from the vCPU file descriptor.
49+
///
50+
/// # Arguments
51+
/// * `fd` - vCPU file descriptor to mmap from.
52+
/// * `size` - Size of memory region in bytes.
53+
pub(crate) fn mmap_from_fd<F: AsRawFd>(
54+
fd: &F,
55+
bytes: usize,
56+
use_acq_rel: bool,
57+
) -> Result<Self> {
58+
// SAFETY: We trust the sysconf libc function and we're calling it
59+
// with a correct parameter.
60+
let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } {
61+
-1 => return Err(errno::Error::last()),
62+
ps => ps as usize,
63+
};
64+
65+
let offset = page_size * KVM_DIRTY_LOG_PAGE_OFFSET as usize;
66+
67+
if bytes % std::mem::size_of::<kvm_dirty_gfn>() != 0 {
68+
// Size of dirty ring in bytes must be multiples of slot size
69+
return Err(errno::Error::new(libc::EINVAL));
70+
}
71+
let slots = bytes / std::mem::size_of::<kvm_dirty_gfn>();
72+
if !slots.is_power_of_two() {
73+
// Number of slots must be power of two
74+
return Err(errno::Error::new(libc::EINVAL));
75+
}
76+
77+
// SAFETY: KVM guarantees that there is a page at offset
78+
// KVM_DIRTY_LOG_PAGE_OFFSET * PAGE_SIZE if the appropriate
79+
// capability is available. If it is not, the call will simply
80+
// fail.
81+
let gfns = unsafe {
82+
NonNull::<kvm_dirty_gfn>::new(libc::mmap(
83+
null_mut(),
84+
bytes,
85+
libc::PROT_READ | libc::PROT_WRITE,
86+
libc::MAP_SHARED,
87+
fd.as_raw_fd(),
88+
offset as i64,
89+
) as *mut kvm_dirty_gfn)
90+
.filter(|addr| addr.as_ptr() != libc::MAP_FAILED as *mut kvm_dirty_gfn)
91+
.ok_or_else(errno::Error::last)?
92+
};
93+
Ok(Self {
94+
next_dirty: 0,
95+
gfns,
96+
mask: (slots - 1) as u64,
97+
use_acq_rel,
98+
})
99+
}
100+
}
101+
102+
impl Drop for KvmDirtyLogRing {
103+
fn drop(&mut self) {
104+
// SAFETY: This is safe because we mmap the page ourselves, and nobody
105+
// else is holding a reference to it.
106+
unsafe {
107+
libc::munmap(
108+
self.gfns.as_ptr().cast(),
109+
(self.mask + 1) as usize * std::mem::size_of::<kvm_dirty_gfn>(),
110+
);
111+
}
112+
}
113+
}
114+
115+
impl Iterator for KvmDirtyLogRing {
116+
type Item = (u32, u64);
117+
fn next(&mut self) -> Option<Self::Item> {
118+
let i = self.next_dirty & self.mask;
119+
// SAFETY: i is not larger than mask, thus is a valid offset into self.gfns,
120+
// therefore this operation produces a valid pointer to a kvm_dirty_gfn
121+
let gfn_ptr = unsafe { self.gfns.add(i as usize).as_ptr() };
122+
123+
if self.use_acq_rel {
124+
fence(Ordering::Acquire);
125+
}
126+
127+
// SAFETY: Can read a valid pointer to a kvm_dirty_gfn
128+
let gfn = unsafe { gfn_ptr.read_volatile() };
129+
130+
if gfn.flags & KVM_DIRTY_GFN_F_DIRTY == 0 {
131+
// next_dirty stays the same, it will become the next dirty element
132+
None
133+
} else {
134+
self.next_dirty += 1;
135+
let mut updated_gfn = gfn;
136+
updated_gfn.flags ^= KVM_DIRTY_GFN_F_RESET;
137+
// SAFETY: Can write to a valid pointer to a kvm_dirty_gfn
138+
unsafe {
139+
gfn_ptr.write_volatile(updated_gfn);
140+
};
141+
if self.use_acq_rel {
142+
fence(Ordering::Release);
143+
}
144+
Some((gfn.slot, gfn.offset))
145+
}
146+
}
147+
}
148+
32149
/// A wrapper around the coalesced MMIO ring page.
33150
#[derive(Debug)]
34151
pub(crate) struct KvmCoalescedIoRing {

0 commit comments

Comments
 (0)