1#[cfg(all(test, not(all(miri, windows))))]
12mod tests;
13
14use parking_lot::Mutex;
15use std::fs::{File, OpenOptions};
16use std::mem::MaybeUninit;
17use std::path::Path;
18use std::{io, mem, slice};
19
20pub const DISK_PAGE_SIZE: usize = 4096;
22const MAX_READ_SIZE: usize = 1024 * 1024;
24
25const _: () = {
26 assert!(MAX_READ_SIZE.is_multiple_of(AlignedPage::SIZE));
27};
28
/// A buffer of [`AlignedPage::SIZE`] bytes whose alignment equals its size.
///
/// Slices of this type can be reinterpreted as slices of plain byte arrays (and back, when the
/// memory happens to be suitably aligned) using the associated conversion functions.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(4096))]
pub struct AlignedPage([u8; AlignedPage::SIZE]);

// `repr(align(..))` only accepts a literal, so make sure it never drifts from `SIZE`
const _: () = assert!(align_of::<AlignedPage>() == AlignedPage::SIZE);

impl Default for AlignedPage {
    /// Creates a page filled with zeroes
    #[inline(always)]
    fn default() -> Self {
        Self([0u8; Self::SIZE])
    }
}

impl AlignedPage {
    /// Size (and, as asserted at compile time, alignment) of a page in bytes
    pub const SIZE: usize = 4096;

    /// Reinterpret an exclusive slice of pages as a slice of possibly uninitialized pages.
    ///
    /// NOTE(review): the returned slice makes it possible to store uninitialized data into
    /// memory that is still typed as initialized pages; callers are expected not to do that.
    pub fn as_uninit_slice_mut(value: &mut [Self]) -> &mut [MaybeUninit<Self>] {
        // SAFETY: `MaybeUninit<Self>` has the same layout as `Self`
        unsafe { mem::transmute::<&mut [Self], &mut [MaybeUninit<Self>]>(value) }
    }

    /// View a shared slice of pages as a slice of byte arrays
    #[inline(always)]
    pub fn slice_to_repr(value: &[Self]) -> &[[u8; AlignedPage::SIZE]] {
        // SAFETY: `AlignedPage` is a `repr(C)` wrapper around a byte array of the same size, and
        // the target type has a weaker alignment requirement
        unsafe { mem::transmute::<&[Self], &[[u8; AlignedPage::SIZE]]>(value) }
    }

    /// View a shared slice of possibly uninitialized pages as possibly uninitialized byte arrays
    #[inline(always)]
    pub fn uninit_slice_to_repr(
        value: &[MaybeUninit<Self>],
    ) -> &[MaybeUninit<[u8; AlignedPage::SIZE]>] {
        // SAFETY: same element size on both sides, the target type has a weaker alignment
        // requirement, and `MaybeUninit` places no validity requirement on the contents
        unsafe {
            mem::transmute::<&[MaybeUninit<Self>], &[MaybeUninit<[u8; AlignedPage::SIZE]>]>(value)
        }
    }

    /// Try to view a shared slice of byte arrays as a slice of pages.
    ///
    /// Returns `None` unless [`slice::align_to`] reports that the entire input forms the aligned
    /// middle part, i.e. there is neither a leading nor a trailing remainder.
    #[inline]
    pub fn try_slice_from_repr(value: &[[u8; AlignedPage::SIZE]]) -> Option<&[Self]> {
        // SAFETY: any byte array of `SIZE` bytes is a valid `AlignedPage`; alignment is taken
        // care of by `align_to` itself
        let (head, pages, tail) = unsafe { value.align_to::<Self>() };

        (head.is_empty() && tail.is_empty()).then_some(pages)
    }

    /// Try to view a shared slice of possibly uninitialized byte arrays as possibly
    /// uninitialized pages.
    ///
    /// Returns `None` unless [`slice::align_to`] reports that the entire input forms the aligned
    /// middle part, i.e. there is neither a leading nor a trailing remainder.
    #[inline]
    pub fn try_uninit_slice_from_repr(
        value: &[MaybeUninit<[u8; AlignedPage::SIZE]>],
    ) -> Option<&[MaybeUninit<Self>]> {
        // SAFETY: `MaybeUninit` places no validity requirement on the contents; alignment is
        // taken care of by `align_to` itself
        let (head, pages, tail) = unsafe { value.align_to::<MaybeUninit<Self>>() };

        (head.is_empty() && tail.is_empty()).then_some(pages)
    }

    /// View an exclusive slice of pages as a slice of byte arrays
    #[inline(always)]
    pub fn slice_mut_to_repr(slice: &mut [Self]) -> &mut [[u8; AlignedPage::SIZE]] {
        // SAFETY: `AlignedPage` is a `repr(C)` wrapper around a byte array of the same size, the
        // target type has a weaker alignment requirement, and every byte pattern is a valid page
        unsafe { mem::transmute::<&mut [Self], &mut [[u8; AlignedPage::SIZE]]>(slice) }
    }

    /// View an exclusive slice of possibly uninitialized pages as possibly uninitialized byte
    /// arrays
    #[inline(always)]
    pub fn uninit_slice_mut_to_repr(
        slice: &mut [MaybeUninit<Self>],
    ) -> &mut [MaybeUninit<[u8; AlignedPage::SIZE]>] {
        // SAFETY: same element size on both sides, the target type has a weaker alignment
        // requirement, and `MaybeUninit` places no validity requirement on the contents
        unsafe {
            mem::transmute::<
                &mut [MaybeUninit<Self>],
                &mut [MaybeUninit<[u8; AlignedPage::SIZE]>],
            >(slice)
        }
    }

    /// Try to view an exclusive slice of byte arrays as a slice of pages.
    ///
    /// Returns `None` unless [`slice::align_to_mut`] reports that the entire input forms the
    /// aligned middle part, i.e. there is neither a leading nor a trailing remainder.
    #[inline]
    pub fn try_slice_mut_from_repr(value: &mut [[u8; AlignedPage::SIZE]]) -> Option<&mut [Self]> {
        // SAFETY: any byte array of `SIZE` bytes is a valid `AlignedPage` and vice versa;
        // alignment is taken care of by `align_to_mut` itself
        let (head, pages, tail) = unsafe { value.align_to_mut::<Self>() };

        (head.is_empty() && tail.is_empty()).then_some(pages)
    }

    /// Try to view an exclusive slice of possibly uninitialized byte arrays as possibly
    /// uninitialized pages.
    ///
    /// Returns `None` unless [`slice::align_to_mut`] reports that the entire input forms the
    /// aligned middle part, i.e. there is neither a leading nor a trailing remainder.
    #[inline]
    pub fn try_uninit_slice_mut_from_repr(
        value: &mut [MaybeUninit<[u8; AlignedPage::SIZE]>],
    ) -> Option<&mut [MaybeUninit<Self>]> {
        // SAFETY: `MaybeUninit` places no validity requirement on the contents; alignment is
        // taken care of by `align_to_mut` itself
        let (head, pages, tail) = unsafe { value.align_to_mut::<MaybeUninit<Self>>() };

        (head.is_empty() && tail.is_empty()).then_some(pages)
    }
}
155
156#[derive(Debug)]
165pub struct DirectIoFile {
166 file: File,
167 scratch_buffer: Mutex<Vec<AlignedPage>>,
169}
170
171impl DirectIoFile {
172 #[inline]
180 pub fn open<P>(
181 #[cfg(any(target_os = "linux", windows))] mut options: OpenOptions,
182 #[cfg(not(any(target_os = "linux", windows)))] options: OpenOptions,
183 path: P,
184 ) -> io::Result<Self>
185 where
186 P: AsRef<Path>,
187 {
188 #[cfg(target_os = "linux")]
190 if !cfg!(miri) {
192 use std::os::unix::fs::OpenOptionsExt;
193
194 options.custom_flags(libc::O_DIRECT);
195 }
196 #[cfg(windows)]
198 if !cfg!(miri) {
200 use std::os::windows::fs::OpenOptionsExt;
201
202 options.custom_flags(
203 windows::Win32::Storage::FileSystem::FILE_FLAG_WRITE_THROUGH.0
204 | windows::Win32::Storage::FileSystem::FILE_FLAG_NO_BUFFERING.0,
205 );
206 }
207 let file = options.open(path)?;
208
209 #[cfg(target_os = "macos")]
211 if !cfg!(miri) {
213 use std::os::unix::io::AsRawFd;
214
215 if unsafe { libc::fcntl(file.as_raw_fd(), libc::F_NOCACHE, 1) } != 0 {
217 return Err(io::Error::last_os_error());
218 }
219 }
220
221 Ok(Self {
222 file,
223 scratch_buffer: Mutex::new(vec![
225 AlignedPage::default();
226 MAX_READ_SIZE / AlignedPage::SIZE
227 ]),
228 })
229 }
230
231 #[inline]
233 pub fn len(&self) -> io::Result<u64> {
234 Ok(self.file.metadata()?.len())
235 }
236
237 #[inline]
239 pub fn is_empty(&self) -> io::Result<bool> {
240 Ok(self.len()? == 0)
241 }
242
243 #[inline(always)]
247 pub fn allocate(&self, len: u64) -> io::Result<()> {
248 fs2::FileExt::allocate(&self.file, len)
249 }
250
251 #[inline(always)]
258 pub fn set_len(&self, len: u64) -> io::Result<()> {
259 self.file.set_len(len)
260 }
261
262 pub fn read_exact_at(&self, buf: &mut [u8], mut offset: u64) -> io::Result<()> {
267 if buf.is_empty() {
268 return Ok(());
269 }
270
271 let mut scratch_buffer = self.scratch_buffer.lock();
272
273 debug_assert!(
275 AlignedPage::slice_to_repr(&scratch_buffer)
276 .as_flattened()
277 .len()
278 <= MAX_READ_SIZE
279 );
280
281 let padding = (offset % AlignedPage::SIZE as u64) as usize;
283 let first_unaligned_chunk_size = (MAX_READ_SIZE - padding).min(buf.len());
284 let (unaligned_start, buf) = buf.split_at_mut(first_unaligned_chunk_size);
285 {
286 let bytes_to_read = unaligned_start.len();
287 unaligned_start.copy_from_slice(self.read_exact_at_internal(
288 &mut scratch_buffer,
289 bytes_to_read,
290 offset,
291 )?);
292 offset += unaligned_start.len() as u64;
293 }
294
295 if buf.is_empty() {
296 return Ok(());
297 }
298
299 for buf in buf.chunks_mut(MAX_READ_SIZE) {
301 let bytes_to_read = buf.len();
302 buf.copy_from_slice(self.read_exact_at_internal(
303 &mut scratch_buffer,
304 bytes_to_read,
305 offset,
306 )?);
307 offset += buf.len() as u64;
308 }
309
310 Ok(())
311 }
312
313 pub fn write_all_at(&self, buf: &[u8], mut offset: u64) -> io::Result<()> {
318 if buf.is_empty() {
319 return Ok(());
320 }
321
322 let mut scratch_buffer = self.scratch_buffer.lock();
323
324 debug_assert!(
326 AlignedPage::slice_to_repr(&scratch_buffer)
327 .as_flattened()
328 .len()
329 <= MAX_READ_SIZE
330 );
331
332 let padding = (offset % AlignedPage::SIZE as u64) as usize;
334 let first_unaligned_chunk_size = (MAX_READ_SIZE - padding).min(buf.len());
335 let (unaligned_start, buf) = buf.split_at(first_unaligned_chunk_size);
336 {
337 self.write_all_at_internal(&mut scratch_buffer, unaligned_start, offset)?;
338 offset += unaligned_start.len() as u64;
339 }
340
341 if buf.is_empty() {
342 return Ok(());
343 }
344
345 for buf in buf.chunks(MAX_READ_SIZE) {
347 self.write_all_at_internal(&mut scratch_buffer, buf, offset)?;
348 offset += buf.len() as u64;
349 }
350
351 Ok(())
352 }
353
354 #[inline]
361 pub fn read_exact_at_raw(
362 &self,
363 buf: &mut [MaybeUninit<AlignedPage>],
364 offset: u64,
365 ) -> io::Result<()> {
366 let buf = AlignedPage::uninit_slice_mut_to_repr(buf);
367
368 let buf = unsafe {
372 slice::from_raw_parts_mut(
373 buf.as_mut_ptr().cast::<[u8; AlignedPage::SIZE]>(),
374 buf.len(),
375 )
376 };
377
378 let buf = buf.as_flattened_mut();
379
380 #[cfg(unix)]
381 {
382 use std::os::unix::fs::FileExt;
383
384 self.file.read_exact_at(buf, offset)
385 }
386 #[cfg(windows)]
387 {
388 use std::os::windows::fs::FileExt;
389
390 let mut buf = buf;
391 let mut offset = offset;
392 while !buf.is_empty() {
393 match self.file.seek_read(buf, offset) {
394 Ok(0) => {
395 break;
396 }
397 Ok(n) => {
398 buf = &mut buf[n..];
399 offset += n as u64;
400 }
401 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
402 }
404 Err(e) => {
405 return Err(e);
406 }
407 }
408 }
409
410 if !buf.is_empty() {
411 Err(io::Error::new(
412 io::ErrorKind::UnexpectedEof,
413 "failed to fill the whole buffer",
414 ))
415 } else {
416 Ok(())
417 }
418 }
419 }
420
421 #[inline]
426 pub fn write_all_at_raw(&self, buf: &[AlignedPage], offset: u64) -> io::Result<()> {
427 let buf = AlignedPage::slice_to_repr(buf).as_flattened();
428
429 #[cfg(unix)]
430 {
431 use std::os::unix::fs::FileExt;
432
433 self.file.write_all_at(buf, offset)
434 }
435 #[cfg(windows)]
436 {
437 use std::os::windows::fs::FileExt;
438
439 let mut buf = buf;
440 let mut offset = offset;
441 while !buf.is_empty() {
442 match self.file.seek_write(buf, offset) {
443 Ok(0) => {
444 return Err(io::Error::new(
445 io::ErrorKind::WriteZero,
446 "failed to write the whole buffer",
447 ));
448 }
449 Ok(n) => {
450 buf = &buf[n..];
451 offset += n as u64;
452 }
453 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
454 }
456 Err(e) => {
457 return Err(e);
458 }
459 }
460 }
461
462 Ok(())
463 }
464 }
465
466 #[inline(always)]
468 pub fn file(&self) -> &File {
469 &self.file
470 }
471
472 fn read_exact_at_internal<'a>(
473 &self,
474 scratch_buffer: &'a mut [AlignedPage],
475 bytes_to_read: usize,
476 offset: u64,
477 ) -> io::Result<&'a [u8]> {
478 let page_aligned_offset = offset / AlignedPage::SIZE as u64 * AlignedPage::SIZE as u64;
479 let padding = (offset - page_aligned_offset) as usize;
480
481 let pages_to_read = (padding + bytes_to_read).div_ceil(AlignedPage::SIZE);
484 let scratch_buffer = &mut scratch_buffer[..pages_to_read];
485
486 self.read_exact_at_raw(
487 AlignedPage::as_uninit_slice_mut(scratch_buffer),
488 page_aligned_offset,
489 )?;
490
491 Ok(&AlignedPage::slice_to_repr(scratch_buffer).as_flattened()[padding..][..bytes_to_read])
492 }
493
494 fn write_all_at_internal(
496 &self,
497 scratch_buffer: &mut [AlignedPage],
498 bytes_to_write: &[u8],
499 offset: u64,
500 ) -> io::Result<()> {
501 let page_aligned_offset = offset / AlignedPage::SIZE as u64 * AlignedPage::SIZE as u64;
502 let padding = (offset - page_aligned_offset) as usize;
503
504 let pages_to_read = (padding + bytes_to_write.len()).div_ceil(AlignedPage::SIZE);
506
507 if padding == 0 && pages_to_read == bytes_to_write.len() {
508 let scratch_buffer = &mut scratch_buffer[..pages_to_read];
509 AlignedPage::slice_mut_to_repr(scratch_buffer)
510 .as_flattened_mut()
511 .copy_from_slice(bytes_to_write);
512 self.write_all_at_raw(scratch_buffer, offset)?;
513 } else {
514 let scratch_buffer = &mut scratch_buffer[..pages_to_read];
515 self.read_exact_at_raw(
517 AlignedPage::as_uninit_slice_mut(scratch_buffer),
518 page_aligned_offset,
519 )?;
520 AlignedPage::slice_mut_to_repr(scratch_buffer).as_flattened_mut()[padding..]
522 [..bytes_to_write.len()]
523 .copy_from_slice(bytes_to_write);
524 self.write_all_at_raw(scratch_buffer, page_aligned_offset)?;
525 }
526
527 Ok(())
528 }
529}