#![feature(const_block_items)]

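//! Direct, unbuffered file I/O built on top of page-aligned buffers.
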
#[cfg(all(test, not(all(miri, windows))))]
mod tests;

use parking_lot::Mutex;
use std::fs::{File, OpenOptions};
use std::mem::MaybeUninit;
use std::path::Path;
use std::{io, mem, slice};

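/// Size of a disk page in bytes; matches [`AlignedPage::SIZE`].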
pub const DISK_PAGE_SIZE: usize = 4096;
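
/// Maximum number of bytes read or written through the internal scratch buffer in a single
/// request; larger requests are split into multiple chunks.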
const MAX_READ_SIZE: usize = 1024 * 1024;

const {
    assert!(MAX_READ_SIZE.is_multiple_of(AlignedPage::SIZE));
}

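/// A page-sized byte buffer whose alignment matches its size, as required for direct I/O.
///
/// The `*_repr` methods below reinterpret slices of pages as slices of raw byte arrays (and back)
/// without copying. A minimal sketch (marked `ignore` because it assumes this crate's items are
/// already in scope):
///
/// ```ignore
/// let pages = vec![AlignedPage::default(); 2];
/// let raw = AlignedPage::slice_to_repr(&pages);
/// assert_eq!(raw.as_flattened().len(), 2 * AlignedPage::SIZE);
/// // The fallible direction only succeeds if the raw pages happen to be suitably aligned
/// assert!(AlignedPage::try_slice_from_repr(raw).is_some());
/// ```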
#[derive(Debug, Copy, Clone)]
#[repr(C, align(4096))]
pub struct AlignedPage([u8; AlignedPage::SIZE]);

const {
    assert!(align_of::<AlignedPage>() == AlignedPage::SIZE);
}

impl Default for AlignedPage {
    #[inline(always)]
    fn default() -> Self {
        Self([0; AlignedPage::SIZE])
    }
}

impl AlignedPage {
    /// Size of the page in bytes; matches the type's alignment.
    pub const SIZE: usize = 4096;

    pub fn as_uninit_slice_mut(value: &mut [Self]) -> &mut [MaybeUninit<Self>] {
        // SAFETY: `MaybeUninit<AlignedPage>` has the same layout as `AlignedPage`, which itself
        // has no validity invariants beyond its byte contents
        unsafe { mem::transmute(value) }
    }

    #[inline(always)]
    pub fn slice_to_repr(value: &[Self]) -> &[[u8; AlignedPage::SIZE]] {
        // SAFETY: `AlignedPage` is `#[repr(C)]` over a single `[u8; SIZE]` field, so both slice
        // types have identical layout
        unsafe { mem::transmute(value) }
    }

    #[inline(always)]
    pub fn uninit_slice_to_repr(
        value: &[MaybeUninit<Self>],
    ) -> &[MaybeUninit<[u8; AlignedPage::SIZE]>] {
        // SAFETY: same layout as in `slice_to_repr`, additionally wrapped in `MaybeUninit`
        unsafe { mem::transmute(value) }
    }

    #[inline]
    pub fn try_slice_from_repr(value: &[[u8; AlignedPage::SIZE]]) -> Option<&[Self]> {
        // SAFETY: `AlignedPage` is a plain byte array with a stricter alignment requirement,
        // which `align_to` accounts for
        let (before, slice, after) = unsafe { value.align_to::<Self>() };

        if before.is_empty() && after.is_empty() {
            Some(slice)
        } else {
            None
        }
    }

    #[inline]
    pub fn try_uninit_slice_from_repr(
        value: &[MaybeUninit<[u8; AlignedPage::SIZE]>],
    ) -> Option<&[MaybeUninit<Self>]> {
        // SAFETY: same as in `try_slice_from_repr`, additionally wrapped in `MaybeUninit`
        let (before, slice, after) = unsafe { value.align_to::<MaybeUninit<Self>>() };

        if before.is_empty() && after.is_empty() {
            Some(slice)
        } else {
            None
        }
    }

    #[inline(always)]
    pub fn slice_mut_to_repr(slice: &mut [Self]) -> &mut [[u8; AlignedPage::SIZE]] {
        // SAFETY: same layout as in `slice_to_repr`
        unsafe { mem::transmute(slice) }
    }

    #[inline(always)]
    pub fn uninit_slice_mut_to_repr(
        slice: &mut [MaybeUninit<Self>],
    ) -> &mut [MaybeUninit<[u8; AlignedPage::SIZE]>] {
        // SAFETY: same layout as in `uninit_slice_to_repr`
        unsafe { mem::transmute(slice) }
    }

    #[inline]
    pub fn try_slice_mut_from_repr(value: &mut [[u8; AlignedPage::SIZE]]) -> Option<&mut [Self]> {
        // SAFETY: same as in `try_slice_from_repr`
        let (before, slice, after) = unsafe { value.align_to_mut::<Self>() };

        if before.is_empty() && after.is_empty() {
            Some(slice)
        } else {
            None
        }
    }

    #[inline]
    pub fn try_uninit_slice_mut_from_repr(
        value: &mut [MaybeUninit<[u8; AlignedPage::SIZE]>],
    ) -> Option<&mut [MaybeUninit<Self>]> {
        // SAFETY: same as in `try_uninit_slice_from_repr`
        let (before, slice, after) = unsafe { value.align_to_mut::<MaybeUninit<Self>>() };

        if before.is_empty() && after.is_empty() {
            Some(slice)
        } else {
            None
        }
    }
}

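/// A file wrapper that performs direct (OS-unbuffered) I/O.
///
/// Offsets and lengths passed to [`Self::read_exact_at`] and [`Self::write_all_at`] do not need to
/// be page-aligned: requests are staged through an internal page-aligned scratch buffer and split
/// into chunks of at most `MAX_READ_SIZE` bytes.
///
/// A minimal usage sketch (marked `ignore` because it assumes this crate's items are already in
/// scope):
///
/// ```ignore
/// let mut options = OpenOptions::new();
/// options.read(true).write(true).create(true);
/// let file = DirectIoFile::open(options, "example.bin")?;
///
/// file.set_len(2 * AlignedPage::SIZE as u64)?;
/// // Neither the offset nor the length is page-aligned; the scratch buffer handles that
/// file.write_all_at(b"hello", 10)?;
///
/// let mut buf = [0u8; 5];
/// file.read_exact_at(&mut buf, 10)?;
/// assert_eq!(&buf, b"hello");
/// ```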
#[derive(Debug)]
pub struct DirectIoFile {
    file: File,
    scratch_buffer: Mutex<Vec<AlignedPage>>,
}

impl DirectIoFile {
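    /// Opens a file for direct I/O.
    ///
    /// OS-level caching is bypassed where possible: `O_DIRECT` on Linux, `F_NOCACHE` on macOS and
    /// `FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH` on Windows. Under Miri these flags are
    /// skipped so the code stays runnable there.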
    #[inline]
    pub fn open<P>(
        #[cfg(any(target_os = "linux", windows))] mut options: OpenOptions,
        #[cfg(not(any(target_os = "linux", windows)))] options: OpenOptions,
        path: P,
    ) -> io::Result<Self>
    where
        P: AsRef<Path>,
    {
        // Bypass the OS page cache on Linux (skipped under Miri)
        #[cfg(target_os = "linux")]
        if !cfg!(miri) {
            use std::os::unix::fs::OpenOptionsExt;

            options.custom_flags(libc::O_DIRECT);
        }
        // Disable OS buffering and enable write-through on Windows (skipped under Miri)
        #[cfg(windows)]
        if !cfg!(miri) {
            use std::os::windows::fs::OpenOptionsExt;

            options.custom_flags(
                windows::Win32::Storage::FileSystem::FILE_FLAG_WRITE_THROUGH.0
                    | windows::Win32::Storage::FileSystem::FILE_FLAG_NO_BUFFERING.0,
            );
        }
        let file = options.open(path)?;

        // Disable OS caching on macOS; this is applied to the already opened file descriptor
        // (skipped under Miri)
        #[cfg(target_os = "macos")]
        if !cfg!(miri) {
            use std::os::unix::io::AsRawFd;

            // SAFETY: FFI call with a valid file descriptor and integer arguments only
            if unsafe { libc::fcntl(file.as_raw_fd(), libc::F_NOCACHE, 1) } != 0 {
                return Err(io::Error::last_os_error());
            }
        }

        Ok(Self {
            file,
            scratch_buffer: Mutex::new(vec![
                AlignedPage::default();
                MAX_READ_SIZE / AlignedPage::SIZE
            ]),
        })
    }

    /// Returns the current length of the file in bytes.
    #[inline]
    pub fn len(&self) -> io::Result<u64> {
        Ok(self.file.metadata()?.len())
    }

    /// Returns `true` if the file is currently empty.
    #[inline]
    pub fn is_empty(&self) -> io::Result<bool> {
        Ok(self.len()? == 0)
    }

    /// Preallocates `len` bytes of space for the file on disk.
    #[inline(always)]
    pub fn allocate(&self, len: u64) -> io::Result<()> {
        fs2::FileExt::allocate(&self.file, len)
    }

    /// Truncates or extends the file to exactly `len` bytes.
    #[inline(always)]
    pub fn set_len(&self, len: u64) -> io::Result<()> {
        self.file.set_len(len)
    }

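    /// Reads exactly `buf.len()` bytes starting at `offset`.
    ///
    /// Neither `offset` nor `buf.len()` needs to be page-aligned: data is staged through the
    /// internal scratch buffer and the request is split into chunks of at most `MAX_READ_SIZE`
    /// bytes.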
    pub fn read_exact_at(&self, buf: &mut [u8], mut offset: u64) -> io::Result<()> {
        if buf.is_empty() {
            return Ok(());
        }

        let mut scratch_buffer = self.scratch_buffer.lock();

        // The scratch buffer is sized in the constructor and never exceeds `MAX_READ_SIZE`
        debug_assert!(
            AlignedPage::slice_to_repr(&scratch_buffer)
                .as_flattened()
                .len()
                <= MAX_READ_SIZE
        );

        let padding = (offset % AlignedPage::SIZE as u64) as usize;
        // The first chunk may start at an unaligned offset, so its size is capped such that
        // `padding + size` still fits into the scratch buffer
        let first_unaligned_chunk_size = (MAX_READ_SIZE - padding).min(buf.len());
        let (unaligned_start, buf) = buf.split_at_mut(first_unaligned_chunk_size);
        {
            let bytes_to_read = unaligned_start.len();
            unaligned_start.copy_from_slice(self.read_exact_at_internal(
                &mut scratch_buffer,
                bytes_to_read,
                offset,
            )?);
            offset += unaligned_start.len() as u64;
        }

        if buf.is_empty() {
            return Ok(());
        }

        // The rest of the request is now page-aligned and is read in `MAX_READ_SIZE` chunks
        for buf in buf.chunks_mut(MAX_READ_SIZE) {
            let bytes_to_read = buf.len();
            buf.copy_from_slice(self.read_exact_at_internal(
                &mut scratch_buffer,
                bytes_to_read,
                offset,
            )?);
            offset += buf.len() as u64;
        }

        Ok(())
    }

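    /// Writes all of `buf` starting at `offset`.
    ///
    /// Neither `offset` nor `buf.len()` needs to be page-aligned: unaligned parts are handled with
    /// a read-modify-write of whole pages through the internal scratch buffer, and the request is
    /// split into chunks of at most `MAX_READ_SIZE` bytes.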
    pub fn write_all_at(&self, buf: &[u8], mut offset: u64) -> io::Result<()> {
        if buf.is_empty() {
            return Ok(());
        }

        let mut scratch_buffer = self.scratch_buffer.lock();

        // The scratch buffer is sized in the constructor and never exceeds `MAX_READ_SIZE`
        debug_assert!(
            AlignedPage::slice_to_repr(&scratch_buffer)
                .as_flattened()
                .len()
                <= MAX_READ_SIZE
        );

        let padding = (offset % AlignedPage::SIZE as u64) as usize;
        // The first chunk may start at an unaligned offset, so its size is capped such that
        // `padding + size` still fits into the scratch buffer
        let first_unaligned_chunk_size = (MAX_READ_SIZE - padding).min(buf.len());
        let (unaligned_start, buf) = buf.split_at(first_unaligned_chunk_size);
        {
            self.write_all_at_internal(&mut scratch_buffer, unaligned_start, offset)?;
            offset += unaligned_start.len() as u64;
        }

        if buf.is_empty() {
            return Ok(());
        }

        // The rest of the request is now page-aligned and is written in `MAX_READ_SIZE` chunks
        for buf in buf.chunks(MAX_READ_SIZE) {
            self.write_all_at_internal(&mut scratch_buffer, buf, offset)?;
            offset += buf.len() as u64;
        }

        Ok(())
    }

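    /// Reads whole pages directly into the provided (possibly uninitialized) page buffers, without
    /// going through the scratch buffer.
    ///
    /// The buffer length is a multiple of the page size by construction; with direct I/O enabled
    /// the caller is additionally responsible for passing a page-aligned `offset`.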
    #[inline]
    pub fn read_exact_at_raw(
        &self,
        buf: &mut [MaybeUninit<AlignedPage>],
        offset: u64,
    ) -> io::Result<()> {
        let buf = AlignedPage::uninit_slice_mut_to_repr(buf);

        // SAFETY: the buffer contains plain bytes with no validity invariants, and its contents
        // are only observed after the read below has filled them
        let buf = unsafe {
            slice::from_raw_parts_mut(
                buf.as_mut_ptr().cast::<[u8; AlignedPage::SIZE]>(),
                buf.len(),
            )
        };

        let buf = buf.as_flattened_mut();

        #[cfg(unix)]
        {
            use std::os::unix::fs::FileExt;

            self.file.read_exact_at(buf, offset)
        }
        #[cfg(windows)]
        {
            use std::os::windows::fs::FileExt;

            let mut buf = buf;
            let mut offset = offset;
            while !buf.is_empty() {
                match self.file.seek_read(buf, offset) {
                    Ok(0) => {
                        break;
                    }
                    Ok(n) => {
                        buf = &mut buf[n..];
                        offset += n as u64;
                    }
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
                        // Try the read again if it was interrupted
                    }
                    Err(e) => {
                        return Err(e);
                    }
                }
            }

            if !buf.is_empty() {
                Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "failed to fill the whole buffer",
                ))
            } else {
                Ok(())
            }
        }
    }

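    /// Writes whole pages directly from the provided page buffers, without going through the
    /// scratch buffer.
    ///
    /// As with [`Self::read_exact_at_raw`], the caller is responsible for passing a page-aligned
    /// `offset` when direct I/O is enabled.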
    #[inline]
    pub fn write_all_at_raw(&self, buf: &[AlignedPage], offset: u64) -> io::Result<()> {
        let buf = AlignedPage::slice_to_repr(buf).as_flattened();

        #[cfg(unix)]
        {
            use std::os::unix::fs::FileExt;

            self.file.write_all_at(buf, offset)
        }
        #[cfg(windows)]
        {
            use std::os::windows::fs::FileExt;

            let mut buf = buf;
            let mut offset = offset;
            while !buf.is_empty() {
                match self.file.seek_write(buf, offset) {
                    Ok(0) => {
                        return Err(io::Error::new(
                            io::ErrorKind::WriteZero,
                            "failed to write the whole buffer",
                        ));
                    }
                    Ok(n) => {
                        buf = &buf[n..];
                        offset += n as u64;
                    }
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
                        // Try the write again if it was interrupted
                    }
                    Err(e) => {
                        return Err(e);
                    }
                }
            }

            Ok(())
        }
    }

    /// Access to the underlying [`File`] handle.
    #[inline(always)]
    pub fn file(&self) -> &File {
        &self.file
    }

    fn read_exact_at_internal<'a>(
        &self,
        scratch_buffer: &'a mut [AlignedPage],
        bytes_to_read: usize,
        offset: u64,
    ) -> io::Result<&'a [u8]> {
        let page_aligned_offset = offset / AlignedPage::SIZE as u64 * AlignedPage::SIZE as u64;
        let padding = (offset - page_aligned_offset) as usize;

        // Number of whole pages that cover `bytes_to_read` bytes starting at the unaligned offset
        let pages_to_read = (padding + bytes_to_read).div_ceil(AlignedPage::SIZE);
        let scratch_buffer = &mut scratch_buffer[..pages_to_read];

        self.read_exact_at_raw(
            AlignedPage::as_uninit_slice_mut(scratch_buffer),
            page_aligned_offset,
        )?;

        Ok(&AlignedPage::slice_to_repr(scratch_buffer).as_flattened()[padding..][..bytes_to_read])
    }

    fn write_all_at_internal(
        &self,
        scratch_buffer: &mut [AlignedPage],
        bytes_to_write: &[u8],
        offset: u64,
    ) -> io::Result<()> {
        let page_aligned_offset = offset / AlignedPage::SIZE as u64 * AlignedPage::SIZE as u64;
        let padding = (offset - page_aligned_offset) as usize;

        // Number of whole pages that cover `bytes_to_write` bytes starting at the unaligned offset
        let pages_to_read = (padding + bytes_to_write.len()).div_ceil(AlignedPage::SIZE);

        if padding == 0 && bytes_to_write.len().is_multiple_of(AlignedPage::SIZE) {
            // Fast path: the write starts at a page boundary and covers whole pages, so the data
            // can be copied into the scratch buffer and written out directly
            let scratch_buffer = &mut scratch_buffer[..pages_to_read];
            AlignedPage::slice_mut_to_repr(scratch_buffer)
                .as_flattened_mut()
                .copy_from_slice(bytes_to_write);
            self.write_all_at_raw(scratch_buffer, offset)?;
        } else {
            // Slow path: read the affected pages first, overwrite the relevant part and write the
            // whole pages back (read-modify-write)
            let scratch_buffer = &mut scratch_buffer[..pages_to_read];
            self.read_exact_at_raw(
                AlignedPage::as_uninit_slice_mut(scratch_buffer),
                page_aligned_offset,
            )?;
            AlignedPage::slice_mut_to_repr(scratch_buffer).as_flattened_mut()[padding..]
                [..bytes_to_write.len()]
                .copy_from_slice(bytes_to_write);
            self.write_all_at_raw(scratch_buffer, page_aligned_offset)?;
        }

        Ok(())
    }
}