Skip to content

Commit e2820d2

Browse files
authored
Merge pull request #236 from artichoke/lopopolo/intern-stacked-borrows-miri-gh-235
Address stacked borrows violation in `SymbolTable::intern`
2 parents 6b7568c + 3a31e98 commit e2820d2

File tree

11 files changed

+362
-33
lines changed

11 files changed

+362
-33
lines changed

src/bytes.rs

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
//! In general, one should expect this crate's performance on `&[u8]` to be
5151
//! roughly similar to performance on `&str`.
5252
53-
use core::convert::TryInto;
5453
use core::hash::BuildHasher;
5554
use core::iter::{FromIterator, FusedIterator, Zip};
5655
use core::marker::PhantomData;
@@ -692,23 +691,66 @@ where
692691
if let Some(&id) = self.map.get(&*contents) {
693692
return Ok(id);
694693
}
694+
695+
// The `Interned::Owned` variant is derived from a `Box<T>`. When such
696+
// a structure is moved or assigned, as it is below in the call to
697+
// `self.vec.push`, the allocation is "retagged" in Miri/stacked borrows.
698+
//
699+
// Retagging an allocation pops all of the borrows derived from it off
700+
// of the stack. This means we need to move the `Interned` into the
701+
// `Vec` before calling `Interned::as_static_slice` to ensure the
702+
// reference does not get invalidated by retagging.
703+
//
704+
// However, that alone may be insufficient as the `Interened` may be
705+
// moved when the symbol table grows.
706+
//
707+
// The `SymbolTable` API prevents shared references to the `Interned`
708+
// being invalidated by a retag by tying resolved symbol contents,
709+
// `&'a T`, to `&'a SymbolTable`, which means the `SymbolTable` cannot
710+
// grow, shrink, or otherwise reallocate/move contents while a reference
711+
// to the `Interned`'s inner `T` is alive.
712+
//
713+
// To protect against future updates to stacked borrows or the unsafe
714+
// code operational semantics, we can address this additional invariant
715+
// with updated `Interned` internals which store the `Box<T>` in a raw
716+
// pointer form, which allows moves to be treated as untyped copies.
717+
//
718+
// See:
719+
//
720+
// - <https://github.com/artichoke/intaglio/issues/235>
721+
// - <https://github.com/artichoke/intaglio/pull/236>
695722
let name = Interned::from(contents);
696-
let id = self.map.len().try_into()?;
723+
let id = Symbol::try_from(self.map.len())?;
724+
725+
// Move the `Interned` into the `Vec`, causing it to be retagged under
726+
// stacked borrows, before taking any references to its inner `T`.
727+
self.vec.push(name);
728+
// Ensure we grow the map before we take any shared references to the
729+
// inner `T`.
730+
self.map.reserve(1);
731+
732+
// SAFETY: `self.vec` is non-empty because the preceding line of code
733+
// pushed an entry into it.
734+
let name = unsafe { self.vec.last().unwrap_unchecked() };
735+
697736
// SAFETY: This expression creates a reference with a `'static` lifetime
698737
// from an owned and interned buffer, which is permissible because:
699738
//
700739
// - `Interned` is an internal implementation detail of `SymbolTable`.
701740
// - `SymbolTable` never gives out `'static` references to underlying
702-
// byte string byte contents.
741+
// contents.
703742
// - All slice references given out by the `SymbolTable` have the same
704743
// lifetime as the `SymbolTable`.
705744
// - The `map` field of `SymbolTable`, which contains the `'static`
706745
// references, is dropped before the owned buffers stored in this
707746
// `Interned`.
747+
// - The shared reference may be derived from a `PinBox` which prevents
748+
// moves from retagging the underlying boxed `T` under stacked borrows.
749+
// - The symbol table cannot grow, shrink, or otherwise move its contents
750+
// while this reference is alive.
708751
let slice = unsafe { name.as_static_slice() };
709752

710753
self.map.insert(slice, id);
711-
self.vec.push(name);
712754

713755
debug_assert_eq!(self.get(id), Some(slice));
714756
debug_assert_eq!(self.intern(slice), Ok(id));

src/cstr.rs

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@
5555
//! [`CString`]: std::ffi::CString
5656
//! [`&CStr`]: std::ffi::CStr
5757
58-
use core::convert::TryInto;
5958
use core::hash::BuildHasher;
6059
use core::iter::{FromIterator, FusedIterator, Zip};
6160
use core::marker::PhantomData;
@@ -715,23 +714,66 @@ where
715714
if let Some(&id) = self.map.get(&*contents) {
716715
return Ok(id);
717716
}
717+
718+
// The `Interned::Owned` variant is derived from a `Box<T>`. When such
719+
// a structure is moved or assigned, as it is below in the call to
720+
// `self.vec.push`, the allocation is "retagged" in Miri/stacked borrows.
721+
//
722+
// Retagging an allocation pops all of the borrows derived from it off
723+
// of the stack. This means we need to move the `Interned` into the
724+
// `Vec` before calling `Interned::as_static_slice` to ensure the
725+
// reference does not get invalidated by retagging.
726+
//
727+
// However, that alone may be insufficient as the `Interened` may be
728+
// moved when the symbol table grows.
729+
//
730+
// The `SymbolTable` API prevents shared references to the `Interned`
731+
// being invalidated by a retag by tying resolved symbol contents,
732+
// `&'a T`, to `&'a SymbolTable`, which means the `SymbolTable` cannot
733+
// grow, shrink, or otherwise reallocate/move contents while a reference
734+
// to the `Interned`'s inner `T` is alive.
735+
//
736+
// To protect against future updates to stacked borrows or the unsafe
737+
// code operational semantics, we can address this additional invariant
738+
// with updated `Interned` internals which store the `Box<T>` in a raw
739+
// pointer form, which allows moves to be treated as untyped copies.
740+
//
741+
// See:
742+
//
743+
// - <https://github.com/artichoke/intaglio/issues/235>
744+
// - <https://github.com/artichoke/intaglio/pull/236>
718745
let name = Interned::from(contents);
719-
let id = self.map.len().try_into()?;
746+
let id = Symbol::try_from(self.map.len())?;
747+
748+
// Move the `Interned` into the `Vec`, causing it to be retagged under
749+
// stacked borrows, before taking any references to its inner `T`.
750+
self.vec.push(name);
751+
// Ensure we grow the map before we take any shared references to the
752+
// inner `T`.
753+
self.map.reserve(1);
754+
755+
// SAFETY: `self.vec` is non-empty because the preceding line of code
756+
// pushed an entry into it.
757+
let name = unsafe { self.vec.last().unwrap_unchecked() };
758+
720759
// SAFETY: This expression creates a reference with a `'static` lifetime
721760
// from an owned and interned buffer, which is permissible because:
722761
//
723762
// - `Interned` is an internal implementation detail of `SymbolTable`.
724763
// - `SymbolTable` never gives out `'static` references to underlying
725-
// C string byte contents.
764+
// contents.
726765
// - All slice references given out by the `SymbolTable` have the same
727766
// lifetime as the `SymbolTable`.
728767
// - The `map` field of `SymbolTable`, which contains the `'static`
729768
// references, is dropped before the owned buffers stored in this
730769
// `Interned`.
770+
// - The shared reference may be derived from a `PinBox` which prevents
771+
// moves from retagging the underlying boxed `T` under stacked borrows.
772+
// - The symbol table cannot grow, shrink, or otherwise move its contents
773+
// while this reference is alive.
731774
let slice = unsafe { name.as_static_slice() };
732775

733776
self.map.insert(slice, id);
734-
self.vec.push(name);
735777

736778
debug_assert_eq!(self.get(id), Some(slice));
737779
debug_assert_eq!(self.intern(slice), Ok(id));

src/internal.rs

Lines changed: 117 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ use std::ffi::{OsStr, OsString};
1010
#[cfg(feature = "path")]
1111
use std::path::{Path, PathBuf};
1212

13+
use self::boxed::PinBox;
14+
1315
/// Wrapper around `&'static` slices that does not allow mutable access to the
1416
/// inner slice.
1517
pub struct Interned<T: 'static + ?Sized>(Slice<T>);
@@ -59,7 +61,7 @@ where
5961
{
6062
/// Return a reference to the inner slice.
6163
#[inline]
62-
pub const fn as_slice(&self) -> &T {
64+
pub fn as_slice(&self) -> &T {
6365
self.0.as_slice()
6466
}
6567

@@ -127,7 +129,7 @@ enum Slice<T: 'static + ?Sized> {
127129
/// True `'static` references.
128130
Static(&'static T),
129131
/// Owned `'static` references.
130-
Owned(Box<T>),
132+
Owned(PinBox<T>),
131133
}
132134

133135
impl<T> From<&'static T> for Slice<T>
@@ -143,7 +145,7 @@ where
143145
impl From<String> for Slice<str> {
144146
#[inline]
145147
fn from(owned: String) -> Self {
146-
Self::Owned(owned.into_boxed_str())
148+
Self::Owned(PinBox::new(owned.into_boxed_str()))
147149
}
148150
}
149151

@@ -161,7 +163,7 @@ impl From<Cow<'static, str>> for Slice<str> {
161163
impl From<Vec<u8>> for Slice<[u8]> {
162164
#[inline]
163165
fn from(owned: Vec<u8>) -> Self {
164-
Self::Owned(owned.into_boxed_slice())
166+
Self::Owned(PinBox::new(owned.into_boxed_slice()))
165167
}
166168
}
167169

@@ -180,7 +182,7 @@ impl From<Cow<'static, [u8]>> for Slice<[u8]> {
180182
impl From<CString> for Slice<CStr> {
181183
#[inline]
182184
fn from(owned: CString) -> Self {
183-
Self::Owned(owned.into_boxed_c_str())
185+
Self::Owned(PinBox::new(owned.into_boxed_c_str()))
184186
}
185187
}
186188

@@ -199,7 +201,7 @@ impl From<Cow<'static, CStr>> for Slice<CStr> {
199201
impl From<OsString> for Slice<OsStr> {
200202
#[inline]
201203
fn from(owned: OsString) -> Self {
202-
Self::Owned(owned.into_boxed_os_str())
204+
Self::Owned(PinBox::new(owned.into_boxed_os_str()))
203205
}
204206
}
205207

@@ -218,7 +220,7 @@ impl From<Cow<'static, OsStr>> for Slice<OsStr> {
218220
impl From<PathBuf> for Slice<Path> {
219221
#[inline]
220222
fn from(owned: PathBuf) -> Self {
221-
Self::Owned(owned.into_boxed_path())
223+
Self::Owned(PinBox::new(owned.into_boxed_path()))
222224
}
223225
}
224226

@@ -239,10 +241,13 @@ where
239241
{
240242
/// Return a reference to the inner slice.
241243
#[inline]
242-
const fn as_slice(&self) -> &T {
244+
fn as_slice(&self) -> &T {
243245
match self {
244246
Self::Static(slice) => slice,
245-
Self::Owned(owned) => owned,
247+
Self::Owned(owned) => {
248+
// SAFETY: `PinBox` acts like `Box`.
249+
unsafe { owned.as_ref() }
250+
}
246251
}
247252
}
248253

@@ -258,8 +263,6 @@ where
258263
match self {
259264
Self::Static(slice) => slice,
260265
Self::Owned(owned) => {
261-
// Coerce the `Box<T>` to a pointer.
262-
let ptr: *const T = &**owned;
263266
// SAFETY: This expression creates a reference with a `'static`
264267
// lifetime from an owned buffer, which is permissible because:
265268
//
@@ -270,9 +273,10 @@ where
270273
// - The `map` field of the various symbol tables which contains
271274
// the `'static` references, is dropped before the owned buffers
272275
// stored in this `Slice`.
276+
// - `PinBox` acts like `Box`.
273277
unsafe {
274278
// Coerce the pointer to a `&'static T`.
275-
&*ptr
279+
owned.as_ref()
276280
}
277281
}
278282
}
@@ -354,6 +358,107 @@ impl fmt::Debug for Slice<Path> {
354358
}
355359
}
356360

361+
/// An abstraction over a `Box<T>` where T is an unsized slice type which moves
362+
/// the box by raw pointer. This type is required to satisfy Miri with
363+
/// `-Zmiri-retag-fields`. See #235, #236.
364+
///
365+
/// The `PinBox` type is derived from:
366+
///
367+
/// - <https://github.com/CAD97/simple-interner/blob/24a836e9f8a0173faf48438d711442c2a86659c1/src/interner.rs#L26-L56>
368+
/// - <https://github.com/artichoke/intaglio/pull/236#issuecomment-1651058752>
369+
/// - <https://github.com/artichoke/intaglio/pull/236#issuecomment-1652003240>
370+
///
371+
/// This code is placed into the public domain by @CAD97:
372+
///
373+
/// - <https://github.com/artichoke/intaglio/pull/236#issuecomment-1652393974>
374+
mod boxed {
375+
use core::fmt;
376+
use core::marker::PhantomData;
377+
use core::ptr::NonNull;
378+
379+
/// A wrapper around box that does not provide &mut access to the pointee and
380+
/// uses raw-pointer borrowing rules to avoid invalidating extant references.
381+
///
382+
/// The resolved reference is guaranteed valid until the `PinBox` is dropped.
383+
///
384+
/// This type is meant to allow the owned data in the given `Box<T>` to be moved
385+
/// without being retagged by Miri. See #235, #236.
386+
pub(crate) struct PinBox<T: ?Sized> {
387+
ptr: NonNull<T>,
388+
_marker: PhantomData<Box<T>>,
389+
}
390+
391+
impl<T: ?Sized> PinBox<T> {
392+
#[inline]
393+
pub(crate) fn new(x: Box<T>) -> Self {
394+
let ptr = Box::into_raw(x);
395+
// SAFETY: `ptr` is derived from `Box::into_raw` and can never be null.
396+
let ptr = unsafe { NonNull::new_unchecked(ptr) };
397+
Self {
398+
ptr,
399+
_marker: PhantomData,
400+
}
401+
}
402+
403+
#[inline]
404+
pub(crate) unsafe fn as_ref<'a>(&self) -> &'a T {
405+
// SAFETY: `PinBox` acts like `Box`, `self.ptr` is non-null and points
406+
// to a live `Box`.
407+
unsafe { self.ptr.as_ref() }
408+
}
409+
}
410+
411+
impl<T: ?Sized> Drop for PinBox<T> {
412+
fn drop(&mut self) {
413+
// SAFETY: `PinBox` acts like `Box`.
414+
unsafe {
415+
drop(Box::from_raw(self.ptr.as_ptr()));
416+
}
417+
}
418+
}
419+
420+
impl<T: ?Sized + fmt::Debug> fmt::Debug for PinBox<T> {
421+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
422+
// SAFETY: `PinBox` acts like `Box`.
423+
let s = unsafe { self.as_ref() };
424+
s.fmt(f)
425+
}
426+
}
427+
428+
#[cfg(test)]
429+
mod tests {
430+
use core::fmt::Write;
431+
432+
use super::PinBox;
433+
434+
#[test]
435+
fn test_drop() {
436+
let x = "abc".to_string().into_boxed_str();
437+
let x = PinBox::new(x);
438+
drop(x);
439+
}
440+
441+
#[test]
442+
fn test_as_ref() {
443+
let x = "abc".to_string().into_boxed_str();
444+
let x = PinBox::new(x);
445+
446+
// SAFETY: `PinBox` acts like `Box` and contains a valid pointer.
447+
assert_eq!(unsafe { x.as_ref() }, "abc");
448+
}
449+
450+
#[test]
451+
fn test_debug_format() {
452+
let x = "abc".to_string().into_boxed_str();
453+
let x = PinBox::new(x);
454+
455+
let mut buf = String::new();
456+
write!(&mut buf, "{x:?}").unwrap();
457+
assert_eq!(buf, "\"abc\"");
458+
}
459+
}
460+
}
461+
357462
#[cfg(test)]
358463
mod tests {
359464
use core::fmt::Write;

0 commit comments

Comments
 (0)