Skip to content

Commit 0252fc9

Browse files
committed
explicitly distinguish pointer::addr and pointer::expose_addr
1 parent 168a020 commit 0252fc9

File tree

3 files changed

+217
-19
lines changed

3 files changed

+217
-19
lines changed

library/core/src/ptr/const_ptr.rs

+49-5
Original file line numberDiff line numberDiff line change
@@ -152,18 +152,27 @@ impl<T: ?Sized> *const T {
152152

153153
/// Gets the "address" portion of the pointer.
154154
///
155-
/// This is equivalent to `self as usize`, which semantically discards
156-
/// *provenance* and *address-space* information. To properly restore that information,
157-
/// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
155+
/// This is similar to `self as usize`, which semantically discards *provenance* and
156+
/// *address-space* information. However, unlike `self as usize`, casting the returned address
157+
/// back to a pointer yields [`invalid`][], which is undefined behavior to dereference. To
158+
/// properly restore the lost information and obtain a dereferenceable pointer, use
159+
/// [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
160+
///
161+
/// If using those APIs is not possible because there is no way to preserve a pointer with the
162+
/// required provenance, use [`expose_addr`][pointer::expose_addr] and
163+
/// [`from_exposed_addr`][from_exposed_addr] instead. However, note that this makes
164+
/// your code less portable and less amenable to tools that check for compliance with the Rust
165+
/// memory model.
158166
///
159167
/// On most platforms this will produce a value with the same bytes as the original
160168
/// pointer, because all the bytes are dedicated to describing the address.
161169
/// Platforms which need to store additional information in the pointer may
162170
/// perform a change of representation to produce a value containing only the address
163171
/// portion of the pointer. What that means is up to the platform to define.
164172
///
165-
/// This API and its claimed semantics are part of the Strict Provenance experiment,
166-
/// see the [module documentation][crate::ptr] for details.
173+
/// This API and its claimed semantics are part of the Strict Provenance experiment, and as such
174+
/// might change in the future (including possibly weakening this so it becomes wholly
175+
/// equivalent to `self as usize`). See the [module documentation][crate::ptr] for details.
167176
#[must_use]
168177
#[inline]
169178
#[unstable(feature = "strict_provenance", issue = "95228")]
@@ -175,6 +184,41 @@ impl<T: ?Sized> *const T {
175184
self as usize
176185
}
177186

187+
/// Gets the "address" portion of the pointer, and 'exposes' the "provenance" part for future
188+
/// use in [`from_exposed_addr`][].
189+
///
190+
/// This is equivalent to `self as usize`, which semantically discards *provenance* and
191+
/// *address-space* information. Furthermore, this (like the `as` cast) has the implicit
192+
/// side-effect of marking the provenance as 'exposed', so on platforms that support it you can
193+
/// later call [`from_exposed_addr`][] to reconstitute the original pointer including its
194+
/// provenance. (Reconstructing address space information, if required, is your responsibility.)
195+
///
196+
/// Using this method means that code is *not* following Strict Provenance rules. Supporting
197+
/// [`from_exposed_addr`][] complicates specification and reasoning and may not be supported by
198+
/// tools that help you to stay conformant with the Rust memory model, so it is recommended to
199+
/// use [`addr`][pointer::addr] wherever possible.
200+
///
201+
/// On most platforms this will produce a value with the same bytes as the original pointer,
202+
/// because all the bytes are dedicated to describing the address. Platforms which need to store
203+
/// additional information in the pointer may not support this operation, since the 'expose'
204+
/// side-effect which is required for [`from_exposed_addr`][] to work is typically not
205+
/// available.
206+
///
207+
/// This API and its claimed semantics are part of the Strict Provenance experiment, see the
208+
/// [module documentation][crate::ptr] for details.
209+
///
210+
/// [`from_exposed_addr`]: from_exposed_addr
211+
#[must_use]
212+
#[inline]
213+
#[unstable(feature = "strict_provenance", issue = "95228")]
214+
pub fn expose_addr(self) -> usize
215+
where
216+
T: Sized,
217+
{
218+
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
219+
self as usize
220+
}
221+
178222
/// Creates a new pointer with the given address.
179223
///
180224
/// This performs the same operation as an `addr as ptr` cast, but copies

library/core/src/ptr/mod.rs

+119-9
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
//! interpretation of provenance. It's ok if your code doesn't strictly conform to it.**
7171
//!
7272
//! [Strict Provenance][] is an experimental set of APIs that help tools that try
73-
//! to validate the memory-safety of your program's execution. Notably this includes [miri][]
73+
//! to validate the memory-safety of your program's execution. Notably this includes [Miri][]
7474
//! and [CHERI][], which can detect when you access out of bounds memory or otherwise violate
7575
//! Rust's memory model.
7676
//!
@@ -136,7 +136,7 @@
136136
//!
137137
//! The strict provenance experiment is mostly only interested in exploring stricter *spatial*
138138
//! provenance. In this sense it can be thought of as a subset of the more ambitious and
139-
//! formal [Stacked Borrows][] research project, which is what tools like [miri][] are based on.
139+
//! formal [Stacked Borrows][] research project, which is what tools like [Miri][] are based on.
140140
//! In particular, Stacked Borrows is necessary to properly describe what borrows are allowed
141141
//! to do and when they become invalidated. This necessarily involves much more complex
142142
//! *temporal* reasoning than simply identifying allocations. Adjusting APIs and code
@@ -170,7 +170,8 @@
170170
//! Under Strict Provenance, a usize *cannot* accurately represent a pointer, and converting from
171171
//! a pointer to a usize is generally an operation which *only* extracts the address. It is
172172
//! therefore *impossible* to construct a valid pointer from a usize because there is no way
173-
//! to restore the address-space and provenance.
173+
//! to restore the address-space and provenance. In other words, pointer-integer-pointer
174+
//! roundtrips are not possible (in the sense that the resulting pointer is not dereferenceable).
174175
//!
175176
//! The key insight to making this model *at all* viable is the [`with_addr`][] method:
176177
//!
@@ -194,10 +195,10 @@
194195
//! and then immediately converting back to a pointer. To make this use case more ergonomic,
195196
//! we provide the [`map_addr`][] method.
196197
//!
197-
//! To help make it clear that code is "following" Strict Provenance semantics, we also
198-
//! provide an [`addr`][] method which is currently equivalent to `ptr as usize`. In the
199-
//! future we may provide a lint for pointer<->integer casts to help you audit if your
200-
//! code conforms to strict provenance.
198+
//! To help make it clear that code is "following" Strict Provenance semantics, we also provide an
199+
//! [`addr`][] method which promises that the returned address is not part of a
200+
//! pointer-usize-pointer roundtrip. In the future we may provide a lint for pointer<->integer
201+
//! casts to help you audit if your code conforms to strict provenance.
201202
//!
202203
//!
203204
//! ## Using Strict Provenance
@@ -310,6 +311,41 @@
310311
//! For instance, ARM explicitly supports high-bit tagging, and so CHERI on ARM inherits
311312
//! that and should support it.
312313
//!
314+
//! ## Pointer-usize-pointer roundtrips and 'exposed' provenance
315+
//!
316+
//! **This section is *non-normative* and is part of the [Strict Provenance] experiment.**
317+
//!
318+
//! As discussed above, pointer-usize-pointer roundtrips are not possible under [Strict Provenance].
319+
//! However, there exists legacy Rust code that is full of such roundtrips, and legacy platform APIs
320+
//! regularly assume that `usize` can capture all the information that makes up a pointer. There
321+
//! also might be code that cannot be ported to Strict Provenance (which is something we would [like
322+
//! to hear about][Strict Provenance]).
323+
//!
324+
//! For situations like this, there is a fallback plan, a way to 'opt out' of Strict Provenance.
325+
//! However, note that this makes your code a lot harder to specify, and the code will not work
326+
//! (well) with tools like [Miri] and [CHERI].
327+
//!
328+
//! This fallback plan is provided by the [`expose_addr`] and [`from_exposed_addr`] methods (which
329+
//! are equivalent to `as` casts between pointers and integers). [`expose_addr`] is a lot like
330+
//! [`addr`], but additionally adds the provenance of the pointer to a global list of 'exposed'
331+
//! provenances. (This list is purely conceptual; it exists for the purpose of specifying Rust but
332+
//! is not materialized in actual executions, except in tools like [Miri].) [`from_exposed_addr`]
333+
//! can be used to construct a pointer with one of these previously 'exposed' provenances.
334+
//! [`from_exposed_addr`] takes only `addr: usize` as its argument, so unlike with [`with_addr`] there is
335+
//! no indication of what the correct provenance for the returned pointer is -- and that is exactly
336+
//! what makes pointer-usize-pointer roundtrips so tricky to rigorously specify! There is no
337+
//! algorithm that decides which provenance will be used. You can think of this as "guessing" the
338+
//! right provenance, and the guess will be "maximally in your favor", in the sense that if there is
339+
//! any way to avoid undefined behavior, then that is the guess that will be taken. However, if
340+
//! there is *no* previously 'exposed' provenance that justifies the way the returned pointer will
341+
//! be used, the program has undefined behavior.
342+
//!
343+
//! Using [`expose_addr`] or [`from_exposed_addr`] (or the equivalent `as` casts) means that code is
344+
//! *not* following Strict Provenance rules. The goal of the Strict Provenance experiment is to
345+
//! determine whether it is possible to use Rust without [`expose_addr`] and [`from_exposed_addr`].
346+
//! If this is successful, it would be a major win for avoiding specification complexity and for
347+
//! facilitating adoption of tools like [CHERI] and [Miri] that can be a big help in increasing the
348+
//! confidence in (unsafe) Rust code.
313349
//!
314350
//! [aliasing]: ../../nomicon/aliasing.html
315351
//! [book]: ../../book/ch19-01-unsafe-rust.html#dereferencing-a-raw-pointer
@@ -322,7 +358,9 @@
322358
//! [`map_addr`]: pointer::map_addr
323359
//! [`addr`]: pointer::addr
324360
//! [`ptr::invalid`]: core::ptr::invalid
325-
//! [miri]: https://github.com/rust-lang/miri
361+
//! [`expose_addr`]: pointer::expose_addr
362+
//! [`from_exposed_addr`]: from_exposed_addr
363+
//! [Miri]: https://github.com/rust-lang/miri
326364
//! [CHERI]: https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/
327365
//! [Strict Provenance]: https://github.com/rust-lang/rust/issues/95228
328366
//! [Stacked Borrows]: https://plv.mpi-sws.org/rustbelt/stacked-borrows/
@@ -547,6 +585,78 @@ pub const fn invalid_mut<T>(addr: usize) -> *mut T {
547585
addr as *mut T
548586
}
549587

588+
/// Convert an address back to a pointer, picking up a previously 'exposed' provenance.
589+
///
590+
/// This is equivalent to `addr as *const T`. The provenance of the returned pointer is that of *any*
591+
/// pointer that was previously passed to [`expose_addr`][pointer::expose_addr] or a `ptr as usize`
592+
/// cast. If there is no previously 'exposed' provenance that justifies the way this pointer will be
593+
/// used, the program has undefined behavior. Note that there is no algorithm that decides which
594+
/// provenance will be used. You can think of this as "guessing" the right provenance, and the guess
595+
/// will be "maximally in your favor", in the sense that if there is any way to avoid undefined
596+
/// behavior, then that is the guess that will be taken.
597+
///
598+
/// On platforms with multiple address spaces, it is your responsibility to ensure that the
599+
/// address makes sense in the address space that this pointer will be used with.
600+
///
601+
/// Using this method means that code is *not* following Strict Provenance rules. "Guessing" a
602+
/// suitable provenance complicates specification and reasoning and may not be supported by
603+
/// tools that help you to stay conformant with the Rust memory model, so it is recommended to
604+
/// use [`with_addr`][pointer::with_addr] wherever possible.
605+
///
606+
/// On most platforms this will produce a value with the same bytes as the address. Platforms
607+
/// which need to store additional information in a pointer may not support this operation,
608+
/// since it is generally not possible to actually *compute* which provenance the returned
609+
/// pointer has to pick up.
610+
///
611+
/// This API and its claimed semantics are part of the Strict Provenance experiment, see the
612+
/// [module documentation][crate::ptr] for details.
613+
#[must_use]
614+
#[inline]
615+
#[unstable(feature = "strict_provenance", issue = "95228")]
616+
pub fn from_exposed_addr<T>(addr: usize) -> *const T
617+
where
618+
T: Sized,
619+
{
620+
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
621+
addr as *const T
622+
}
623+
624+
/// Convert an address back to a mutable pointer, picking up a previously 'exposed' provenance.
625+
///
626+
/// This is equivalent to `addr as *mut T`. The provenance of the returned pointer is that of *any*
627+
/// pointer that was previously passed to [`expose_addr`][pointer::expose_addr] or a `ptr as usize`
628+
/// cast. If there is no previously 'exposed' provenance that justifies the way this pointer will be
629+
/// used, the program has undefined behavior. Note that there is no algorithm that decides which
630+
/// provenance will be used. You can think of this as "guessing" the right provenance, and the guess
631+
/// will be "maximally in your favor", in the sense that if there is any way to avoid undefined
632+
/// behavior, then that is the guess that will be taken.
633+
///
634+
/// On platforms with multiple address spaces, it is your responsibility to ensure that the
635+
/// address makes sense in the address space that this pointer will be used with.
636+
///
637+
/// Using this method means that code is *not* following Strict Provenance rules. "Guessing" a
638+
/// suitable provenance complicates specification and reasoning and may not be supported by
639+
/// tools that help you to stay conformant with the Rust memory model, so it is recommended to
640+
/// use [`with_addr`][pointer::with_addr] wherever possible.
641+
///
642+
/// On most platforms this will produce a value with the same bytes as the address. Platforms
643+
/// which need to store additional information in a pointer may not support this operation,
644+
/// since it is generally not possible to actually *compute* which provenance the returned
645+
/// pointer has to pick up.
646+
///
647+
/// This API and its claimed semantics are part of the Strict Provenance experiment, see the
648+
/// [module documentation][crate::ptr] for details.
649+
#[must_use]
650+
#[inline]
651+
#[unstable(feature = "strict_provenance", issue = "95228")]
652+
pub fn from_exposed_addr_mut<T>(addr: usize) -> *mut T
653+
where
654+
T: Sized,
655+
{
656+
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
657+
addr as *mut T
658+
}
659+
550660
/// Forms a raw slice from a pointer and a length.
551661
///
552662
/// The `len` argument is the number of **elements**, not the number of bytes.
@@ -762,7 +872,7 @@ pub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
762872
);
763873
}
764874

765-
// NOTE(scottmcm) MIRI is disabled here as reading in smaller units is a
875+
// NOTE(scottmcm) Miri is disabled here as reading in smaller units is a
766876
// pessimization for it. Also, if the type contains any unaligned pointers,
767877
// copying those over multiple reads is difficult to support.
768878
#[cfg(not(miri))]

library/core/src/ptr/mut_ptr.rs

+49-5
Original file line numberDiff line numberDiff line change
@@ -156,18 +156,27 @@ impl<T: ?Sized> *mut T {
156156

157157
/// Gets the "address" portion of the pointer.
158158
///
159-
/// This is equivalent to `self as usize`, which semantically discards
160-
/// *provenance* and *address-space* information. To properly restore that information,
161-
/// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
159+
/// This is similar to `self as usize`, which semantically discards *provenance* and
160+
/// *address-space* information. However, unlike `self as usize`, casting the returned address
161+
/// back to a pointer yields [`invalid`][], which is undefined behavior to dereference. To
162+
/// properly restore the lost information and obtain a dereferenceable pointer, use
163+
/// [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
164+
///
165+
/// If using those APIs is not possible because there is no way to preserve a pointer with the
166+
/// required provenance, use [`expose_addr`][pointer::expose_addr] and
167+
/// [`from_exposed_addr_mut`][from_exposed_addr_mut] instead. However, note that this makes
168+
/// your code less portable and less amenable to tools that check for compliance with the Rust
169+
/// memory model.
162170
///
163171
/// On most platforms this will produce a value with the same bytes as the original
164172
/// pointer, because all the bytes are dedicated to describing the address.
165173
/// Platforms which need to store additional information in the pointer may
166174
/// perform a change of representation to produce a value containing only the address
167175
/// portion of the pointer. What that means is up to the platform to define.
168176
///
169-
/// This API and its claimed semantics are part of the Strict Provenance experiment,
170-
/// see the [module documentation][crate::ptr] for details.
177+
/// This API and its claimed semantics are part of the Strict Provenance experiment, and as such
178+
/// might change in the future (including possibly weakening this so it becomes wholly
179+
/// equivalent to `self as usize`). See the [module documentation][crate::ptr] for details.
171180
#[must_use]
172181
#[inline]
173182
#[unstable(feature = "strict_provenance", issue = "95228")]
@@ -179,6 +188,41 @@ impl<T: ?Sized> *mut T {
179188
self as usize
180189
}
181190

191+
/// Gets the "address" portion of the pointer, and 'exposes' the "provenance" part for future
192+
/// use in [`from_exposed_addr_mut`][].
193+
///
194+
/// This is equivalent to `self as usize`, which semantically discards *provenance* and
195+
/// *address-space* information. Furthermore, this (like the `as` cast) has the implicit
196+
/// side-effect of marking the provenance as 'exposed', so on platforms that support it you can
197+
/// later call [`from_exposed_addr_mut`][] to reconstitute the original pointer including its
198+
/// provenance. (Reconstructing address space information, if required, is your responsibility.)
199+
///
200+
/// Using this method means that code is *not* following Strict Provenance rules. Supporting
201+
/// [`from_exposed_addr_mut`][] complicates specification and reasoning and may not be supported
202+
/// by tools that help you to stay conformant with the Rust memory model, so it is recommended
203+
/// to use [`addr`][pointer::addr] wherever possible.
204+
///
205+
/// On most platforms this will produce a value with the same bytes as the original pointer,
206+
/// because all the bytes are dedicated to describing the address. Platforms which need to store
207+
/// additional information in the pointer may not support this operation, since the 'expose'
208+
/// side-effect which is required for [`from_exposed_addr_mut`][] to work is typically not
209+
/// available.
210+
///
211+
/// This API and its claimed semantics are part of the Strict Provenance experiment, see the
212+
/// [module documentation][crate::ptr] for details.
213+
///
214+
/// [`from_exposed_addr_mut`]: from_exposed_addr_mut
215+
#[must_use]
216+
#[inline]
217+
#[unstable(feature = "strict_provenance", issue = "95228")]
218+
pub fn expose_addr(self) -> usize
219+
where
220+
T: Sized,
221+
{
222+
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
223+
self as usize
224+
}
225+
182226
/// Creates a new pointer with the given address.
183227
///
184228
/// This performs the same operation as an `addr as ptr` cast, but copies

0 commit comments

Comments
 (0)