Skip to content

Commit

Permalink
Merge pull request #1062 from godot-rust/feature/strings-from-cstr
Browse files Browse the repository at this point in the history
`GString`, `StringName`: add conversions from bytes and C-strings
  • Loading branch information
Bromeon authored Mar 4, 2025
2 parents e2a7f05 + db41515 commit da23b37
Show file tree
Hide file tree
Showing 10 changed files with 419 additions and 14 deletions.
2 changes: 1 addition & 1 deletion godot-core/src/builtin/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ pub mod __prelude_reexport {
pub use rect2i::*;
pub use rid::*;
pub use signal::*;
pub use string::{GString, NodePath, StringName};
pub use string::{Encoding, GString, NodePath, StringName};
pub use transform2d::*;
pub use transform3d::*;
pub use variant::*;
Expand Down
74 changes: 71 additions & 3 deletions godot-core/src/builtin/string/gstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@
*/

use std::convert::Infallible;
use std::ffi::c_char;
use std::fmt;
use std::fmt::Write;

use godot_ffi as sys;
use sys::types::OpaqueString;
use sys::{ffi_methods, interface_fn, GodotFfi};

use crate::builtin::string::Encoding;
use crate::builtin::{inner, NodePath, StringName, Variant};
use crate::meta::error::StringError;
use crate::meta::AsArg;
use crate::{impl_shared_string_api, meta};

Expand Down Expand Up @@ -77,6 +78,73 @@ impl GString {
Self::default()
}

/// Convert string from bytes with given encoding, returning `Err` on validation errors.
///
/// Intermediate `NUL` characters are not accepted in Godot and always return `Err`.
///
/// Some notes on the encodings:
/// - **Latin-1:** Since every byte is a valid Latin-1 character, no validation besides the `NUL` byte is performed.
/// It is your responsibility to ensure that the input is valid Latin-1.
/// - **ASCII**: Subset of Latin-1, which is additionally validated to be valid, non-`NUL` ASCII characters.
/// - **UTF-8**: The input is validated to be UTF-8.
///
/// Specifying incorrect encoding is safe, but may result in unintended string values.
pub fn try_from_bytes(bytes: &[u8], encoding: Encoding) -> Result<Self, StringError> {
Self::try_from_bytes_with_nul_check(bytes, encoding, true)
}

/// Convert string from C-string with given encoding, returning `Err` on validation errors.
///
/// Convenience function for [`try_from_bytes()`](Self::try_from_bytes); see its docs for more information.
pub fn try_from_cstr(cstr: &std::ffi::CStr, encoding: Encoding) -> Result<Self, StringError> {
Self::try_from_bytes_with_nul_check(cstr.to_bytes(), encoding, false)
}

pub(super) fn try_from_bytes_with_nul_check(
bytes: &[u8],
encoding: Encoding,
check_nul: bool,
) -> Result<Self, StringError> {
match encoding {
Encoding::Ascii => {
// If the bytes are ASCII, we can fall back to Latin-1, which is always valid (except for NUL).
// is_ascii() does *not* check for the NUL byte, so the check in the Latin-1 branch is still necessary.
if bytes.is_ascii() {
Self::try_from_bytes_with_nul_check(bytes, Encoding::Latin1, check_nul)
.map_err(|_e| StringError::new("intermediate NUL byte in ASCII string"))
} else {
Err(StringError::new("invalid ASCII"))
}
}
Encoding::Latin1 => {
// Intermediate NUL bytes are not accepted in Godot. Both ASCII + Latin-1 encodings need to explicitly check for this.
if check_nul && bytes.contains(&0) {
// Error overwritten when called from ASCII branch.
return Err(StringError::new("intermediate NUL byte in Latin-1 string"));
}

let s = unsafe {
Self::new_with_string_uninit(|string_ptr| {
let ctor = interface_fn!(string_new_with_latin1_chars_and_len);
ctor(
string_ptr,
bytes.as_ptr() as *const std::ffi::c_char,
bytes.len() as i64,
);
})
};
Ok(s)
}
Encoding::Utf8 => {
// from_utf8() also checks for intermediate NUL bytes.
let utf8 = std::str::from_utf8(bytes);

utf8.map(GString::from)
.map_err(|e| StringError::with_source("invalid UTF-8", e))
}
}
}

/// Number of characters in the string.
///
/// _Godot equivalent: `length`_
Expand Down Expand Up @@ -260,7 +328,7 @@ impl From<&str> for GString {
let ctor = interface_fn!(string_new_with_utf8_chars_and_len);
ctor(
string_ptr,
bytes.as_ptr() as *const c_char,
bytes.as_ptr() as *const std::ffi::c_char,
bytes.len() as i64,
);
})
Expand Down Expand Up @@ -307,7 +375,7 @@ impl From<&GString> for String {

interface_fn!(string_to_utf8_chars)(
string.string_sys(),
buf.as_mut_ptr() as *mut c_char,
buf.as_mut_ptr() as *mut std::ffi::c_char,
len,
);

Expand Down
14 changes: 14 additions & 0 deletions godot-core/src/builtin/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,20 @@ impl FromGodot for String {
}
}

// ----------------------------------------------------------------------------------------------------------------------------------------------
// Encoding

/// Specifies string encoding.
///
/// Used in functions such as [`GString::try_from_bytes()`][GString::try_from_bytes] to handle multiple input string encodings.
#[non_exhaustive]
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum Encoding {
Ascii,
Latin1,
Utf8,
}

// ----------------------------------------------------------------------------------------------------------------------------------------------

/// Returns a tuple of `(from, len)` from a Rust range.
Expand Down
82 changes: 80 additions & 2 deletions godot-core/src/builtin/string/string_name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

use std::fmt;

use godot_ffi as sys;
use godot_ffi::interface_fn;
use sys::{ffi_methods, GodotFfi};

use crate::builtin::{inner, GString, NodePath, Variant};
use crate::builtin::{inner, Encoding, GString, NodePath, Variant};
use crate::meta::error::StringError;
use crate::meta::AsArg;
use crate::{impl_shared_string_api, meta};

Expand Down Expand Up @@ -60,6 +61,83 @@ impl StringName {
Self { opaque }
}

/// Convert string from bytes with given encoding, returning `Err` on validation errors.
///
/// Intermediate `NUL` characters are not accepted in Godot and always return `Err`.
///
/// Some notes on the encodings:
/// - **Latin-1:** Since every byte is a valid Latin-1 character, no validation besides the `NUL` byte is performed.
/// It is your responsibility to ensure that the input is valid Latin-1.
/// - **ASCII**: Subset of Latin-1, which is additionally validated to be valid, non-`NUL` ASCII characters.
/// - **UTF-8**: The input is validated to be UTF-8.
///
/// Specifying incorrect encoding is safe, but may result in unintended string values.
pub fn try_from_bytes(bytes: &[u8], encoding: Encoding) -> Result<Self, StringError> {
Self::try_from_bytes_with_nul_check(bytes, encoding, true)
}

/// Convert string from bytes with given encoding, returning `Err` on validation errors.
///
/// Convenience function for [`try_from_bytes()`](Self::try_from_bytes); see its docs for more information.
///
/// When called with `Encoding::Latin1`, this can be slightly more efficient than `try_from_bytes()`.
pub fn try_from_cstr(cstr: &std::ffi::CStr, encoding: Encoding) -> Result<Self, StringError> {
// Short-circuit the direct Godot 4.2 function for Latin-1, which takes a null-terminated C string.
#[cfg(since_api = "4.2")]
if encoding == Encoding::Latin1 {
// Note: CStr guarantees no intermediate NUL bytes, so we don't need to check for them.

let is_static = sys::conv::SYS_FALSE;
let s = unsafe {
Self::new_with_string_uninit(|string_ptr| {
let ctor = interface_fn!(string_name_new_with_latin1_chars);
ctor(
string_ptr,
cstr.as_ptr() as *const std::ffi::c_char,
is_static,
);
})
};
return Ok(s);
}

Self::try_from_bytes_with_nul_check(cstr.to_bytes(), encoding, false)
}

fn try_from_bytes_with_nul_check(
bytes: &[u8],
encoding: Encoding,
check_nul: bool,
) -> Result<Self, StringError> {
match encoding {
Encoding::Ascii => {
// ASCII is a subset of UTF-8, and UTF-8 has a more direct implementation than Latin-1; thus use UTF-8 via `From<&str>`.
if !bytes.is_ascii() {
Err(StringError::new("invalid ASCII"))
} else if check_nul && bytes.contains(&0) {
Err(StringError::new("intermediate NUL byte in ASCII string"))
} else {
// SAFETY: ASCII is a subset of UTF-8 and was verified above.
let ascii = unsafe { std::str::from_utf8_unchecked(bytes) };
Ok(Self::from(ascii))
}
}
Encoding::Latin1 => {
// This branch is short-circuited if invoked for CStr and Godot 4.2+, which uses `string_name_new_with_latin1_chars`
// (requires nul-termination). In general, fall back to GString conversion.
GString::try_from_bytes_with_nul_check(bytes, Encoding::Latin1, check_nul)
.map(Self::from)
}
Encoding::Utf8 => {
// from_utf8() also checks for intermediate NUL bytes.
let utf8 = std::str::from_utf8(bytes);

utf8.map(StringName::from)
.map_err(|e| StringError::with_source("invalid UTF-8", e))
}
}
}

/// Number of characters in the string.
///
/// _Godot equivalent: `length`_
Expand Down
2 changes: 2 additions & 0 deletions godot-core/src/meta/error/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
mod call_error;
mod convert_error;
mod io_error;
mod string_error;

pub use call_error::*;
pub use convert_error::*;
pub use io_error::*;
pub use string_error::*;
51 changes: 51 additions & 0 deletions godot-core/src/meta/error/string_error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) godot-rust; Bromeon and contributors.
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

use std::error::Error;
use std::fmt;

/// Error related to string encoding/decoding.
#[derive(Debug)]
pub struct StringError {
message: String,
source: Option<Box<(dyn Error + 'static)>>,
}

impl fmt::Display for StringError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(source) = self.source() {
write!(f, "{}: {}", self.message, source)
} else {
write!(f, "{}", self.message)
}
}
}

impl Error for StringError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
self.source.as_deref()
}
}

impl StringError {
pub(crate) fn new(message: impl Into<String>) -> Self {
Self {
message: message.into(),
source: None,
}
}

pub(crate) fn with_source(
message: impl Into<String>,
source: impl Into<Box<(dyn Error + 'static)>>,
) -> Self {
Self {
message: message.into(),
source: Some(source.into()),
}
}
}
1 change: 1 addition & 0 deletions itest/rust/src/builtin_tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ mod string {
mod gstring_test;
mod node_path_test;
mod string_name_test;
mod string_test_macros;
}

mod script {
Expand Down
28 changes: 21 additions & 7 deletions itest/rust/src/builtin_tests/string/gstring_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use std::collections::HashSet;

use crate::framework::{expect_debug_panic_or_release_ok, itest};
use godot::builtin::{GString, PackedStringArray};
use godot::builtin::{Encoding, GString, PackedStringArray};

// TODO use tests from godot-rust/gdnative

Expand Down Expand Up @@ -150,7 +150,7 @@ fn string_substr() {
}

#[itest]
fn string_find() {
fn gstring_find() {
let s = GString::from("Hello World");

assert_eq!(s.find("o"), Some(4));
Expand All @@ -171,7 +171,7 @@ fn string_find() {
}

#[itest]
fn string_split() {
fn gstring_split() {
let s = GString::from("Hello World");
assert_eq!(s.split(" "), packed(&["Hello", "World"]));
assert_eq!(
Expand Down Expand Up @@ -206,7 +206,7 @@ fn string_split() {
}

#[itest]
fn string_count() {
fn gstring_count() {
let s = GString::from("Long sentence with Sentry guns.");
assert_eq!(s.count("sent", ..), 1);
assert_eq!(s.count("en", 6..), 3);
Expand All @@ -224,7 +224,7 @@ fn string_count() {
}

#[itest]
fn string_erase() {
fn gstring_erase() {
let s = GString::from("Hello World");
assert_eq!(s.erase(..), GString::new());
assert_eq!(s.erase(4..4), s);
Expand All @@ -236,7 +236,7 @@ fn string_erase() {
}

#[itest]
fn string_insert() {
fn gstring_insert() {
let s = GString::from("H World");
assert_eq!(s.insert(1, "i"), "Hi World".into());
assert_eq!(s.insert(1, "ello"), "Hello World".into());
Expand All @@ -248,7 +248,7 @@ fn string_insert() {
}

#[itest]
fn string_pad() {
fn gstring_pad() {
let s = GString::from("123");
assert_eq!(s.lpad(5, '0'), "00123".into());
assert_eq!(s.lpad(2, ' '), "123".into());
Expand All @@ -266,7 +266,21 @@ fn string_pad() {
assert_eq!(s.pad_zeros(2), "123.456".into());
}

// Byte and C-string conversions.
crate::generate_string_bytes_and_cstr_tests!(
builtin: GString,
tests: [
gstring_from_bytes_ascii,
gstring_from_cstr_ascii,
gstring_from_bytes_latin1,
gstring_from_cstr_latin1,
gstring_from_bytes_utf8,
gstring_from_cstr_utf8,
]
);

// ----------------------------------------------------------------------------------------------------------------------------------------------
// Helpers

fn packed(strings: &[&str]) -> PackedStringArray {
strings.iter().map(|&s| GString::from(s)).collect()
Expand Down
Loading

0 comments on commit da23b37

Please sign in to comment.