Skip to content

Commit 03f60e8

Browse files
committed
fix(pass-style)!: only well-formed strings are passable
1 parent a4a5a75 commit 03f60e8

File tree

5 files changed

+119
-6
lines changed

5 files changed

+119
-6
lines changed

packages/pass-style/NEWS.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
User-visible changes in `@endo/pass-style`:
22

3+
# Next release
4+
5+
- Previously, all JavaScript strings were considered Passable with `passStyleOf(str) === 'string'`. Now, only well-formed Unicode strings are considered Passable. For all others, `passStyleOf(str)` throws a diagnostic error. This brings us into closer conformance to the OCapN standard, which prohibits sending non-well-formed strings, and requires non-well-formed strings to be rejected when received. Applications that had previously handled non-well-formed strings successfully (even if inadvertently) may now start experiencing these failures.
6+
- Exports `isWellFormedString` and `assertWellFormedString`. Unfortunately the [standard `String.prototype.isWellFormed`](https://tc39.es/proposal-is-usv-string/) first coerces its input to string, leading it to claim that some non-strings are well-formed strings. By contrast, `isWellFormedString` and `assertWellFormedString` will not judge any non-strings to be well-formed strings.
7+
38
# v1.2.0 (2024-02-22)
49

510
- Now supports `AggregateError`, `error.errors`, `error.cause`.

packages/pass-style/index.js

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ export {
2525
isPassableError,
2626
assertPassableError,
2727
toPassableError,
28+
isWellFormedString,
29+
assertWellFormedString,
2830
} from './src/passStyleOf.js';
2931

3032
export { makeTagged } from './src/makeTagged.js';

packages/pass-style/src/passStyleOf.js

+59-1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,61 @@ import { assertSafePromise } from './safe-promise.js';
3636
const { ownKeys } = Reflect;
3737
const { isFrozen, getOwnPropertyDescriptors } = Object;
3838

39+
// @ts-expect-error TS builtin `String` type does not yet
40+
// know about `isWellFormed`
41+
const hasWellFormedStringMethod = !!String.prototype.isWellFormed;
42+
43+
/**
44+
* Is the argument a well-formed string?
45+
*
46+
* Unfortunately, the
47+
* [standard built-in `String.prototype.isWellFormed`](https://github.com/tc39/proposal-is-usv-string)
48+
* does a ToString on its input, causing it to judge non-strings to be
49+
* well-formed strings if they coerce to well-formed strings. This
50+
* recapitulates the mistake in having the global `isNaN` coerce its inputs,
51+
* causing it to judge non-numbers to be NaN if they coerce to NaN.
52+
*
53+
* This `isWellFormedString` function only judges well-formed strings to be
54+
* well-formed strings. For all non-strings it returns false.
55+
*
56+
* @param {unknown} str
57+
* @returns {str is string}
58+
*/
59+
export const isWellFormedString = hasWellFormedStringMethod
60+
? // @ts-expect-error TS does not yet know about `isWellFormed`
61+
str => typeof str === 'string' && str.isWellFormed()
62+
: str => {
63+
if (typeof str !== 'string') {
64+
return false;
65+
}
66+
for (const ch of str) {
67+
// The string iterator iterates by Unicode code point, not
68+
// UTF16 code unit. But if it encounters an unpaired surrogate,
69+
// it will produce it.
70+
const cp = /** @type {number} */ (ch.codePointAt(0));
71+
if (cp >= 0xd800 && cp <= 0xdfff) {
72+
// All surrogates are in this range. The string iterator only
73+
// produces a character in this range for unpaired surrogates,
74+
// which only happens if the string is not well-formed.
75+
return false;
76+
}
77+
}
78+
return true;
79+
};
80+
harden(isWellFormedString);
81+
82+
/**
83+
* Returns normally when `isWellFormedString(str)` would return true.
84+
* Throws a diagnostic error when `isWellFormedString(str)` would return false.
85+
*
86+
* @param {unknown} str
87+
* @returns {asserts str is string}
88+
*/
89+
export const assertWellFormedString = str => {
90+
isWellFormedString(str) || Fail`Expected well-formed unicode string: ${str}`;
91+
};
92+
harden(assertWellFormedString);
93+
3994
/**
4095
* @param {PassStyleHelper[]} passStyleHelpers
4196
* @returns {Record<HelperPassStyle, PassStyleHelper> }
@@ -134,12 +189,15 @@ const makePassStyleOf = passStyleHelpers => {
134189
const typestr = typeof inner;
135190
switch (typestr) {
136191
case 'undefined':
137-
case 'string':
138192
case 'boolean':
139193
case 'number':
140194
case 'bigint': {
141195
return typestr;
142196
}
197+
case 'string': {
198+
assertWellFormedString(inner);
199+
return 'string';
200+
}
143201
case 'symbol': {
144202
assertPassableSymbol(inner);
145203
return 'symbol';

packages/pass-style/test/test-pass-style.js

-5
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/* eslint-disable no-useless-concat */
2+
import { test } from './prepare-test-env-ava.js';
3+
4+
import {
5+
passStyleOf,
6+
isWellFormedString,
7+
assertWellFormedString,
8+
} from '../src/passStyleOf.js';
9+
10+
test('test string well formedness behaviors', t => {
11+
const gcleff1 = '\u{1D11E}';
12+
const gcleff2 = '\u{D834}\u{DD1E}';
13+
const gcleff3 = '\u{D834}' + '\u{DD1E}';
14+
const badcleff1 = '\u{D834}\u{D834}\u{DD1E}';
15+
const badcleff2 = '\u{D834}\u{DD1E}\u{D834}';
16+
const badcleff3 = '\u{D834}' + '\u{DD1E}\u{D834}';
17+
18+
// This test block ensures that the underlying platform behaves as we expect
19+
t.is(gcleff1, gcleff2);
20+
t.is(gcleff1, gcleff3);
21+
t.is(gcleff1.length, 2);
22+
t.is(gcleff2.length, 2);
23+
t.is(gcleff3.length, 2);
24+
// Uses string iterator, which iterates code points if possible, not
25+
// UTF16 code units
26+
t.deepEqual([...gcleff1], [gcleff1]);
27+
t.not(badcleff1, badcleff2);
28+
t.is(badcleff2, badcleff3);
29+
t.is(badcleff1.length, 3);
30+
// But if the string contains lone surrogates, the string iterator will
31+
// produce those as characters
32+
t.deepEqual([...badcleff1], ['\u{D834}', gcleff1]);
33+
t.deepEqual([...badcleff2], [gcleff1, '\u{D834}']);
34+
35+
t.is(passStyleOf(gcleff1), 'string');
36+
t.true(isWellFormedString(gcleff1));
37+
t.notThrows(() => assertWellFormedString(gcleff1));
38+
39+
t.throws(() => passStyleOf(badcleff1), {
40+
message: 'Expected well-formed unicode string: "\\ud834𝄞"',
41+
});
42+
t.throws(() => passStyleOf(badcleff2), {
43+
message: 'Expected well-formed unicode string: "𝄞\\ud834"',
44+
});
45+
t.false(isWellFormedString(badcleff1));
46+
t.false(isWellFormedString(badcleff2));
47+
t.throws(() => assertWellFormedString(badcleff1), {
48+
message: 'Expected well-formed unicode string: "\\ud834𝄞"',
49+
});
50+
t.throws(() => assertWellFormedString(badcleff2), {
51+
message: 'Expected well-formed unicode string: "𝄞\\ud834"',
52+
});
53+
});

0 commit comments

Comments
 (0)