Skip to content

Commit e615774

Browse files
committed
fix(pass-style): only well-formed strings are passable
1 parent 5b2537c commit e615774

File tree

5 files changed

+124
-7
lines changed

5 files changed

+124
-7
lines changed

packages/pass-style/NEWS.md

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
User-visible changes in `@endo/pass-style`:
2+
3+
# next release
4+
5+
- Previously, all JavaScript strings were considered Passable with `passStyleOf(str) === 'string'`. Now, only well-formed Unicode strings are considered Passable. For all others, `passStyleOf(str)` throws a diagnostic error. This brings us into closer conformance to the OCapN standard, which prohibits sending non-well-formed strings, and requires non-well-formed strings to be rejected when received. Applications that had previously handled non-well-formed strings successfully (even if inadvertently) may now start experiencing these failures.
6+
- Exports `isWellFormedString` and `assertWellFormedString`. Unfortunately the [standard `String.prototype.isWellFormed`](https://tc39.es/proposal-is-usv-string/) first coerces its input to string, leading it to claim that some non-strings are well-formed strings. By contrast, `isWellFormedString` and `assertWellFormedString` will not judge any non-strings to be well-formed strings.

packages/pass-style/index.js

+6-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,12 @@ export {
2121
passableSymbolForName,
2222
} from './src/symbol.js';
2323

24-
export { passStyleOf, assertPassable } from './src/passStyleOf.js';
24+
export {
25+
isWellFormedString,
26+
assertWellFormedString,
27+
passStyleOf,
28+
assertPassable,
29+
} from './src/passStyleOf.js';
2530

2631
export { makeTagged } from './src/makeTagged.js';
2732
export {

packages/pass-style/src/passStyleOf.js

+59-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,61 @@ const { details: X, Fail, quote: q } = assert;
2626
const { ownKeys } = Reflect;
2727
const { isFrozen } = Object;
2828

29+
// @ts-expect-error TS builtin `String` type does not yet
30+
// know about `isWellFormed`
31+
const hasWellFormedStringMethod = !!String.prototype.isWellFormed;
32+
33+
/**
34+
* Is the argument a well-formed string?
35+
*
36+
* Unfortunately, the
37+
* [standard built-in `String.prototype.isWellFormed`](https://github.com/tc39/proposal-is-usv-string)
38+
* does a ToString on its input, causing it to judge non-strings to be
39+
* well-formed strings if they coerce to a well-formed string. This
40+
* recapitulates the mistake in having the global `isNaN` coerce its inputs,
41+
* causing it to judge non-numbers to be NaN if they coerce to NaN.
42+
*
43+
* This `isWellFormedString` function only judges well-formed strings to be
44+
* well-formed strings. For all non-strings it returns false.
45+
*
46+
* @param {unknown} str
47+
* @returns {str is string}
48+
*/
49+
export const isWellFormedString = hasWellFormedStringMethod
50+
? // @ts-expect-error TS does not yet know about `isWellFormed`
51+
str => typeof str === 'string' && str.isWellFormed()
52+
: str => {
53+
if (typeof str !== 'string') {
54+
return false;
55+
}
56+
for (const ch of str) {
57+
// The string iterator iterates by Unicode code point, not
58+
// UTF16 code unit. But if it encounters an unpaired surrogate,
59+
// it will produce it.
60+
const cp = /** @type {number} */ (ch.codePointAt(0));
61+
if (cp >= 0xd800 && cp <= 0xdfff) {
62+
// All surrogates are in this range. The string iterator only
63+
// produces a character in this range for unpaired surrogates,
64+
// which only happens if the string is not well-formed.
65+
return false;
66+
}
67+
}
68+
return true;
69+
};
70+
harden(isWellFormedString);
71+
72+
/**
73+
* Returns normally when `isWellFormedString(str)` would return true.
74+
* Throws a diagnostic error when `isWellFormedString(str)` would return false.
75+
*
76+
* @param {unknown} str
77+
* @returns {asserts str is string}
78+
*/
79+
export const assertWellFormedString = str => {
80+
isWellFormedString(str) || Fail`Expected well-formed unicode string: ${str}`;
81+
};
82+
harden(assertWellFormedString);
83+
2984
/**
3085
* @param {PassStyleHelper[]} passStyleHelpers
3186
* @returns {Record<HelperPassStyle, PassStyleHelper> }
@@ -124,12 +179,15 @@ const makePassStyleOf = passStyleHelpers => {
124179
const typestr = typeof inner;
125180
switch (typestr) {
126181
case 'undefined':
127-
case 'string':
128182
case 'boolean':
129183
case 'number':
130184
case 'bigint': {
131185
return typestr;
132186
}
187+
case 'string': {
188+
assertWellFormedString(inner);
189+
return 'string';
190+
}
133191
case 'symbol': {
134192
assertPassableSymbol(inner);
135193
return 'symbol';

packages/pass-style/test/test-pass-style.js

-5
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/* eslint-disable no-useless-concat */
2+
import { test } from './prepare-test-env-ava.js';
3+
4+
import {
5+
passStyleOf,
6+
isWellFormedString,
7+
assertWellFormedString,
8+
} from '../src/passStyleOf.js';
9+
10+
test('test string well formedness behaviors', t => {
11+
const gcleff1 = '\u{1D11E}';
12+
const gcleff2 = '\u{D834}\u{DD1E}';
13+
const gcleff3 = '\u{D834}' + '\u{DD1E}';
14+
const badcleff1 = '\u{D834}\u{D834}\u{DD1E}';
15+
const badcleff2 = '\u{D834}\u{DD1E}\u{D834}';
16+
const badcleff3 = '\u{D834}' + '\u{DD1E}\u{D834}';
17+
18+
// This test block ensures that the underlying platform behaves as we expect
19+
t.is(gcleff1, gcleff2);
20+
t.is(gcleff1, gcleff3);
21+
t.is(gcleff1.length, 2);
22+
t.is(gcleff2.length, 2);
23+
t.is(gcleff3.length, 2);
24+
// Uses string iterator, which iterates code points if possible, not
25+
// UTF16 code units
26+
t.deepEqual([...gcleff1], [gcleff1]);
27+
t.not(badcleff1, badcleff2);
28+
t.is(badcleff2, badcleff3);
29+
t.is(badcleff1.length, 3);
30+
// But if the string contains lone surrogates, the string iterator will
31+
// produce those as characters
32+
t.deepEqual([...badcleff1], ['\u{D834}', gcleff1]);
33+
t.deepEqual([...badcleff2], [gcleff1, '\u{D834}']);
34+
35+
t.is(passStyleOf(gcleff1), 'string');
36+
t.true(isWellFormedString(gcleff1));
37+
t.notThrows(() => assertWellFormedString(gcleff1));
38+
39+
t.throws(() => passStyleOf(badcleff1), {
40+
message: 'Expected well-formed unicode string: "\\ud834𝄞"',
41+
});
42+
t.throws(() => passStyleOf(badcleff2), {
43+
message: 'Expected well-formed unicode string: "𝄞\\ud834"',
44+
});
45+
t.false(isWellFormedString(badcleff1));
46+
t.false(isWellFormedString(badcleff2));
47+
t.throws(() => assertWellFormedString(badcleff1), {
48+
message: 'Expected well-formed unicode string: "\\ud834𝄞"',
49+
});
50+
t.throws(() => assertWellFormedString(badcleff2), {
51+
message: 'Expected well-formed unicode string: "𝄞\\ud834"',
52+
});
53+
});

0 commit comments

Comments
 (0)