Skip to content

Commit bca1e3f

Browse files
authored
feat(pass-style): feature flag: only well-formed strings are passable (#2002)
1 parent 05fe78a commit bca1e3f

7 files changed

+165
-1
lines changed

packages/pass-style/NEWS.md

+11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
User-visible changes in `@endo/pass-style`:
22

3+
# Next release
4+
5+
- Exports `isWellFormedString` and `assertWellFormedString`. Unfortunately the [standard `String.prototype.isWellFormed`](https://tc39.es/proposal-is-usv-string/) first coerces its input to string, leading it to claim that some non-strings are well-formed strings. By contrast, `isWellFormedString` and `assertWellFormedString` will not judge any non-strings to be well-formed strings.
6+
- Previously, all JavaScript strings were considered Passable with `passStyleOf(str) === 'string'`. Our tentative plan is that only well-formed Unicode strings will be considered Passable. For all others, `passStyleOf(str)` throws a diagnostic error. This would bring us into closer conformance to the OCapN standard, which prohibits sending non-well-formed strings, and requires non-well-formed strings to be rejected when received. Applications that had previously handled non-well-formed strings successfully (even if inadvertantly) may then start experiences these failure. We are also uncertain about the performance impact of this extra check, since it is linear in the size of strings.
7+
- Thus, in this release we introduce the environment option `ONLY_WELL_FORMED_STRINGS_PASSABLE` as a feature flag. To abstract over this switch, we also export `assertPassableString`. For now, if `ONLY_WELL_FORMED_STRINGS_PASSABLE` environment option is `'enabled'`, then `assertPassableString` is the same as `assertWellFormedString`. Otherwise `assertPassableString` just asserts that `str` is a string. In a bash shell, for example, you could set
8+
```sh
9+
export ONLY_WELL_FORMED_STRINGS_PASSABLE=enabled
10+
```
11+
to turn this feature on.
12+
- Currently, `ONLY_WELL_FORMED_STRINGS_PASSABLE` defaults to `'disabled'` because we do not yet know the performance impact. Later, if we decide we can afford it, we'll first change the default to `'enabled'` and ultimately remove the switch altogether. Be prepared for these changes.
13+
314
# v1.2.0 (2024-02-22)
415
516
- Now supports `AggegateError`, `error.errors`, `error.cause`.

packages/pass-style/index.js

+6
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ export {
1818
passableSymbolForName,
1919
} from './src/symbol.js';
2020

21+
export {
22+
isWellFormedString,
23+
assertWellFormedString,
24+
assertPassableString,
25+
} from './src/string.js';
26+
2127
export {
2228
passStyleOf,
2329
isPassable,

packages/pass-style/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"test": "ava"
3535
},
3636
"dependencies": {
37+
"@endo/env-options": "^1.1.1",
3738
"@endo/errors": "^1.1.0",
3839
"@endo/eventual-send": "^1.1.2",
3940
"@endo/promise-kit": "^1.0.4",

packages/pass-style/src/passStyleOf.js

+5-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import { RemotableHelper } from './remotable.js';
2424

2525
import { assertPassableSymbol } from './symbol.js';
2626
import { assertSafePromise } from './safe-promise.js';
27+
import { assertPassableString } from './string.js';
2728

2829
/** @typedef {import('./internal-types.js').PassStyleHelper} PassStyleHelper */
2930
/** @typedef {import('./types.js').Passable} Passable */
@@ -134,12 +135,15 @@ const makePassStyleOf = passStyleHelpers => {
134135
const typestr = typeof inner;
135136
switch (typestr) {
136137
case 'undefined':
137-
case 'string':
138138
case 'boolean':
139139
case 'number':
140140
case 'bigint': {
141141
return typestr;
142142
}
143+
case 'string': {
144+
assertPassableString(inner);
145+
return 'string';
146+
}
143147
case 'symbol': {
144148
assertPassableSymbol(inner);
145149
return 'symbol';

packages/pass-style/src/string.js

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import { getEnvironmentOption } from '@endo/env-options';
2+
import { Fail } from '@endo/errors';
3+
4+
// @ts-expect-error TS builtin `String` type does not yet
5+
// know about`isWellFormed`
6+
const hasWellFormedStringMethod = !!String.prototype.isWellFormed;
7+
8+
/**
9+
* Is the argument a well-formed string?
10+
*
11+
* Unfortunately, the
12+
* [standard built-in `String.prototype.isWellFormed`](https://github.com/tc39/proposal-is-usv-string)
13+
* does a ToString on its input, causing it to judge non-strings to be
14+
* well-formed strings if they coerce to a well-formed strings. This
15+
* recapitulates the mistake in having the global `isNaN` coerce its inputs,
16+
* causing it to judge non-string to be NaN if they coerce to NaN.
17+
*
18+
* This `isWellFormedString` function only judges well-formed strings to be
19+
* well-formed strings. For all non-strings it returns false.
20+
*
21+
* @param {unknown} str
22+
* @returns {str is string}
23+
*/
24+
export const isWellFormedString = hasWellFormedStringMethod
25+
? // @ts-expect-error TS does not yet know about `isWellFormed`
26+
str => typeof str === 'string' && str.isWellFormed()
27+
: str => {
28+
if (typeof str !== 'string') {
29+
return false;
30+
}
31+
for (const ch of str) {
32+
// The string iterator iterates by Unicode code point, not
33+
// UTF16 code unit. But if it encounters an unpaired surrogate,
34+
// it will produce it.
35+
const cp = /** @type {number} */ (ch.codePointAt(0));
36+
if (cp >= 0xd800 && cp <= 0xdfff) {
37+
// All surrogates are in this range. The string iterator only
38+
// produces a character in this range for unpaired surrogates,
39+
// which only happens if the string is not well-formed.
40+
return false;
41+
}
42+
}
43+
return true;
44+
};
45+
harden(isWellFormedString);
46+
47+
/**
48+
* Returns normally when `isWellFormedString(str)` would return true.
49+
* Throws a diagnostic error when `isWellFormedString(str)` would return false.
50+
*
51+
* @param {unknown} str
52+
* @returns {asserts str is string}
53+
*/
54+
export const assertWellFormedString = str => {
55+
isWellFormedString(str) || Fail`Expected well-formed unicode string: ${str}`;
56+
};
57+
harden(assertWellFormedString);
58+
59+
const ONLY_WELL_FORMED_STRINGS_PASSABLE =
60+
getEnvironmentOption('ONLY_WELL_FORMED_STRINGS_PASSABLE', 'disabled', [
61+
'enabled',
62+
]) === 'enabled';
63+
64+
/**
65+
* For now,
66+
* if `ONLY_WELL_FORMED_STRINGS_PASSABLE` environment option is `'enabled'`,
67+
* then `assertPassableString` is the same as `assertWellFormedString`.
68+
* Otherwise `assertPassableString` just asserts that `str` is a string.
69+
*
70+
* Currently, `ONLY_WELL_FORMED_STRINGS_PASSABLE` defaults to `'disabled'`
71+
* because we do not yet know the performance impact. Later, if we decide we
72+
* can afford it, we'll first change the default to `'enabled'` and ultimately
73+
* remove the switch altogether. Be prepared for these changes.
74+
*
75+
* TODO once the switch is removed, simplify `assertPassableString` to
76+
* simply be `assertWellFormedString`.
77+
*
78+
* TODO update https://github.com/Agoric/agoric-sdk/blob/master/docs/env.md
79+
* which is unfortunately in the wrong repo to be updated in the same change.
80+
*
81+
* @param { unknown } str
82+
* @returns {asserts str is string }
83+
*/
84+
export const assertPassableString = str => {
85+
typeof str === 'string' || Fail`Expected string ${str}`;
86+
!ONLY_WELL_FORMED_STRINGS_PASSABLE || assertWellFormedString(str);
87+
};
88+
harden(assertPassableString);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/* global process */
2+
3+
process.env.ONLY_WELL_FORMED_STRINGS_PASSABLE = 'enabled';
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/* eslint-disable no-useless-concat */
2+
import './prepare-only-well-formed-strings-passable.js';
3+
import test from '@endo/ses-ava/prepare-endo.js';
4+
5+
import { isWellFormedString, assertWellFormedString } from '../src/string.js';
6+
import { passStyleOf } from '../src/passStyleOf.js';
7+
8+
test('test string well formedness behaviors', t => {
9+
const gcleff1 = '\u{1D11E}';
10+
const gcleff2 = '\u{D834}\u{DD1E}';
11+
const gcleff3 = '\u{D834}' + '\u{DD1E}';
12+
const badcleff1 = '\u{D834}\u{D834}\u{DD1E}';
13+
const badcleff2 = '\u{D834}\u{DD1E}\u{D834}';
14+
const badcleff3 = '\u{D834}' + '\u{DD1E}\u{D834}';
15+
16+
// This test block ensures that the underlying platform behaves as we expect
17+
t.is(gcleff1, gcleff2);
18+
t.is(gcleff1, gcleff3);
19+
t.is(gcleff1.length, 2);
20+
t.is(gcleff2.length, 2);
21+
t.is(gcleff3.length, 2);
22+
// Uses string iterator, which iterates code points if possible, not
23+
// UTF16 code units
24+
t.deepEqual([...gcleff1], [gcleff1]);
25+
t.not(badcleff1, badcleff2);
26+
t.is(badcleff2, badcleff3);
27+
t.is(badcleff1.length, 3);
28+
// But if the string contains lone surrogates, the string iterator will
29+
// produce those as characters
30+
t.deepEqual([...badcleff1], ['\u{D834}', gcleff1]);
31+
t.deepEqual([...badcleff2], [gcleff1, '\u{D834}']);
32+
33+
t.is(passStyleOf(gcleff1), 'string');
34+
t.true(isWellFormedString(gcleff1));
35+
t.notThrows(() => assertWellFormedString(gcleff1));
36+
37+
t.throws(() => passStyleOf(badcleff1), {
38+
message: 'Expected well-formed unicode string: "\\ud834𝄞"',
39+
});
40+
t.throws(() => passStyleOf(badcleff2), {
41+
message: 'Expected well-formed unicode string: "𝄞\\ud834"',
42+
});
43+
t.false(isWellFormedString(badcleff1));
44+
t.false(isWellFormedString(badcleff2));
45+
t.throws(() => assertWellFormedString(badcleff1), {
46+
message: 'Expected well-formed unicode string: "\\ud834𝄞"',
47+
});
48+
t.throws(() => assertWellFormedString(badcleff2), {
49+
message: 'Expected well-formed unicode string: "𝄞\\ud834"',
50+
});
51+
});

0 commit comments

Comments
 (0)