Skip to content

Commit 08dd271

Browse files
nodejs-github-botdanielleadams
authored andcommitted
deps: update ada to 2.1.0
PR-URL: #47598 Backport-PR-URL: #48345 Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com> Reviewed-By: Filip Skokan <panva.ip@gmail.com> Reviewed-By: Matthew Aitken <maitken033380023@gmail.com> Reviewed-By: Tiancheng "Timothy" Gu <timothygu99@gmail.com> Reviewed-By: Tobias Nießen <tniessen@tnie.de> Reviewed-By: Rich Trott <rtrott@gmail.com>
1 parent 8897305 commit 08dd271

File tree

2 files changed

+93
-19
lines changed

2 files changed

+93
-19
lines changed

deps/ada/ada.cpp

+79-14
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2023-03-30 17:00:48 -0400. Do not edit! */
1+
/* auto-generated on 2023-04-17 12:20:41 -0400. Do not edit! */
22
/* begin file src/ada.cpp */
33
#include "ada.h"
44
/* begin file src/checkers.cpp */
@@ -2753,7 +2753,7 @@ bool ascii_has_upper_case(char* input, size_t length) {
27532753
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
27542754
uint64_t broadcast_80 = broadcast(0x80);
27552755
uint64_t broadcast_Ap = broadcast(128 - 'A');
2756-
uint64_t broadcast_Zp = broadcast(128 - 'Z');
2756+
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
27572757
size_t i = 0;
27582758

27592759
uint64_t runner{0};
@@ -2775,7 +2775,7 @@ void ascii_map(char* input, size_t length) {
27752775
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
27762776
uint64_t broadcast_80 = broadcast(0x80);
27772777
uint64_t broadcast_Ap = broadcast(128 - 'A');
2778-
uint64_t broadcast_Zp = broadcast(128 - 'Z');
2778+
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
27792779
size_t i = 0;
27802780

27812781
for (; i + 7 < length; i += 8) {
@@ -9845,7 +9845,7 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
98459845
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
98469846
uint64_t broadcast_80 = broadcast(0x80);
98479847
uint64_t broadcast_Ap = broadcast(128 - 'A');
9848-
uint64_t broadcast_Zp = broadcast(128 - 'Z');
9848+
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
98499849
uint64_t non_ascii = 0;
98509850
size_t i = 0;
98519851

@@ -9961,7 +9961,7 @@ ada_really_inline constexpr bool is_forbidden_domain_code_point(
99619961
}
99629962

99639963
ada_really_inline constexpr bool contains_forbidden_domain_code_point(
9964-
char* input, size_t length) noexcept {
9964+
const char* input, size_t length) noexcept {
99659965
size_t i = 0;
99669966
uint8_t accumulator{};
99679967
for (; i + 4 <= length; i += 4) {
@@ -9976,6 +9976,44 @@ ada_really_inline constexpr bool contains_forbidden_domain_code_point(
99769976
return accumulator;
99779977
}
99789978

9979+
// Lookup table indexed by a byte value (0..255), used by
// contains_forbidden_domain_code_point_or_upper:
//   0 - the byte is neither flagged as forbidden nor an upper case letter,
//   1 - the byte is flagged as a forbidden domain code point (this includes
//       every byte value from 0x7f through 0xff),
//   2 - the byte is an upper case ASCII letter ('A'..'Z').
// Entries are combined with bitwise OR, so 1 and 2 act as independent flags.
constexpr static uint8_t is_forbidden_domain_code_point_table_or_upper[] = {
9980+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9981+
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
9982+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
9983+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0,
9984+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9985+
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9986+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9987+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9988+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9989+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9990+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
9991+
9992+
// Compile-time sanity checks: one entry per possible byte value, and both
// ends of the upper case range carry the "upper case" flag (2).
static_assert(sizeof(is_forbidden_domain_code_point_table_or_upper) == 256);
9993+
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('A')] == 2);
9994+
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('Z')] == 2);
9995+
9996+
/**
 * ORs together the is_forbidden_domain_code_point_table_or_upper entry of
 * each of the `length` bytes starting at `input` and returns the result.
 *
 * Table entries are 0, 1 (forbidden code point) or 2 (upper case ASCII
 * letter), so the folded value lies in 0..3 before it is returned.
 *
 * NOTE(review): the declared return type is bool, so the folded value is
 * converted to true/false on return. A caller that compares the result with
 * 2 (as parse_host appears to do) can never observe that value. Returning
 * uint8_t would fix this but needs the matching declaration changed at the
 * same time -- confirm.
 *
 * @param input  pointer to the first byte to inspect
 * @param length number of bytes to inspect
 */
ada_really_inline constexpr bool contains_forbidden_domain_code_point_or_upper(
    const char* input, size_t length) noexcept {
  const auto& table = is_forbidden_domain_code_point_table_or_upper;
  uint8_t flags{};
  size_t pos = 0;
  // Main loop: fold four table lookups per iteration.
  while (pos + 4 <= length) {
    flags |= table[uint8_t(input[pos])];
    flags |= table[uint8_t(input[pos + 1])];
    flags |= table[uint8_t(input[pos + 2])];
    flags |= table[uint8_t(input[pos + 3])];
    pos += 4;
  }
  // Tail: at most three bytes are left.
  while (pos < length) {
    flags |= table[uint8_t(input[pos])];
    ++pos;
  }
  return flags;
}
10016+
997910017
static_assert(unicode::is_forbidden_domain_code_point('%'));
998010018
static_assert(unicode::is_forbidden_domain_code_point('\x7f'));
998110019
static_assert(unicode::is_forbidden_domain_code_point('\0'));
@@ -13473,23 +13511,50 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
1347313511
// to ASCII with domain and false. The most common case is an ASCII input, in
1347413512
// which case we do not need to call the expensive 'to_ascii' if a few
1347513513
// conditions are met: no '%' and no 'xn-' subsequence.
13476-
std::string _buffer = std::string(input);
13477-
// This next function checks that the result is ascii, but we are going to
13478-
// to check anyhow with is_forbidden.
13479-
// bool is_ascii =
13480-
unicode::to_lower_ascii(_buffer.data(), _buffer.size());
13481-
bool is_forbidden = unicode::contains_forbidden_domain_code_point(
13482-
_buffer.data(), _buffer.size());
13483-
if (is_forbidden == 0 && _buffer.find("xn-") == std::string_view::npos) {
13514+
13515+
// Often, the input does not contain any forbidden code points, and no upper
13516+
// case ASCII letter, then we can just copy it to the buffer. We want to
13517+
// optimize for such a common case.
13518+
uint8_t is_forbidden_or_upper =
13519+
unicode::contains_forbidden_domain_code_point_or_upper(input.data(),
13520+
input.size());
13521+
// Minor optimization opportunity:
13522+
// contains_forbidden_domain_code_point_or_upper could be extended to check for
13523+
// the presence of characters that cannot appear in the ipv4 address and we
13524+
// could also check whether x and n and - are present, and so we could skip
13525+
// some of the checks below. However, the gains are likely to be small, and
13526+
// the code would be more complex.
13527+
if (is_forbidden_or_upper == 0 &&
13528+
input.find("xn-") == std::string_view::npos) {
1348413529
// fast path
13485-
update_base_hostname(_buffer);
13530+
update_base_hostname(input);
1348613531
if (checkers::is_ipv4(get_hostname())) {
1348713532
ada_log("parse_host fast path ipv4");
1348813533
return parse_ipv4(get_hostname());
1348913534
}
1349013535
ada_log("parse_host fast path ", get_hostname());
1349113536
return true;
13537+
} else if (is_forbidden_or_upper == 2) {
13538+
// We have encountered at least one upper case ASCII letter, let us
13539+
// try to convert it to lower case. If there is no 'xn-' in the result,
13540+
// we can then use a secondary fast path.
13541+
std::string _buffer = std::string(input);
13542+
unicode::to_lower_ascii(_buffer.data(), _buffer.size());
13543+
if (input.find("xn-") == std::string_view::npos) {
13544+
// secondary fast path when input is not all lower case
13545+
update_base_hostname(input);
13546+
if (checkers::is_ipv4(get_hostname())) {
13547+
ada_log("parse_host fast path ipv4");
13548+
return parse_ipv4(get_hostname());
13549+
}
13550+
ada_log("parse_host fast path ", get_hostname());
13551+
return true;
13552+
}
1349213553
}
13554+
// We have encountered at least one forbidden code point or the input contains
13555+
// 'xn-' (case insensitive), so we need to call 'to_ascii' to perform the full
13556+
// conversion.
13557+
1349313558
ada_log("parse_host calling to_ascii");
1349413559
std::optional<std::string> host = std::string(get_hostname());
1349513560
is_valid = ada::unicode::to_ascii(host, input, input.find('%'));

deps/ada/ada.h

+14-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2023-03-30 17:00:48 -0400. Do not edit! */
1+
/* auto-generated on 2023-04-17 12:20:41 -0400. Do not edit! */
22
/* begin file include/ada.h */
33
/**
44
* @file ada.h
@@ -1418,11 +1418,20 @@ ada_really_inline constexpr bool is_forbidden_host_code_point(
14181418
const char c) noexcept;
14191419

14201420
/**
1421-
* Checks if the input is a forbidden domain code point.
1421+
* Checks if the input contains a forbidden domain code point.
14221422
* @see https://url.spec.whatwg.org/#forbidden-domain-code-point
14231423
*/
14241424
ada_really_inline constexpr bool contains_forbidden_domain_code_point(
1425-
char* input, size_t length) noexcept;
1425+
const char* input, size_t length) noexcept;
1426+
1427+
/**
1428+
* Checks if the input contains a forbidden domain code point in which case
1429+
* the first bit is set to 1. If the input contains an upper case ASCII letter,
1430+
* then the second bit is set to 1.
1431+
* @see https://url.spec.whatwg.org/#forbidden-domain-code-point
1432+
*/
1433+
ada_really_inline constexpr bool contains_forbidden_domain_code_point_or_upper(
1434+
const char* input, size_t length) noexcept;
14261435

14271436
/**
14281437
* Checks if the input is a forbidden domain code point.
@@ -6503,13 +6512,13 @@ inline std::ostream &operator<<(std::ostream &out,
65036512
#ifndef ADA_ADA_VERSION_H
65046513
#define ADA_ADA_VERSION_H
65056514

6506-
#define ADA_VERSION "2.0.0"
6515+
#define ADA_VERSION "2.1.0"
65076516

65086517
namespace ada {
65096518

65106519
enum {
65116520
ADA_VERSION_MAJOR = 2,
6512-
ADA_VERSION_MINOR = 0,
6521+
ADA_VERSION_MINOR = 1,
65136522
ADA_VERSION_REVISION = 0,
65146523
};
65156524

0 commit comments

Comments
 (0)