Skip to content

Commit 14c636c

Browse files
committed
buffer: use a branchless loop for atob
1 parent 527e435 commit 14c636c

File tree

2 files changed

+77
-35
lines changed

2 files changed

+77
-35
lines changed

lib/buffer.js

Lines changed: 66 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,9 +1246,9 @@ if (internalBinding('config').hasIntl) {
12461246
}
12471247

12481248
function btoa(input) {
1249-
// The implementation here has not been performance optimized in any way and
1250-
// should not be.
1251-
// Refs: https://github.com/nodejs/node/pull/38433#issuecomment-828426932
1249+
// The implementation here has been slightly performance optimized,
1250+
// but still not nearly as much as it should be.
1251+
// Refs: https://github.com/nodejs/node/pull/51670
12521252
if (arguments.length === 0) {
12531253
throw new ERR_MISSING_ARGS('input');
12541254
}
@@ -1267,63 +1267,94 @@ function btoa(input) {
12671267
// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode
12681268
// https://infra.spec.whatwg.org/#ascii-whitespace
12691269
// Valid Characters: [\t\n\f\r +/0-9=A-Za-z]
1270-
// Lookup table (-1 = invalid, 0 = valid)
1270+
// Lookup table (-1 = invalid, 0 = whitespace, 1 = non-whitespace)
1271+
// Note that `=` is set to `-1` because `atob` strips trailing `=` padding before consulting this table, so any `=` it still encounters is invalid.
12711272
/* eslint-disable no-multi-spaces, indent */
12721273
const kForgivingBase64AllowedChars = [
12731274
-1, -1, -1, -1, -1, -1, -1, -1,
12741275
-1, 0, 0, -1, 0, 0, -1, -1,
12751276
-1, -1, -1, -1, -1, -1, -1, -1,
12761277
-1, -1, -1, -1, -1, -1, -1, -1,
12771278
0, -1, -1, -1, -1, -1, -1, -1,
1278-
-1, -1, -1, 0, -1, -1, -1, 0,
1279-
0, 0, 0, 0, 0, 0, 0, 0,
1280-
0, 0, -1, -1, -1, 0, -1, -1,
1281-
-1, 0, 0, 0, 0, 0, 0, 0,
1282-
0, 0, 0, 0, 0, 0, 0, 0,
1283-
0, 0, 0, 0, 0, 0, 0, 0,
1284-
0, 0, 0, -1, -1, -1, -1, -1,
1285-
-1, 0, 0, 0, 0, 0, 0, 0,
1286-
0, 0, 0, 0, 0, 0, 0, 0,
1287-
0, 0, 0, 0, 0, 0, 0, 0,
1288-
0, 0, 0, -1, -1, -1, -1, -1,
1279+
-1, -1, -1, 1, -1, -1, -1, 1,
1280+
1, 1, 1, 1, 1, 1, 1, 1,
1281+
1, 1, -1, -1, -1, -1, -1, -1,
1282+
-1, 1, 1, 1, 1, 1, 1, 1,
1283+
1, 1, 1, 1, 1, 1, 1, 1,
1284+
1, 1, 1, 1, 1, 1, 1, 1,
1285+
1, 1, 1, -1, -1, -1, -1, -1,
1286+
-1, 1, 1, 1, 1, 1, 1, 1,
1287+
1, 1, 1, 1, 1, 1, 1, 1,
1288+
1, 1, 1, 1, 1, 1, 1, 1,
1289+
1, 1, 1, -1, -1, -1, -1, -1,
12891290
];
12901291
/* eslint-enable no-multi-spaces, indent */
12911292

12921293
function atob(input) {
1293-
// The implementation here has not been performance optimized in any way and
1294-
// should not be.
1295-
// Refs: https://github.com/nodejs/node/pull/38433#issuecomment-828426932
1294+
// The implementation here has been slightly performance optimized,
1295+
// but still not nearly as much as it should be.
1296+
// Refs: https://github.com/nodejs/node/pull/51670
12961297
if (arguments.length === 0) {
12971298
throw new ERR_MISSING_ARGS('input');
12981299
}
12991300

13001301
input = `${input}`;
1302+
13011303
let nonAsciiWhitespaceCharCount = 0;
13021304
let equalCharCount = 0;
1305+
let length = input.length;
1306+
1307+
// We use an accumulator to track errors. If, at the end,
1308+
// any high bits are set in `acc`, an invalid character has
1309+
// been parsed.
1310+
//
1311+
// This works because invalid base64 characters in the lookup
1312+
// table are `-1` and any non-ascii character will be greater
1313+
// than 0x7f.
1314+
let acc = 0;
13031315

1304-
for (let n = 0; n < input.length; n++) {
1305-
const ch = StringPrototypeCharCodeAt(input, n);
1306-
const val = kForgivingBase64AllowedChars[ch & 0x7f];
1316+
// Right-trim whitespace and equal signs.
1317+
while (length > 0) {
1318+
const ch = StringPrototypeCharCodeAt(input, length - 1);
13071319

1308-
if ((ch | val) & ~0x7f) {
1309-
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1320+
// Possibly-valid whitespace.
1321+
if (ch <= 0x20) {
1322+
acc |= kForgivingBase64AllowedChars[ch];
1323+
length--;
1324+
continue;
13101325
}
13111326

1312-
if (ch > 0x20) {
1327+
// Equals sign.
1328+
if (ch === 0x3d) {
13131329
nonAsciiWhitespaceCharCount++;
1330+
equalCharCount++;
1331+
length--;
1332+
continue;
1333+
}
13141334

1315-
if (ch === 0x3d) {
1316-
equalCharCount++;
1317-
} else if (equalCharCount) {
1318-
// The `=` char is only allowed at the end.
1319-
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1320-
}
1335+
break;
1336+
}
13211337

1322-
if (equalCharCount > 2) {
1323-
// Only one more `=` is permitted after the first equal sign.
1324-
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1325-
}
1326-
}
1338+
// Parse optimistically. Check for errors after.
1339+
// Equal signs are considered errors at this point (value = -1).
1340+
for (let n = 0; n < length; n++) {
1341+
const ch = StringPrototypeCharCodeAt(input, n);
1342+
const value = kForgivingBase64AllowedChars[ch & 0x7f];
1343+
1344+
acc |= ch | value;
1345+
1346+
// Valid non-whitespace has a value of `1`.
1347+
nonAsciiWhitespaceCharCount += value;
1348+
}
1349+
1350+
if (acc & ~0x7f) {
1351+
// We parsed an invalid character at some point in one of the loops.
1352+
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
1353+
}
1354+
1355+
if (equalCharCount > 2) {
1356+
// At most two trailing equal signs are permitted.
1357+
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
13271358
}
13281359

13291360
let reminder = nonAsciiWhitespaceCharCount % 4;

test/parallel/test-btoa-atob.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,17 @@ for (let i = 0; i <= 0xffff; i++) {
6666
throws(() => atob(ch + 'aaa'), invalidChar);
6767
}
6868

69+
strictEqual(atob('YQ'), 'a');
70+
strictEqual(atob('YQ '), 'a');
71+
strictEqual(atob('Y Q'), 'a');
72+
strictEqual(atob('Y Q\t'), 'a');
73+
strictEqual(atob('Y Q=\t= '), 'a');
74+
strictEqual(atob('Y Q = = '), 'a');
75+
throws(() => atob('YQ='), invalidChar);
76+
throws(() => atob('YQ==='), invalidChar);
77+
throws(() => atob('Y=Q'), invalidChar);
78+
throws(() => atob('YQ\v'), invalidChar);
79+
6980
throws(() => btoa('abcd\ufeffx'), invalidChar);
7081

7182
const charset =

0 commit comments

Comments
 (0)