Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue with non-ASCII characters in Buffer and Stream attachments #255

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions features/attachments.feature
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,13 @@ Feature: Attachments
"""
var hooks = function () {
this.Before(function(scenario, callback) {
scenario.attach(new Buffer([100, 97, 116, 97]), 'image/png');
var data = [];

for (var i = 0; i < 256; i++) {
data.push(i);
}

scenario.attach(new Buffer(data), 'image/png');
callback();
});
};
Expand Down Expand Up @@ -57,7 +63,7 @@ Feature: Attachments
"embeddings": [
{
"mime_type": "image/png",
"data": "ZGF0YQ=="
"data": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/w=="
}
]
},
Expand Down Expand Up @@ -98,22 +104,33 @@ Feature: Attachments
var hooks = function () {
this.Before(function(scenario, callback) {
var Stream = require('stream');
var versionParts = /v(\d+)\.(\d+)\.(\d+)/.exec(process.version);
var major = parseInt(versionParts[0], 10);
var minor = parseInt(versionParts[1], 10);
var versionParts = process.version.match(/v(\d+)\.(\d+)\.(\d+)/);
var major = parseInt(versionParts[1], 10);
var minor = parseInt(versionParts[2], 10);
var data1 = [];
var data2 = [];

for (var i = 0; i < 128; i++) {
data1.push(i);
}

for (var i = 128; i < 256; i++) {
data2.push(i);
}

if (major > 0 || minor >= 10) {
var stream = new Stream.Readable();
stream._read = function() {};
stream.push(new Buffer([100, 97, 116, 97]));
stream.push(new Buffer(data1));
stream.push(new Buffer(data2));
stream.push(null);

scenario.attach(stream, 'image/png', function(error) {
callback(error);
});
}
else {
scenario.attach(new Buffer([100, 97, 116, 97]), 'image/png');
scenario.attach(new Buffer([].concat(data1, data2)), 'image/png');
callback();
}
});
Expand Down Expand Up @@ -152,7 +169,7 @@ Feature: Attachments
"embeddings": [
{
"mime_type": "image/png",
"data": "ZGF0YQ=="
"data": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/w=="
}
]
},
Expand Down
14 changes: 6 additions & 8 deletions lib/cucumber/api/scenario.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ var Scenario = function (astTreeWalker, astScenario) {

data.on('data', function(chunk) {
buffers.push(chunk);
})
});
data.on('end', function() {
astTreeWalker.attach(Buffer.concat(buffers).toString(), mimeType);
astTreeWalker.attach(Buffer.concat(buffers).toString('binary'), mimeType);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assumes that the buffer is always going to be binary, Is this a safe assumption?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've just pushed a commit that adds some additional tests around this. The commit also fixes a bug I just found with attaching strings that contain non-ASCII characters.

Best I can tell from http://nodejs.org/api/buffer.html Buffer is only designed to handle arrays of octals (bytes); just search for the word octet on that page to see what I mean. Which would mean it is safe to assume that a Buffer only ever contains binary. What sort of binary it contains (e.g. raw binary or a UTF-8 encoded string) depends on what constructor if Buffer you use when creating the Buffer you pass into Scenario.attach().

Looking at the encode64s function in https://github.com/cucumber/gherkin/blob/master/js/lib/gherkin/formatter/json_formatter.js it assumes that the string you pass as an attachment to the JSONFormatter only contains 8-bit characters; basically it can only base64 encode binary. Which makes sense as you can only base64 encode binary.

Cucumber only seems to store a MIME type (aka media type) with attachments/embeddings. It doesn't store the character encoding. Which means if you attach text, you have no idea from the output JSON what encoding the text is in. cucumber/cucumber-jvm#501 seems to be related.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @samccone. Did that answer your question?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@simondean Reading the docs here I see a note below that means 'binary' encoding will be removed in future versions of Node. Is it safe to use it here?

" 'binary' - A way of encoding raw binary data into strings by using only the first 8 bits of each character. This encoding method is deprecated and should be avoided in favor of Buffer objects where possible. This encoding will be removed in future versions of Node."

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you thing about using the hex encoding since the binary encoding appears to be deprecated?


callback();
});
Expand All @@ -34,18 +34,16 @@ var Scenario = function (astTreeWalker, astScenario) {
if (!mimeType)
throw Error(Scenario.ATTACH_MISSING_MIME_TYPE_ARGUMENT);

astTreeWalker.attach(data.toString(), mimeType);
astTreeWalker.attach(data.toString('binary'), mimeType);

if (callback) {
if (callback)
callback();
}
}
else {
if (!mimeType) {
if (!mimeType)
mimeType = Scenario.DEFAULT_TEXT_MIME_TYPE;
}

astTreeWalker.attach(data.toString(), mimeType);
astTreeWalker.attach(new Buffer(data.toString(), 'utf8').toString('binary'), mimeType);
}
}
};
Expand Down
97 changes: 95 additions & 2 deletions spec/cucumber/api/scenario_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ describe("Cucumber.Api.Scenario", function() {
spyOnStub(astTreeWalker, "attach");

scenario.attach(stream, mimeType, callback);
})
});

it("does not call back straight away", function() {
expect(callback).not.toHaveBeenCalled();
Expand Down Expand Up @@ -164,6 +164,33 @@ describe("Cucumber.Api.Scenario", function() {
expect(callback).toHaveBeenCalled();
});
});

describe("when the stream finishes providing data and the data contains non-ASCII characters", function() {
var data1, data2, text;

beforeEach(function() {
data1 = [];
data2 = [];

for (var i = 0; i < 256; i++) {
data1.push(i);
data2.push(i);
}

dataListener(new Buffer(data1));
dataListener(new Buffer(data2));
text = String.fromCharCode.apply(null, [].concat(data1, data2));
endListener();
});

it("instructs the ast tree walker to create an attachment containing the contents of the stream", function() {
expect(astTreeWalker.attach).toHaveBeenCalledWith(text, mimeType);
});

it("calls back", function() {
expect(callback).toHaveBeenCalled();
});
});
})
});
}
Expand All @@ -173,7 +200,7 @@ describe("Cucumber.Api.Scenario", function() {

beforeEach(function() {
buffer = new Buffer("data");
})
});

it("throws an exception when the mimeType argument is missing", function() {
expect(function() { scenario.attach(buffer); }).toThrow(new Error("Cucumber.Api.Scenario.attach() expects a mimeType"));
Expand Down Expand Up @@ -203,6 +230,47 @@ describe("Cucumber.Api.Scenario", function() {
expect(callback).not.toHaveBeenCalled();
});
});

describe("when the buffer contains an array of bytes", function() {
var data, text, buffer;

beforeEach(function() {
data = [];

for (var i = 0; i < 256; i++) {
data.push(i);
}

text = String.fromCharCode.apply(null, data);
buffer = new Buffer(data);
});

it("instructs the ast tree walker to create an attachment containing the contents of the buffer", function() {
scenario.attach(buffer, mimeType);
expect(astTreeWalker.attach).toHaveBeenCalledWith(text, mimeType);
});
});

describe("when the buffer contains a UTF-8 encoded string", function() {
var data, text, buffer, utf8EncodedText;

beforeEach(function() {
data = [];

for (var i = 0; i < 512; i++) {
data.push(i);
}

text = String.fromCharCode.apply(null, data);
buffer = new Buffer(text, 'utf8');
utf8EncodedText = new Buffer(text, 'utf8').toString('binary');
});

it("instructs the ast tree walker to create an attachment containing the contents of the buffer", function() {
scenario.attach(buffer, mimeType);
expect(astTreeWalker.attach).toHaveBeenCalledWith(utf8EncodedText, mimeType);
});
});
});

describe("when the data is a string", function() {
Expand All @@ -221,6 +289,31 @@ describe("Cucumber.Api.Scenario", function() {
scenario.attach(data);
expect(astTreeWalker.attach).toHaveBeenCalledWith(data, "text/plain");
});

describe("when the string is a UTF-8 encoded string", function() {
var data, text, utf8EncodedText;

beforeEach(function() {
data = [];

for (var i = 0; i < 512; i++) {
data.push(i);
}

text = String.fromCharCode.apply(null, data);
utf8EncodedText = new Buffer(text, 'utf8').toString('binary');
});

it("instructs the ast tree walker to create an attachment containing the string", function() {
scenario.attach(text, mimeType);
expect(astTreeWalker.attach).toHaveBeenCalledWith(utf8EncodedText, mimeType);
});

it("defaults to the plain text mime type when the mimeType argument is missing", function() {
scenario.attach(text);
expect(astTreeWalker.attach).toHaveBeenCalledWith(utf8EncodedText, "text/plain");
});
});
});
});
});