Add a new res.textConverted() and always use UTF-8 for res.text()

Also uses iconv-lite directly instead of using the "encoding" package.

Fixes #184.
This commit is contained in:
Timothy Gu 2016-10-12 21:47:36 -07:00
parent c7a912cf5b
commit d3b4161d7c
2 changed files with 39 additions and 27 deletions

View File

@ -77,6 +77,16 @@ export default class Body {
return this[CONSUME_BODY]();
}
/**
* Decode response as text, while automatically detecting the encoding and
* trying to decode to UTF-8 (non-spec api)
*
* Consumes the body through this[CONSUME_BODY]() and hands the resulting
* buffer, together with this.headers, to convertBody().
*
* @return Promise resolving to whatever convertBody() returns for the buffer
*/
textConverted() {
// arrow function keeps `this` bound to the body so this.headers is readable
return this[CONSUME_BODY]().then(buffer => convertBody(buffer, this.headers));
}
/**
* Decode buffers into utf-8 string
*
@ -96,12 +106,12 @@ export default class Body {
// body is string
if (typeof this.body === 'string') {
return Body.Promise.resolve(convertBody([new Buffer(this.body)], this.headers));
return Body.Promise.resolve(new Buffer(this.body));
}
// body is buffer
if (Buffer.isBuffer(this.body)) {
return Body.Promise.resolve(convertBody([this.body], this.headers));
return Body.Promise.resolve(this.body);
}
// body is stream
@ -147,7 +157,7 @@ export default class Body {
}
clearTimeout(resTimeout);
resolve(convertBody(accum, this.headers));
resolve(Buffer.concat(accum));
});
});
}
@ -158,11 +168,11 @@ export default class Body {
* Detect buffer encoding and convert to target encoding
* ref: http://www.w3.org/TR/2011/WD-html5-20110113/parsing.html#determining-the-character-encoding
*
* @param Array<Buffer> arrayOfBuffers Array of buffers
* @param Buffer buffer Incoming buffer
* @param Headers headers Headers object; its content-type is checked for a charset
* @return String
*/
function convertBody(arrayOfBuffers, headers) {
function convertBody(buffer, headers) {
const ct = headers.get('content-type');
let charset = 'utf-8';
let res, str;
@ -171,22 +181,14 @@ function convertBody(arrayOfBuffers, headers) {
if (ct) {
// skip encoding detection altogether if not html/xml/plain text
if (!/text\/html|text\/plain|\+xml|\/xml/i.test(ct)) {
return Buffer.concat(arrayOfBuffers);
return buffer;
}
res = /charset=([^;]*)/i.exec(ct);
}
// no charset in content type, peek at response body for at most 1024 bytes
if (!res && arrayOfBuffers.length > 0) {
for (let i = 0; i < arrayOfBuffers.length; i++) {
str += arrayOfBuffers[i].toString()
if (str.length > 1024) {
break;
}
}
str = str.substr(0, 1024);
}
str = buffer.slice(0, 1024).toString();
// html5
if (!res && str) {
@ -220,10 +222,10 @@ function convertBody(arrayOfBuffers, headers) {
// turn raw buffers into a single utf-8 buffer
return convert(
Buffer.concat(arrayOfBuffers)
, 'utf-8'
buffer
, 'UTF-8'
, charset
);
).toString();
}
/**

View File

@ -923,11 +923,21 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
});
});
it('should support encoding decode, xml dtd detect', function() {
// text() performs no charset conversion: the payload served by this endpoint
// (declared as EUC-JP in its XML prolog) is decoded as UTF-8, so the title is
// expected to come back as U+FFFD replacement characters plus U+0738 instead
// of the readable Japanese text.
it('should only use UTF-8 decoding with text()', function() {
url = `${base}encoding/euc-jp`;
return fetch(url).then(res => {
expect(res.status).to.equal(200);
return res.text().then(result => {
expect(result).to.equal('<?xml version="1.0" encoding="EUC-JP"?><title>\ufffd\ufffd\ufffd\u0738\ufffd</title>');
});
});
});
it('should support encoding decode, xml dtd detect', function() {
url = `${base}encoding/euc-jp`;
return fetch(url).then(res => {
expect(res.status).to.equal(200);
return res.textConverted().then(result => {
expect(result).to.equal('<?xml version="1.0" encoding="EUC-JP"?><title>日本語</title>');
});
});
@ -937,7 +947,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
url = `${base}encoding/shift-jis`;
return fetch(url).then(res => {
expect(res.status).to.equal(200);
return res.text().then(result => {
return res.textConverted().then(result => {
expect(result).to.equal('<div>日本語</div>');
});
});
@ -947,7 +957,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
url = `${base}encoding/gbk`;
return fetch(url).then(res => {
expect(res.status).to.equal(200);
return res.text().then(result => {
return res.textConverted().then(result => {
expect(result).to.equal('<meta charset="gbk"><div>中文</div>');
});
});
@ -957,7 +967,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
url = `${base}encoding/gb2312`;
return fetch(url).then(res => {
expect(res.status).to.equal(200);
return res.text().then(result => {
return res.textConverted().then(result => {
expect(result).to.equal('<meta http-equiv="Content-Type" content="text/html; charset=gb2312"><div>中文</div>');
});
});
@ -968,7 +978,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
return fetch(url).then(res => {
expect(res.status).to.equal(200);
expect(res.headers.get('content-type')).to.be.null;
return res.text().then(result => {
return res.textConverted().then(result => {
expect(result).to.equal('中文');
});
});
@ -978,7 +988,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
url = `${base}encoding/order1`;
return fetch(url).then(res => {
expect(res.status).to.equal(200);
return res.text().then(result => {
return res.textConverted().then(result => {
expect(result).to.equal('中文');
});
});
@ -988,7 +998,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
url = `${base}encoding/order2`;
return fetch(url).then(res => {
expect(res.status).to.equal(200);
return res.text().then(result => {
return res.textConverted().then(result => {
expect(result).to.equal('中文');
});
});
@ -999,7 +1009,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
return fetch(url).then(res => {
expect(res.status).to.equal(200);
const padding = 'a'.repeat(10);
return res.text().then(result => {
return res.textConverted().then(result => {
expect(result).to.equal(`${padding}<meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS" /><div>日本語</div>`);
});
});
@ -1010,7 +1020,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
return fetch(url).then(res => {
expect(res.status).to.equal(200);
const padding = 'a'.repeat(1200);
return res.text().then(result => {
return res.textConverted().then(result => {
expect(result).to.not.equal(`${padding}中文`);
});
});