Add a new res.textConverted() and always use UTF-8 for res.text()

Also uses iconv-lite directly instead of using the "encoding" package. Fixes #184.
2016-10-12 21:47:36 -07:00 · 2016-10-12 21:47:36 -07:00 · d3b4161d7c
parent c7a912cf5b
commit d3b4161d7c
2 changed files with 39 additions and 27 deletions
--- a/src/body.js
+++ b/src/body.js
@ -77,6 +77,16 @@ export default class Body {
 		return this[CONSUME_BODY]();
 	}

+	/**
+	 * Decode response as text, while automatically detecting the encoding and
+	 * trying to decode to UTF-8 (non-spec api)
+	 *
+	 * Consumes the body into a buffer and passes it, together with this
+	 * body's headers, to convertBody() for charset detection and conversion.
+	 * Note that convertBody() hands the buffer back untouched when a
+	 * content-type is present and is not html, xml or plain text, so the
+	 * promise may resolve with a Buffer instead of a string in that case.
+	 *
+	 * @return  Promise
+	 */
+	textConverted() {
+		return this[CONSUME_BODY]().then(buffer => convertBody(buffer, this.headers));
+	}
+
 	/**
 	 * Decode buffers into utf-8 string
 	 *
@ -96,12 +106,12 @@ export default class Body {

 		// body is string
 		if (typeof this.body === 'string') {
-			return Body.Promise.resolve(convertBody([new Buffer(this.body)], this.headers));
+			return Body.Promise.resolve(new Buffer(this.body));
 		}

 		// body is buffer
 		if (Buffer.isBuffer(this.body)) {
-			return Body.Promise.resolve(convertBody([this.body], this.headers));
+			return Body.Promise.resolve(this.body);
 		}

 		// body is stream
@ -147,7 +157,7 @@ export default class Body {
 				}

 				clearTimeout(resTimeout);
-				resolve(convertBody(accum, this.headers));
+				resolve(Buffer.concat(accum));
 			});
 		});
 	}
@ -158,11 +168,11 @@ export default class Body {
 * Detect buffer encoding and convert to target encoding
 * ref: http://www.w3.org/TR/2011/WD-html5-20110113/parsing.html#determining-the-character-encoding
 *
- * @param   Array<Buffer>   arrayOfBuffers  Array of buffers
+ * @param   Buffer  buffer    Incoming buffer
  * @param   Headers headers   Headers whose content-type is inspected for the source charset
 * @return  String
 */
-function convertBody(arrayOfBuffers, headers) {
+function convertBody(buffer, headers) {
 	const ct = headers.get('content-type');
 	let charset = 'utf-8';
 	let res, str;
@ -171,22 +181,14 @@ function convertBody(arrayOfBuffers, headers) {
 	if (ct) {
 		// skip encoding detection altogether if not html/xml/plain text
 		if (!/text\/html|text\/plain|\+xml|\/xml/i.test(ct)) {
-			return Buffer.concat(arrayOfBuffers);
+			return buffer;
 		}

 		res = /charset=([^;]*)/i.exec(ct);
 	}

 	// no charset in content type, peek at response body for at most 1024 bytes
-	if (!res && arrayOfBuffers.length > 0) {
-		for (let i = 0; i < arrayOfBuffers.length; i++) {
-			str += arrayOfBuffers[i].toString()
-			if (str.length > 1024) {
-				break;
-			}
-		}
-		str = str.substr(0, 1024);
-	}
+	str = buffer.slice(0, 1024).toString();

 	// html5
 	if (!res && str) {
@ -220,10 +222,10 @@ function convertBody(arrayOfBuffers, headers) {

 	// convert the buffer from the detected charset to UTF-8 and return it as a string
 	return convert(
-		Buffer.concat(arrayOfBuffers)
-		, 'utf-8'
+		buffer
+		, 'UTF-8'
 		, charset
-	);
+	).toString();
 }

 /**
--- a/test/test.js
+++ b/test/test.js
@ -923,11 +923,21 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
 		});
 	});

-	it('should support encoding decode, xml dtd detect', function() {
+	it('should only use UTF-8 decoding with text()', function() {
 		url = `${base}encoding/euc-jp`;
 		return fetch(url).then(res => {
 			expect(res.status).to.equal(200);
 			return res.text().then(result => {
+				expect(result).to.equal('<?xml version="1.0" encoding="EUC-JP"?><title>\ufffd\ufffd\ufffd\u0738\ufffd</title>');
+			});
+		});
+	});
+
+	it('should support encoding decode, xml dtd detect', function() {
+		url = `${base}encoding/euc-jp`;
+		return fetch(url).then(res => {
+			expect(res.status).to.equal(200);
+			return res.textConverted().then(result => {
 				expect(result).to.equal('<?xml version="1.0" encoding="EUC-JP"?><title>日本語</title>');
 			});
 		});
@ -937,7 +947,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
 		url = `${base}encoding/shift-jis`;
 		return fetch(url).then(res => {
 			expect(res.status).to.equal(200);
-			return res.text().then(result => {
+			return res.textConverted().then(result => {
 				expect(result).to.equal('<div>日本語</div>');
 			});
 		});
@ -947,7 +957,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
 		url = `${base}encoding/gbk`;
 		return fetch(url).then(res => {
 			expect(res.status).to.equal(200);
-			return res.text().then(result => {
+			return res.textConverted().then(result => {
 				expect(result).to.equal('<meta charset="gbk"><div>中文</div>');
 			});
 		});
@ -957,7 +967,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
 		url = `${base}encoding/gb2312`;
 		return fetch(url).then(res => {
 			expect(res.status).to.equal(200);
-			return res.text().then(result => {
+			return res.textConverted().then(result => {
 				expect(result).to.equal('<meta http-equiv="Content-Type" content="text/html; charset=gb2312"><div>中文</div>');
 			});
 		});
@ -968,7 +978,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
 		return fetch(url).then(res => {
 			expect(res.status).to.equal(200);
 			expect(res.headers.get('content-type')).to.be.null;
-			return res.text().then(result => {
+			return res.textConverted().then(result => {
 				expect(result).to.equal('中文');
 			});
 		});
@ -978,7 +988,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
 		url = `${base}encoding/order1`;
 		return fetch(url).then(res => {
 			expect(res.status).to.equal(200);
-			return res.text().then(result => {
+			return res.textConverted().then(result => {
 				expect(result).to.equal('中文');
 			});
 		});
@ -988,7 +998,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
 		url = `${base}encoding/order2`;
 		return fetch(url).then(res => {
 			expect(res.status).to.equal(200);
-			return res.text().then(result => {
+			return res.textConverted().then(result => {
 				expect(result).to.equal('中文');
 			});
 		});
@ -999,7 +1009,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
 		return fetch(url).then(res => {
 			expect(res.status).to.equal(200);
 			const padding = 'a'.repeat(10);
-			return res.text().then(result => {
+			return res.textConverted().then(result => {
 				expect(result).to.equal(`${padding}<meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS" /><div>日本語</div>`);
 			});
 		});
@ -1010,7 +1020,7 @@ describe(`node-fetch with FOLLOW_SPEC = ${defaultFollowSpec}`, () => {
 		return fetch(url).then(res => {
 			expect(res.status).to.equal(200);
 			const padding = 'a'.repeat(1200);
-			return res.text().then(result => {
+			return res.textConverted().then(result => {
 				expect(result).to.not.equal(`${padding}中文`);
 			});
 		});