// decoder_spec.js
  // Mocha specs for needle's `decode` option. Each case fetches a page, runs
  // jschardet over the returned body and checks whether a known non-ASCII
  // phrase is present in it.
  var should = require('should'),
      needle = require('./../'),
      Q = require('q'),
      chardet = require('jschardet'),
      helpers = require('./helpers');

  describe('character encoding', function() {

    // Shared by the EUC-JP cases below; assigned in their `before` hook.
    var url;

    // Two of the three groups hit external sites, so allow up to 5 seconds.
    this.timeout(5000);

    describe('Given content-type: "text/html; charset=EUC-JP"', function() {

      before(function() {
        url = 'http://www.nina.jp/server/slackware/webapp/tomcat_charset.html';
      });

      describe('with decode = false', function() {

        it('does not decode', function(done) {
          needle.get(url, { decode: false }, function(err, resp) {
            // Surface request failures through mocha instead of crashing on
            // `resp.body` when `resp` is undefined.
            if (err) return done(err);

            resp.body.should.be.a.String;
            chardet.detect(resp.body).encoding.should.eql('windows-1252');
            // The phrase must NOT be found in the undecoded body.
            resp.body.indexOf('EUCを使う').should.eql(-1);
            done();
          });
        });

      });

      describe('with decode = true', function() {

        it('decodes', function(done) {
          needle.get(url, { decode: true }, function(err, resp) {
            if (err) return done(err);

            resp.body.should.be.a.String;
            chardet.detect(resp.body).encoding.should.eql('ascii');
            // The phrase must be found once the body has been decoded.
            resp.body.indexOf('EUCを使う').should.not.eql(-1);
            done();
          });
        });

      });

    });

    describe('Given content-type: "text/html but file is charset: gb2312', function() {

      it('encodes to UTF-8', function(done) {

        // Promise-returning wrapper around needle.get with the URL pre-applied.
        var task = Q.nbind(needle.get, needle, 'http://www.chinesetop100.com/');

        // Same request issued twice: once with decoding on, once with it off.
        var tasks = [
          Q.fcall(task, { decode: true }),
          Q.fcall(task, { decode: false })
        ];

        // Reduce each fulfilled value to the response body; element [0] of the
        // fulfilled value is treated as the response object.
        var results = tasks.map(function(pending) {
          return pending.then(function(obj) {
            return obj[0].body;
          });
        });

        // Wait for both requests, run the assertions, then hand the outcome to
        // mocha: `done()` on success, `done(err)` on a rejected request or a
        // failed assertion.
        Q.all(results)
          .then(function(bodies) {

            var charsets = [
              chardet.detect(bodies[0]).encoding,
              chardet.detect(bodies[1]).encoding
            ];

            // We wanted to decode our first stream as specified by options
            charsets[0].should.equal('ascii');
            bodies[0].indexOf('全球中文网站前二十强').should.not.equal(-1);

            // But not our second stream
            charsets[1].should.equal('windows-1252');
            bodies[1].indexOf('全球中文网站前二十强').should.equal(-1);
          })
          .then(function() { done(); }, done);
      });

    });

    describe('Given content-type: "text/html"', function() {

      var server,
          port = 54321,
          text = 'Magyarországi Fióktelepe';

      // Local fixture server that answers with `text` and a charset-less
      // Content-Type header.
      before(function(done) {
        server = helpers.server({
          port: port,
          response: text,
          headers: { 'Content-Type': 'text/html' }
        }, done);
      });

      after(function(done) {
        server.close(done);
      });

      describe('with decode = false', function() {

        // NOTE(review): the title says "decodes by default" although the
        // request passes decode: false; confirm the intended wording.
        it('decodes by default to utf-8', function(done) {
          needle.get('http://localhost:' + port, { decode: false }, function(err, resp) {
            if (err) return done(err);

            resp.body.should.be.a.String;
            chardet.detect(resp.body).encoding.should.eql('ISO-8859-2');
            // The body must equal the text the fixture server was given.
            resp.body.should.eql(text);
            done();
          });
        });

      });

    });

  });