| 1 | | /*! |
| 2 | | * urlexpand - index.js |
| 3 | | * Copyright(c) 2012 fengmk2 <fengmk2@gmail.com> |
| 4 | | * MIT Licensed |
| 5 | | */ |
| 6 | | |
| 7 | 1 | "use strict"; |
| 8 | | |
| 9 | | /** |
| 10 | | * Module dependencies. |
| 11 | | */ |
| 12 | | |
| 13 | 1 | var http = require('http'); |
| 14 | 1 | var https = require('https'); |
| 15 | 1 | var urlutil = require('url'); |
| 16 | 1 | var charset = require('charset'); |
| 17 | 1 | var iconv = require('iconv-lite'); |
| 18 | | |
| 19 | | |
/**
 * Deliver the final result to `callback`, at most once per callback function.
 *
 * The result is assembled from the bookkeeping fields kept on the callback
 * itself (`__title`, `__redirectCounter`, `__tracks`). The first invocation
 * marks the callback with `__called`; later invocations are ignored.
 *
 * @param {Error|null} err, the error to report, if any.
 * @param {String} url, the url to report as the result.
 * @param {Function(err, data)} callback, the user callback carrying the state.
 */
function handleCallback(err, url, callback) {
  if (!callback.__called) {
    // Flag first so a re-entrant call made from inside `callback` is a no-op.
    callback.__called = true;
    var result = {
      url: url,
      title: callback.__title,
      count: callback.__redirectCounter,
      tracks: callback.__tracks
    };
    callback(err, result);
  }
}
| 32 | | |
// Captures the text of the first <title> element. Attributes on the opening
// tag (e.g. `<title lang="en">`) are tolerated; matching is case-insensitive.
var TITLE_RE = /<title(?:\s[^>]*)?>([^<]+)</i;

/**
 * Decode a raw response body and extract the HTML page title.
 *
 * @param {Buffer} data, the raw response body.
 * @param {String} cs, charset name; `utf8` is used when iconv-lite does not
 *   report the charset as supported.
 * @return {String|null} the trimmed title text, or `null` when no `<title>`
 *   with content is found.
 */
function getTitle(data, cs) {
  // iconv-lite releases that expose `encodingExists()` may still have
  // `iconv.encodings` unset (it is loaded lazily), so indexing into it
  // directly can throw. Older releases only offer the `encodings` map.
  var supported = typeof iconv.encodingExists === 'function' ?
    iconv.encodingExists(cs) :
    Boolean(iconv.encodings && iconv.encodings[cs]);
  var text = iconv.decode(data, supported ? cs : 'utf8');
  var m = TITLE_RE.exec(text);
  return m ? m[1].trim() : null;
}
| 41 | | |
/**
 * Expand a shortened url, returning the final url and the redirect history.
 *
 * Bookkeeping for one expansion (`__redirectCounter`, `__tracks`, `__title`,
 * plus the `__called` flag set when the result is delivered) is stored on the
 * `callback` function itself, so pass a fresh function for every top-level call.
 *
 * @param {String} url, the url you want to expand.
 * @param {Object} [options], defaults are written back onto this object.
 *  - {Number} [redirects], max redirect times, default is `5`.
 *  - {Boolean} [title], get title or not, default is `true`.
 *  - {Number} [timeout], request timeout, default is `10000` ms.
 * @param {Function(err, data)} callback, invoked with no arguments when `url`
 *   has no hostname.
 *  - {Object} data {
 *      {String} url: the url of the last response that was not followed as a
 *        redirect; when the redirect limit is exceeded, the location that was
 *        not followed.
 *      {String} title: the html page title of the last response, maybe empty.
 *      {Number} count: need redirect times.
 *      {Array} tracks: the handle tracks. `[{ url: $url, headers: $headers, statusCode: 301 }, ... ]`;
 *        a failed request is recorded as `{ url: $url, error: $message }`.
 *    }
 */
function expand(url, options, callback) {
  if (typeof options === 'function') {
    callback = options;
    options = null;
  }
  options = options || {};
  options.redirects = options.redirects || 5;
  if (options.title === undefined) {
    options.title = true;
  }
  options.timeout = options.timeout || 10000;

  var info = urlutil.parse(url || '');
  if (!info.hostname) {
    return callback();
  }

  var reqOptions = {
    hostname: info.hostname,
    path: info.path,
    method: 'GET'
  };
  if (info.port) {
    reqOptions.port = info.port;
  }

  // First hop of an expansion: start the bookkeeping kept on the callback.
  if (callback.__redirectCounter === undefined) {
    callback.__redirectCounter = 0;
    callback.__tracks = [];
  }

  var request = info.protocol === 'https:' ? https.request : http.request;
  var req = request(reqOptions);
  var timer = null;
  // Set once this hop has produced its outcome (redirect followed or result
  // delivered); a late 'error' from this request is then ignored.
  var finished = false;

  req.on('response', function (res) {
    callback.__tracks.push({
      url: url,
      headers: res.headers,
      statusCode: res.statusCode
    });

    var status = res.statusCode;
    var isRedirect = status === 301 || status === 302 || status === 303 ||
      status === 307 || status === 308;
    var target = res.headers.location;

    // A redirect status without a Location header cannot be followed; it is
    // handled below like any other final response instead of being resolved.
    if (isRedirect && target) {
      clearTimeout(timer);
      finished = true;
      // The redirect body is not needed; release the connection.
      res.destroy();
      callback.__redirectCounter++;
      var location = urlutil.resolve(url, target);
      if (callback.__redirectCounter > options.redirects) {
        return handleCallback(null, location, callback);
      }
      return expand(location, options, callback);
    }

    if (!options.title) {
      clearTimeout(timer);
      finished = true;
      res.destroy();
      return handleCallback(null, url, callback);
    }

    // get the title
    var buffers = [];
    var size = 0;
    res.on('data', function (chunk) {
      buffers.push(chunk);
      size += chunk.length;
    });
    res.on('end', function () {
      clearTimeout(timer);
      finished = true;
      var data = Buffer.concat(buffers, size);
      var cs = charset(res.headers, data) || 'utf8';
      callback.__title = getTitle(data, cs);
      handleCallback(null, url, callback);
    });
  });

  req.on('error', function (err) {
    // Do not leave the timeout timer pending once the request has failed.
    clearTimeout(timer);
    if (finished) {
      return;
    }
    callback.__tracks.push({
      url: url,
      error: req.isTimeout ? 'request timeout' : err.message
    });
    handleCallback(err, url, callback);
  });

  req.end();

  timer = setTimeout(function () {
    // Flag the request so the 'error' handler can label the track as a timeout.
    req.isTimeout = true;
    req.abort();
  }, options.timeout);
}
| 142 | | |
| 143 | 1 | module.exports = expand; |
| 144 | | |
/**
 * Fallback `Buffer.concat` for runtimes that lack it (node < 0.8).
 * Installed only when `Buffer.concat` is missing.
 *
 * https://github.com/joyent/node/blob/master/lib/buffer.js#L504
 */

if (!Buffer.concat) {
  Buffer.concat = function (list, length) {
    if (!Array.isArray(list)) {
      throw new Error('Usage: Buffer.concat(list, [length])');
    }

    // Trivial inputs: nothing to join, or a single buffer returned as-is (not copied).
    switch (list.length) {
    case 0:
      return new Buffer(0);
    case 1:
      return list[0];
    }

    var index;

    // Without an explicit numeric length, total up the sizes of all parts.
    if (typeof length !== 'number') {
      length = 0;
      for (index = 0; index < list.length; index++) {
        length += list[index].length;
      }
    }

    var target = new Buffer(length);
    var offset = 0;
    for (index = 0; index < list.length; index++) {
      var part = list[index];
      part.copy(target, offset);
      offset += part.length;
    }
    return target;
  };
}