Skip to content

Commit ce350aa

Browse files
committed
First commit
0 parents  commit ce350aa

40 files changed

+3552
-0
lines changed

LICENSE

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Copyright (c) 2010 Chris O'Hara <cohara87@gmail.com>
2+
3+
Permission is hereby granted, free of charge, to any person obtaining
4+
a copy of this software and associated documentation files (the
5+
"Software"), to deal in the Software without restriction, including
6+
without limitation the rights to use, copy, modify, merge, publish,
7+
distribute, sublicense, and/or sell copies of the Software, and to
8+
permit persons to whom the Software is furnished to do so, subject to
9+
the following conditions:
10+
11+
The above copyright notice and this permission notice shall be
12+
included in all copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*This is a _major_ work in progress... check back soon!*

bin/node.io

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/usr/bin/env node
2+
3+
require('node.io').cli(process.argv.slice(2));

examples/duplicates.js

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// This module can find/remove duplicates in a list
2+
//
3+
// 1. To remove duplicates from a list and output unique lines:
4+
// $ cat list.txt | node.io duplicates
5+
//
6+
// 2. To output lines that appear more than once:
7+
// $ cat list.txt | node.io duplicates find
8+
//
9+
// To output the results to a file, use either:
10+
// $ cat list.txt | node.io -s duplicates > unique.txt
11+
// $ node.io -i list.txt -o unique.txt duplicates
12+
13+
var Job = require('../lib/node.io/job').Job;
14+
15+
// State shared across every reduce() call for the lifetime of the process:
// seen_lines    - every distinct line encountered so far
// emitted_lines - duplicate lines that have already been reported ("find" mode)
// Both are prototype-less dictionaries keyed by the line text, so membership
// checks are O(1) and keys such as "constructor" or "__proto__" cannot collide
// with inherited properties.
// NOTE(review): assumes each line is a string (keys are compared as strings).
var seen_lines = Object.create(null), emitted_lines = Object.create(null);

// Filters a batch of lines against the shared state and emits the result.
//
// Reads this.options.args to pick the mode:
//   'find'         - emit each line the second time it is seen, and only once
//   anything else  - emit each line the first time it is seen (de-duplicate)
//
// lines: array of input lines for this batch.
// Always calls this.emit() exactly once with an array (possibly empty).
function reduce(lines) {
    var args = this.options.args, emit = [];

    lines.forEach(function(line) {
        if (args === 'find') {

            //Output duplicate lines
            if (seen_lines[line]) {
                if (!emitted_lines[line]) {
                    emit.push(line);
                    emitted_lines[line] = true; //Only output once
                }
            } else {
                seen_lines[line] = true;
            }

        } else {

            //Remove duplicate lines (default)
            if (!seen_lines[line]) {
                emit.push(line);
                seen_lines[line] = true;
            }

        }
    });

    this.emit(emit);
}
44+
45+
exports.job = new Job({max:20},{reduce:reduce});

examples/google_pagerank.js

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// This module checks a domain's Google pagerank (rate limits obviously apply)
2+
//
3+
// 1. To find the Google pagerank of a domain:
4+
// $ echo "mastercard.com" | node.io -s google_pagerank
5+
// => mastercard.com,7
6+
7+
var Job = require('../').Job;
8+
9+
exports.job = new Job({timeout:10, retries:3}, {
10+
11+
run: function google(input) {
12+
var self = this;
13+
14+
var url = input;
15+
if (!~url.indexOf('http://')) url = 'http://'+url;
16+
17+
var ch = '6'+GoogleCH(strord('info:'+url));
18+
19+
this.get('http://www.google.com/search?client=navclient-auto&ch='+ch+'&features=Rank&q=info:'+encodeURIComponent(url), function(err, data) {
20+
if (err) self.retry();
21+
22+
if (!~data.indexOf('Rank_1:1:')) {
23+
self.emit(input+',');
24+
} else {
25+
self.emit(input+','+data.substr(9));
26+
}
27+
});
28+
},
29+
30+
fail: function(input) {
31+
this.emit(input+',');
32+
}
33+
34+
});
35+
36+
// Zero-fill (unsigned) right shift of a 32-bit value.
//
// a: value to shift; it is coerced to a 32-bit integer by the shift operator.
// b: number of bit positions to shift by.
// Returns the shifted value as a signed 32-bit integer. For shifts of 1..31
// the result is always non-negative; a shift of 0 returns `a` unchanged (as
// a signed 32-bit integer).
function zF(a,b) {
    // `>>>` shifts zeros in from the left; `| 0` keeps the signed 32-bit
    // representation that the surrounding bitwise arithmetic works with.
    return (a >>> b) | 0;
}
48+
49+
// Scrambles three numeric accumulators and returns them as a new
// three-element array [a, b, c].
//
// Each of the nine rounds subtracts the other two values from one accumulator
// and then XORs it with a shifted copy of a neighbour: zF(..) for right shifts
// and `<<` for left shifts. Every round reads the result of the previous one,
// so statement order matters.
//
// NOTE(review): the round structure and the shift amounts
// (13, 8, 13, 12, 16, 5, 3, 10, 15) look like Bob Jenkins' lookup2 mix() -
// confirm before relying on that.
function mix(a,b,c) {
50+
a-=b; a-=c; a^=(zF(c,13));
51+
b-=c; b-=a; b^=(a<<8);
52+
c-=a; c-=b; c^=(zF(b,13));
53+
a-=b; a-=c; a^=(zF(c,12));
54+
b-=c; b-=a; b^=(a<<16);
55+
c-=a; c-=b; c^=(zF(b,5));
56+
a-=b; a-=c; a^=(zF(c,3));
57+
b-=c; b-=a; b^=(a<<10);
58+
c-=a; c-=b; c^=(zF(b,15));
59+
return (new Array((a),(b),(c)));
60+
}
61+
// Computes a checksum over `url` and returns it as a non-negative number.
//
// url:    indexable sequence whose elements are combined with `+` and `<<`,
//         so they are expected to be numbers (the caller in this file passes
//         the char-code array produced by strord()).
// length: number of elements to hash; defaults to url.length when the
//         function is called with a single argument.
//
// Returns the third word produced by the final mix() call; when that word is
// negative, 0x100000000 is added so the result is reported as unsigned.
function GoogleCH(url,length) {
62+
if(arguments.length == 1) length=url.length;
63+
var a=0x9E3779B9, b=0x9E3779B9, c=0xE6359A60, k=0, len=length, mx=new Array();
64+
// Consume the input 12 elements at a time: four elements are packed into each
// of a, b and c (lowest index in the lowest byte), then the three are mixed.
while(len>=12) {
65+
a+=(url[k+0]+(url[k+1]<<8)+(url[k+2]<<16)+(url[k+3]<<24));
66+
b+=(url[k+4]+(url[k+5]<<8)+(url[k+6]<<16)+(url[k+7]<<24));
67+
c+=(url[k+8]+(url[k+9]<<8)+(url[k+10]<<16)+(url[k+11]<<24));
68+
mx=mix(a,b,c);
69+
a=mx[0]; b=mx[1]; c=mx[2];
70+
k+=12; len-=12;
71+
}
72+
c+=length;
73+
// Fold in the remaining 0..11 elements. The cases intentionally fall through:
// entering at `len` processes every lower-numbered case as well.
switch(len) {
74+
case 11: c+=url[k+10]<<24;
75+
case 10: c+=url[k+9]<<16;
76+
case 9:c+=url[k+8]<<8;
77+
case 8:b+=(url[k+7]<<24);
78+
case 7:b+=(url[k+6]<<16);
79+
case 6:b+=(url[k+5]<<8);
80+
case 5:b+=(url[k+4]);
81+
case 4:a+=(url[k+3]<<24);
82+
case 3:a+=(url[k+2]<<16);
83+
case 2:a+=(url[k+1]<<8);
84+
case 1:a+=(url[k+0]);
85+
}
86+
mx=mix(a,b,c);
87+
// mix() finishes with bitwise operations, so mx[2] may be negative.
if(mx[2]<0) {
88+
return(0x100000000+mx[2]);
89+
} else {
90+
return(mx[2]);
91+
}
92+
}
93+
// Converts a string into an array of its character codes (UTF-16 code units).
//
// string: the text to convert.
// Returns a new array with one number per code unit; empty for "".
function strord(string) {
    var result = [];
    // `i` is declared locally: an undeclared counter would leak a global and
    // throws a ReferenceError in strict mode.
    for (var i = 0; i < string.length; i++) {
        result[i] = string.charCodeAt(i);
    }
    return result;
}

examples/google_rank.js

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// This module checks a domain's Google rank for a given keyword (rate limits obviously apply)
2+
//
3+
// 1. To find the rank of a domain for a given keyword:
4+
// $ echo "mastercard.com,Credit Cards" | node.io -s google_rank
5+
// => mastercard.com,Credit Cards,9
6+
7+
var Job = require('../').Job;
8+
9+
exports.job = new Job({timeout:10, retries:3}, {
10+
11+
run: function google(input) {
12+
var links, self = this;
13+
14+
var input = input.split(',');
15+
16+
this.getHtml('http://www.google.com/search?hl=en&num=100&q='+encodeURIComponent(input[1]), function(err, $, data) {
17+
if (err) self.retry();
18+
19+
var rank, i = 0;
20+
21+
if (links = $('a.l')) {
22+
links.each('href', function(href) {
23+
i++;
24+
if (href.indexOf('www.'+input[0]+'/') >= 0) {
25+
rank = i;
26+
} else if (href.indexOf('/'+input[0]+'/') >= 0) {
27+
rank = i;
28+
}
29+
});
30+
if (rank) {
31+
self.emit(input[0]+','+input[1]+','+rank);
32+
} else {
33+
self.emit(input+',');
34+
}
35+
}
36+
});
37+
},
38+
39+
fail: function(input) {
40+
this.emit(input+',');
41+
}
42+
43+
});

examples/google_spell.js

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// This module uses Google suggest to spell check a word or list of words (rate limits obviously apply)
2+
//
3+
// 1. To output the result of Google suggest:
4+
// $ echo "definately" | node.io -s google_spell
5+
// => definitely
6+
7+
var Job = require('../').Job;
8+
9+
exports.job = new Job({timeout:10, retries:3}, {
10+
11+
run: function google(input) {
12+
var spell, self = this;
13+
14+
this.getHtml('http://www.google.com/search?hl=en&q='+encodeURIComponent(input), function(err, $) {
15+
if (err) self.retry();
16+
17+
if (spell = $('a.spell')) {
18+
self.emit(spell.first().fulltext);
19+
}
20+
});
21+
},
22+
23+
fail: function(input) {
24+
this.emit(input);
25+
}
26+
27+
});

examples/reddit.js

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//This module pulls the front page stories and scores from reddit.com
2+
//There are API's for doing this - this is just as a quick demonstration of
3+
//parsing HTML using htmlparser and an augmented soupselect
4+
5+
var Job = require('../').Job;
6+
7+
// Fetches the reddit front page and emits one "[score] title" string per story.
//
// Must be invoked with `this` bound to a job instance providing getHtml(),
// assert(), emit() and exit(). Calls exit() (and emits nothing) when the page
// cannot be fetched or parsed, or when the number of titles and scores differ.
function reddit() {
    var self = this;

    this.getHtml('http://www.reddit.com/', function(err, $) {
        //Handle any http / parsing errors
        if (err) {
            self.exit(err);
            return; // $ is not usable after an error
        }

        var titles = [], scores = [], output = [];

        //Select all titles on the page
        $('a.title').each(function(a) {
            titles.push(a.text);
        });

        //Select all scores on the page
        $('div.score.unvoted').each(function(div) {
            scores.push(div.text);
        });

        //Mismatch? page probably didn't load properly
        if (scores.length !== titles.length) {
            self.exit('Title / score mismatch');
            return; // don't emit misaligned score/title pairs
        }

        //Output = [score] title
        for (var i = 0, len = scores.length; i < len; i++) {
            //Ignore upcoming stories
            if (scores[i] === '&bull;') continue;

            //Check the data is ok
            //(use `self`: inside this callback `this` is not the job)
            self.assert(scores[i]).isInt();

            output.push('['+scores[i]+'] '+titles[i]);
        }

        self.emit(output);
    });
}
45+
46+
exports.job = new Job({timeout:10, once:true}, {input:false, run:reddit});

examples/resolve.js

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// This module wraps the dns.lookup() method. There are a few different uses:
2+
// (In each case replace domains.txt with your list of domains)
3+
//
4+
// 1. To resolve domains and return "domain,ip":
5+
// $ cat domains.txt | node.io resolve
6+
//
7+
// 2. To return domains that do not resolve:
8+
// $ cat domains.txt | node.io resolve notfound
9+
//
10+
// 3. To return domains that do resolve:
11+
// $ cat domains.txt | node.io resolve found
12+
//
13+
// To output the results to a file, use either:
14+
// $ cat domains.txt | node.io -s resolve > result.txt
15+
// $ node.io -i domains.txt -o result.txt resolve
16+
17+
var Job = require('../').Job, dns = require('dns');
18+
19+
// Job options passed as the first argument to `new Job(...)` for this module.
// NOTE(review): units and exact meaning of each value are defined by Job -
// confirm there (timeout is presumably in seconds).
var options = {
    max: 100,
    timeout: 10,
    retries: 3
};
24+
25+
// Job methods for the resolve module. The output depends on this.options.args:
//   'notfound' - emit only domains that do not resolve
//   'found'    - emit only domains that do resolve
//   otherwise  - emit "domain,ip" ("domain," when the domain does not resolve)
var methods = {

    // Resolves one domain to an IPv4 address and reports it per the mode above.
    // Lookup errors other than "not found" trigger a retry.
    run: function(domain) {
        var self = this, type = this.options.args;

        dns.lookup(domain, 4, function(err, ip) {
            if (err) {

                //The domain didn't resolve
                // errno 4 / 8 are the legacy numeric "not found" codes; current
                // Node.js reports the same condition as err.code 'ENOTFOUND'.
                var notFound = err.errno === 4 || err.errno === 8 || err.code === 'ENOTFOUND';

                if (!notFound) {
                    self.retry();
                } else if (type === 'notfound') {
                    self.emit(domain);
                } else if (type === 'found') {
                    self.skip();
                } else {
                    self.emit(domain + ',');
                }

            } else {

                //The domain resolved successfully
                if (type === 'notfound') {
                    self.skip();
                } else if (type === 'found') {
                    self.emit(domain);
                } else {
                    self.emit(domain + ',' + ip);
                }

            }
        });
    },

    // Called when a domain timed out or exceeded the max number of retries;
    // the domain is reported exactly once, as if it had not resolved.
    fail: function(status, domain) {
        // Read the mode from the job itself: run()'s locals are not in scope here.
        var type = this.options.args;

        if (type === 'notfound') {
            this.emit(domain);
        } else if (type === 'found') {
            this.skip();
        } else {
            this.emit(domain + ',');
        }
    }

};
77+
78+
exports.job = new Job(options, methods);

examples/resources/domains.txt

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
google.com
2+
youtube.com
3+
download.com
4+
cnet.com
5+
wow.com
6+
google.com.au
7+
amazon.com
8+
asdfhkasdhfkashdjkashdk.com
9+
asdjfh98eua9sdfunm.com
10+
,,,,,,,,.com

0 commit comments

Comments
 (0)