
Commit 95bd4b3

Merge pull request #51 from RobberPhex/python3_support
add python3 support
2 parents d4cbb49 + 300f54d commit 95bd4b3


3 files changed (+33, -18 lines)

.travis.yml

+1
@@ -1,6 +1,7 @@
 language: python
 python:
   - 2.6
+  - 3.4
   - 2.7
 before_install:
   - sudo apt-get update -qq

gfwlist2pac/main.py

file mode changed from 100644 to 100755
+30 -17
@@ -1,11 +1,18 @@
-#!/usr/bin/python
+#!/bin/env python
 # -*- coding: utf-8 -*-
+from __future__ import print_function
+
+try:
+    from urllib.parse import urlparse, urlsplit
+    from urllib.request import urlopen
+except ImportError:
+    from urlparse import urlparse, urlsplit
+    from urllib2 import urlopen
 
 import pkgutil
-import urlparse
+import base64
 import json
 import logging
-import urllib2
 from argparse import ArgumentParser
 
 __all__ = ['main']
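
The urlparse and urllib2 modules from Python 2 were reorganized into the urllib package in Python 3, so the try/except above keeps a single import path for both interpreters. A minimal standalone sketch of the same pattern (nothing beyond the module names in the diff is assumed):

try:
    # Python 3: the old urlparse and urllib2 APIs live under urllib
    from urllib.parse import urlparse, urlsplit
    from urllib.request import urlopen
except ImportError:
    # Python 2: fall back to the original module names
    from urlparse import urlparse, urlsplit
    from urllib2 import urlopen

print(urlparse('http://www.example.com/x').hostname)  # 'www.example.com' on 2 and 3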
@@ -38,7 +45,8 @@ def decode_gfwlist(content):
     try:
         if '.' in content:
             raise Exception()
-        return content.decode('base64')
+        content = base64.b64decode(content)
+        return content.decode('UTF-8')
     except:
         return content
 
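The 'base64' text codec behind content.decode('base64') exists only on Python 2; Python 3 removed it, so the hunk switches to base64.b64decode followed by an explicit UTF-8 decode. A rough sketch of the difference, assuming the decoded list body is UTF-8 text:

import base64

encoded = 'ZXhhbXBsZS5jb20='       # base64 for 'example.com' (illustrative input)
raw = base64.b64decode(encoded)    # bytes; works on Python 2 and 3
text = raw.decode('UTF-8')         # bytes -> str, ready for splitlines()
print(text)                        # example.com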

@@ -48,7 +56,7 @@ def get_hostname(something):
         # quite enough for GFW
         if not something.startswith('http:'):
             something = 'http://' + something
-        r = urlparse.urlparse(something)
+        r = urlparse(something)
         return r.hostname
     except Exception as e:
         logging.error(e)
@@ -64,6 +72,7 @@ def add_domain_to_set(s, something):
 def combine_lists(content, user_rule=None):
     builtin_rules = pkgutil.get_data('gfwlist2pac',
                                      'resources/builtin.txt').splitlines(False)
+    builtin_rules = [rule.decode('UTF8') for rule in builtin_rules]
     gfwlist = content.splitlines(False)
     gfwlist.extend(builtin_rules)
     if user_rule:
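
pkgutil.get_data returns bytes on both interpreters, but Python 3 no longer mixes bytes with str silently, which is why combine_lists, reduce_domains and both PAC generators gain an explicit .decode. A small illustration using the package and resource names from the diff:

import pkgutil

data = pkgutil.get_data('gfwlist2pac', 'resources/builtin.txt')    # bytes on 2 and 3
rules = [line.decode('UTF-8') for line in data.splitlines(False)]  # bytes lines -> str
print(rules[:3])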
@@ -99,12 +108,13 @@ def reduce_domains(domains):
     # reduce 'www.google.com' to 'google.com'
     # remove invalid domains
     tld_content = pkgutil.get_data('gfwlist2pac', 'resources/tld.txt')
+    tld_content = tld_content.decode('UTF-8')
     tlds = set(tld_content.splitlines(False))
     new_domains = set()
     for domain in domains:
         domain_parts = domain.split('.')
         last_root_domain = None
-        for i in xrange(0, len(domain_parts)):
+        for i in range(0, len(domain_parts)):
             root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:])
             if i == 0:
                 if not tlds.__contains__(root_domain):
@@ -121,7 +131,7 @@ def reduce_domains(domains):
     uni_domains = set()
     for domain in new_domains:
         domain_parts = domain.split('.')
-        for i in xrange(0, len(domain_parts)-1):
+        for i in range(0, len(domain_parts)-1):
             root_domain = '.'.join(domain_parts[len(domain_parts) - i - 1:])
             if domains.__contains__(root_domain):
                 break
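
xrange no longer exists in Python 3, where range itself is lazy; on Python 2 range builds a list, which is harmless for the handful of labels in a domain name. A quick sketch of the loop shape the two hunks above use:

domain_parts = 'www.google.com'.split('.')
for i in range(0, len(domain_parts)):     # range works on 2 and 3; xrange is 2-only
    print('.'.join(domain_parts[len(domain_parts) - i - 1:]))
# prints: com / google.com / www.google.com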
@@ -133,6 +143,7 @@ def reduce_domains(domains):
 def generate_pac_fast(domains, proxy):
     # render the pac file
     proxy_content = pkgutil.get_data('gfwlist2pac', 'resources/proxy.pac')
+    proxy_content = proxy_content.decode('UTF-8')
     domains_dict = {}
     for domain in domains:
         domains_dict[domain] = 1
@@ -155,7 +166,8 @@ def grep_rule(rule):
         return None
     # render the pac file
     proxy_content = pkgutil.get_data('gfwlist2pac', 'resources/abp.js')
-    rules = filter(grep_rule, rules)
+    proxy_content = proxy_content.decode('UTF-8')
+    rules = list(filter(grep_rule, rules))
     proxy_content = proxy_content.replace('__PROXY__', json.dumps(str(proxy)))
     proxy_content = proxy_content.replace('__RULES__',
                                           json.dumps(rules, indent=2))
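
On Python 2 filter returns a list, but on Python 3 it returns a lazy iterator, which json.dumps cannot serialize; wrapping it in list() keeps the rendered __RULES__ block identical on both. A minimal sketch with a stand-in predicate (the real code uses grep_rule):

import json

rules = ['!comment', '[AutoProxy 0.2.9]', '||example.com']
keep = lambda rule: None if rule.startswith(('!', '[')) else rule  # stand-in for grep_rule
rules = list(filter(keep, rules))       # without list(), json.dumps fails on Python 3
print(json.dumps(rules, indent=2))      # only "||example.com" survives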
@@ -166,22 +178,23 @@ def main():
     args = parse_args()
     user_rule = None
     if (args.input):
-        with open(args.input, 'rb') as f:
+        with open(args.input, 'r') as f:
             content = f.read()
     else:
-        print 'Downloading gfwlist from %s' % gfwlist_url
-        content = urllib2.urlopen(gfwlist_url, timeout=10).read()
+        print('Downloading gfwlist from %s' % gfwlist_url)
+        content = urlopen(gfwlist_url, timeout=10).read()
+        content = content.decode('UTF-8')
     if args.user_rule:
-        userrule_parts = urlparse.urlsplit(args.user_rule)
+        userrule_parts = urlsplit(args.user_rule)
         if not userrule_parts.scheme or not userrule_parts.netloc:
             # It's not an URL, deal it as local file
-            with open(args.user_rule, 'rb') as f:
+            with open(args.user_rule, 'r') as f:
                 user_rule = f.read()
         else:
             # Yeah, it's an URL, try to download it
-            print 'Downloading user rules file from %s' % args.user_rule
-            user_rule = urllib2.urlopen(args.user_rule, timeout=10).read()
-
+            print('Downloading user rules file from %s' % args.user_rule)
+            user_rule = urlopen(args.user_rule, timeout=10).read()
+            user_rule = user_rule.decode('UTF-8')
     content = decode_gfwlist(content)
     gfwlist = combine_lists(content, user_rule)
     if args.precise:
@@ -190,7 +203,7 @@ def main():
     domains = parse_gfwlist(gfwlist)
     domains = reduce_domains(domains)
     pac_content = generate_pac_fast(domains, args.proxy)
-    with open(args.output, 'wb') as f:
+    with open(args.output, 'w') as f:
         f.write(pac_content)
 
 
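The file modes move from 'rb'/'wb' to 'r'/'w' so that reads yield str and the generated PAC text (already str after the .decode calls) can be written without a separate encode on Python 3, while urlopen responses are decoded explicitly since HTTP bodies always arrive as bytes. A hedged sketch of the distinction; the file names are only illustrative:

# Text mode vs binary mode on Python 3 (Python 2 treats both as str):
with open('gfwlist.txt', 'rb') as f:
    raw = f.read()            # bytes
with open('gfwlist.txt', 'r') as f:
    text = f.read()           # str, safe to mix with other str rules

with open('proxy.pac', 'w') as f:
    f.write(text)             # writing str requires text mode on Python 3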

test/log2test.py

+2 -1
@@ -1,8 +1,9 @@
+from __future__ import print_function
 import re
 
 with open('1000.log', 'rb') as f:
     for line in f:
         line = line.strip()
         m = re.findall('connecting (.*?):', line)
         if m:
-            print m[0]
+            print(m[0])
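
The future import turns print into a function on Python 2 as well, so print(m[0]) is valid under both interpreters; it has to sit above the other statements in the module:

from __future__ import print_function   # must precede other code in the module

print('matched host')                    # same behaviour on Python 2 and 3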
