-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.py
executable file
·103 lines (88 loc) · 3.99 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import os
import time
from glob import iglob
import mailbox
# procmail-py - Email content and spam filtering
# MIT License
# © 2014 Noah K. Tilton <code@tilton.co>
from config import BASE_MAILDIR, MY_DOMAINS, addresses, mark_read
from spam import spamc, blacklisted
from utils import file, spammy_spamc, mark_as_read, uniq
# Maildir that mfilter() drains; the script entry point opens it as INBOX.
INBOXDIR = os.path.join(BASE_MAILDIR, "INBOX")

# Base names of every non-hidden entry directly under BASE_MAILDIR.
# The loop variable is deliberately not called `dir`: that shadows the
# builtin, and a Python 2 list comprehension leaks its variable into module
# scope.
maildirs_on_disk = [
    os.path.basename(entry)
    for entry in iglob(os.path.join(BASE_MAILDIR, "*"))
]

# list(...) keeps the concatenation below valid where dict.values() returns
# a view instead of a list.
maildirs_in_file = list(addresses.values())  # <- some of these may not exist

maildirs = uniq(maildirs_on_disk + maildirs_in_file)

# Maildir name -> open mailbox.Maildir.  create=True makes any maildir that is
# named in the config but missing on disk.
mailboxes = dict(
    (name, mailbox.Maildir(os.path.join(BASE_MAILDIR, name), create=True))
    for name in maildirs
)
# N.B.: the order of the following filters matters. note the return
# statements. this short-circuiting is desirable, but has to be done
# carefully to avoid double-booking mails.
def mfilter(args):
    """Run one INBOX message through the filter chain.

    Filters are tried in this order: blacklisted phrases, spamc verdict,
    mark-as-read rules, mailing-list headers, whitelisted senders.  A filter
    that files the message returns immediately so that no message is filed
    twice.  The mark-as-read rules never file anything, so later filters
    still run after them.  A message that matches no filing rule is not
    filed anywhere.

    args -- a (key, message) pair, as yielded by INBOX.iteritems().

    Returns None.  Any exception raised while handling the message is printed
    and swallowed so that one bad message does not abort the whole run.

    Relies on the module-level name INBOX, which is only bound when this file
    is executed as a script.
    """
    try:
        key, message = args

        # BLACKLISTED WORDS/PHRASES
        if not message.is_multipart():
            # Can't run blacklist logic against multipart messages
            # because random phrases such as "gucci" may show up in
            # base64-encoded strings.
            flat_msg = message.as_string()
            for badword in blacklisted:
                if badword in flat_msg:
                    print("badword: %s (%s)" % (badword, message["subject"]))
                    mark_as_read(message)
                    file(INBOX, mailboxes["Junk"], message, key)
                    return

        # SPAM?
        if spammy_spamc(message):
            mark_as_read(message)
            file(INBOX, mailboxes["Junk"], message, key)
            return

        # MARK-AS-READ?
        for header, needle in mark_read.items():
            value = message[header]
            # A missing header comes back as None; without this guard the
            # `in` test raises and every filter below is skipped.
            if value is not None and needle in value:
                # http://docs.python.org/library/mailbox.html#mailbox.MaildirMessage
                mark_as_read(message)

        # MAILING LIST?
        ml_indicia = (
            message["delivered-to"],
            message["list-id"],
            message["reply-to"],
        )
        for list_header in ml_indicia:
            if list_header is None:
                continue

            # Only the split itself is guarded: it raises ValueError unless
            # the header contains exactly one "@".
            try:
                list_id, remainder = list_header.split("@")
            except ValueError:
                print("couldn't split: header: %s\n key %s\n subject %s\n" % (list_header, key,
                      message["subject"]))
                continue

            remainder = remainder.strip('<>').strip()
            # only allow mailinglist delivery to MY_DOMAINS
            if remainder not in MY_DOMAINS:
                print("{} not in {}".format(remainder, MY_DOMAINS))
                continue

            # list_id comes straight from a message header and is used as a
            # directory name below, so refuse anything that could resolve
            # outside (or to) BASE_MAILDIR itself.
            if list_id in ("", ".", "..") or os.path.basename(list_id) != list_id:
                print("unsafe maildir name in header: %s" % (list_header,))
                continue

            if list_id not in mailboxes:
                # maildir doesn't exist: create it, and remember the open
                # mailbox so file() receives a Maildir object (not the bare
                # name) and later messages reuse it.
                mailboxes[list_id] = mailbox.Maildir(
                    os.path.join(BASE_MAILDIR, list_id), create=True)
            file(INBOX, mailboxes[list_id], message, key)
            return

        # WHITELISTED SENDER?
        # FIXME - this should be a regex, not an 'in' check
        sender = (message["from"] or "").lower()  # tolerate a missing From:
        for addr in addresses:
            if addr in sender:
                file(INBOX, mailboxes[addresses[addr]], message, key)
                return
    except Exception as e:
        # Deliberate per-message catch-all: report and move on to the next
        # message.  %r keeps the exception type visible and cannot itself
        # fail on non-ASCII exception text.
        print("error: %r" % (e,))
if __name__ == '__main__':
    # mfilter() reads INBOX as a module-level global, so it must be bound
    # before any message is filtered.  factory=None is passed explicitly.
    # NOTE(review): presumably so messages come back as
    # mailbox.MaildirMessage - confirm against the mailbox docs for the
    # interpreter in use.
    INBOX = mailbox.Maildir(INBOXDIR, factory=None)

    # A multiprocessing Pool sized from cpu_count() and len(INBOX) was
    # sketched here at one point; it is disabled and messages are processed
    # serially.

    # Pass 1: hand every file under INBOX/new to spamc.  (A disabled check
    # used to restrict this to files whose st_ctime was under 90 seconds
    # old.)
    new_mail_pattern = os.path.join(INBOXDIR, "new", "*")
    for email in iglob(new_mail_pattern):
        spamc(email)

    # Pass 2: run each (key, message) pair in the inbox through the filters.
    for email in INBOX.iteritems():
        mfilter(email)

    # Close every destination maildir once filtering is finished.
    for name, box in mailboxes.items():
        box.close()