forked from attardi/wikiextractor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMagicWords.py
123 lines (116 loc) · 2.77 KB
/
MagicWords.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
# variables
class MagicWords(object):
"""
One copy in each Extractor.
@see https://doc.wikimedia.org/mediawiki-core/master/php/MagicWord_8php_source.html
"""
names = [
'!',
'currentmonth',
'currentmonth1',
'currentmonthname',
'currentmonthnamegen',
'currentmonthabbrev',
'currentday',
'currentday2',
'currentdayname',
'currentyear',
'currenttime',
'currenthour',
'localmonth',
'localmonth1',
'localmonthname',
'localmonthnamegen',
'localmonthabbrev',
'localday',
'localday2',
'localdayname',
'localyear',
'localtime',
'localhour',
'numberofarticles',
'numberoffiles',
'numberofedits',
'articlepath',
'pageid',
'sitename',
'server',
'servername',
'scriptpath',
'stylepath',
'pagename',
'pagenamee',
'fullpagename',
'fullpagenamee',
'namespace',
'namespacee',
'namespacenumber',
'currentweek',
'currentdow',
'localweek',
'localdow',
'revisionid',
'revisionday',
'revisionday2',
'revisionmonth',
'revisionmonth1',
'revisionyear',
'revisiontimestamp',
'revisionuser',
'revisionsize',
'subpagename',
'subpagenamee',
'talkspace',
'talkspacee',
'subjectspace',
'subjectspacee',
'talkpagename',
'talkpagenamee',
'subjectpagename',
'subjectpagenamee',
'numberofusers',
'numberofactiveusers',
'numberofpages',
'currentversion',
'rootpagename',
'rootpagenamee',
'basepagename',
'basepagenamee',
'currenttimestamp',
'localtimestamp',
'directionmark',
'contentlanguage',
'numberofadmins',
'cascadingsources',
]
def __init__(self):
self.values = {}
self.values['!'] = '|'
def __getitem__(self, name):
return self.values.get(name)
def __setitem__(self, name, value):
self.values[name] = value
switches = [
'__NOTOC__',
'__FORCETOC__',
'__TOC__',
'__TOC__',
'__NEWSECTIONLINK__',
'__NONEWSECTIONLINK__',
'__NOGALLERY__',
'__HIDDENCAT__',
'__NOCONTENTCONVERT__',
'__NOCC__',
'__NOTITLECONVERT__',
'__NOTC__',
'__START__',
'__END__',
'__INDEX__',
'__NOINDEX__',
'__STATICREDIRECT__',
'__DISAMBIG__'
]
magicWordsRE = re.compile('|'.join(MagicWords.switches))