-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_to_philosophy.py
109 lines (87 loc) · 3.92 KB
/
get_to_philosophy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""Module to actually get to the Wikipedia page for Philosophy."""
import json
from random import choice
from typing import Dict, List, Optional
from graph import Edge, Page
def get_articles() -> List[Page]:
"""Returns the cached list of all articles."""
print("Retrieving articles...", end="", flush=True)
BASE_PATH = "cache/articles/"
extensions = [f"articles_{x}.json" for x in list("abcdefghijklmnopqrstuvwxyz") + ["num", "other"]]
articles = []
for extension in extensions:
with open(BASE_PATH + extension, "r") as infile:
data = json.load(infile)
for title, link in data.items():
articles.append(Page(title, link))
print("Done")
return articles
def get_edges() -> Dict[str, str]:
"""Returns the cached dictionary of all edges."""
print("Retrieving edges...", end="", flush=True)
BASE_PATH = "cache/edges/"
extensions = [f"edges_{x}.json" for x in list("abcdefghijklmnopqrstuvwxyz") + ["num", "other"]]
edges = {}
for extension in extensions:
with open(BASE_PATH + extension, "r") as infile:
data = json.load(infile)
for base_title, linked_title in data.items():
edges[base_title] = linked_title
print("Done")
return edges
def get_path_to_philosophy(target: str, pages: List[Page], edges: List[Edge]) -> Optional[List[str]]:
"""Find the route from a target Wikipedia page to Philosophy.
Parameters
----------
target: str
The title of the Wikipedia article to start at. Must be a valid article title.
pages: List[Page]
The (cached) list of all Wikipedia pages.
edges: List[Edge]
The (cached) list of all Wikipedia edges. That is, links from the title of
one page to the title of the first page it links to.
Returns
-------
List[str] | None
A list of Wikipedia article titles, ordered such that first is the target page,
and last is either "Philosophy", or a duplicate of another page in the list, in
the case that a loop is created. If the target page is invalid, nothing is
returned.
"""
# Target page is invalid
if target not in [p.title for p in pages]:
return None
path = [target] # initialize list
if not edges[path[-1]]: return path # make sure the article links to something
next_page_title = edges[path[-1]] # and if it does, get that something
while next_page_title != "Philosophy" and next_page_title not in path: # either we reach Philosophy, or loop; no links case handled above
path.append(next_page_title)
if not edges[path[-1]]: return path # reached article with no more links
next_page_title = edges[path[-1]]
path.append(next_page_title)
return path # we've either reached Philosophy, or looped
def main():
"""The main method for running test queries."""
QUIT_INPUT = 'q' # the command to close the process
RAND_INPUT = '?' # the command to get a random page
articles = get_articles()
edges = get_edges()
while True:
target = input(f"Enter the name of an article to path ({QUIT_INPUT} to quit, {RAND_INPUT} for random article): ")
if target.lower() == QUIT_INPUT:
print("Closing application...")
return
if target.lower() == RAND_INPUT:
target = choice(articles).title
print(f"Choosing random article '{target}'")
path = get_path_to_philosophy(target, articles, edges)
if not path:
print(f"No path could be found from {target} to Philosophy. Did you enter the article title correctly?")
continue
print("Found path!\n")
print(path[0])
for article_title in path[1:]:
print(f" -> {article_title}")
print()
if __name__=='__main__':
main()