-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathWikidataCommand.php
209 lines (179 loc) · 8.62 KB
/
WikidataCommand.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
<?php
namespace App\Command;
use App\Exception\FileException;
use App\Exception\OSMException;
use App\Model\Overpass\Element;
use App\Model\Overpass\Overpass;
use App\Wikidata\Wikidata;
use Exception;
use GuzzleHttp\Exception\BadResponseException;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Exception\InvalidArgumentException;
use Symfony\Component\Console\Helper\ProgressBar;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
/**
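* Download, in JSON format, the Wikidata item(s) defined in the `name:etymology:wikidata` tag
* and in the `wikidata` tag of each relation/way. When a `wikidata` item is downloaded for the
* first time, the items it is "named after" (property P138) are downloaded as well.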
*
* @package App\Command
*/
class WikidataCommand extends AbstractCommand
{
    /** {@inheritdoc} */
    protected static $defaultName = 'wikidata';

    /** @var string Wikidata item URL. */
    protected const URL = 'https://www.wikidata.org/wiki/Special:EntityData/';

    /** @var string Regular expression a Wikidata item identifier must match (`Q` followed by digits). */
    private const IDENTIFIER_PATTERN = '/^Q[0-9]+$/';

    /**
     * {@inheritdoc}
     *
     * @return void
     *
     * @throws InvalidArgumentException
     */
    protected function configure(): void
    {
        parent::configure();

        $this->setDescription('Download data from Wikidata.');
    }

    /**
     * Read the Overpass results (relations and ways), keep the elements carrying a `wikidata`
     * and/or `name:etymology:wikidata` tag and download the matching Wikidata items as JSON files
     * in the `wikidata` sub-directory of the output directory.
     *
     * Items already present on disk are not downloaded again. Problems that concern a single item
     * (invalid identifier, HTTP error response) are collected as warnings and printed at the end;
     * any other exception aborts the command with a failure status.
     *
     * {@inheritdoc}
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     * @return int `Command::SUCCESS`, or `Command::FAILURE` when an exception was caught.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        try {
            parent::execute($input, $output);

            $relationPath = sprintf('%s/overpass/%s', self::OUTPUTDIR, OverpassCommand::FILENAME_RELATION);
            $wayPath = sprintf('%s/overpass/%s', self::OUTPUTDIR, OverpassCommand::FILENAME_WAY);

            // Only keep ways/relations that have a `wikidata` tag and/or a `name:etymology:wikidata` tag
            $elements = array_filter(
                array_merge(self::readElements($relationPath), self::readElements($wayPath)),
                static function ($element): bool {
                    return isset($element->tags) &&
                        (isset($element->tags->wikidata) || isset($element->tags->{'name:etymology:wikidata'})); // @phpstan-ignore-line
                }
            );

            // Check count of elements with Wikidata information.
            if (count($elements) === 0) {
                $output->writeln('No element with Wikidata information!');

                return Command::SUCCESS;
            }

            // Create wikidata directory to store results.
            $outputDir = sprintf('%s/wikidata', self::OUTPUTDIR);
            // The second is_dir() covers the case where the directory appears between check and creation.
            if (!is_dir($outputDir) && !mkdir($outputDir, 0777, true) && !is_dir($outputDir)) {
                throw new FileException(sprintf('Directory "%s" could not be created.', $outputDir));
            }

            $warnings = [];

            $progressBar = new ProgressBar($output, count($elements));
            $progressBar->start();

            foreach ($elements as $element) {
                /** @var string|null */
                $wikidataTag = $element->tags->wikidata ?? null; // @phpstan-ignore-line
                /** @var string|null */
                $etymologyTag = $element->tags->{'name:etymology:wikidata'} ?? null; // @phpstan-ignore-line

                // Download Wikidata item(s) defined in `name:etymology:wikidata` tag
                // (skipped when it is identical to the `wikidata` tag, which is handled just below).
                if (!is_null($etymologyTag) && $etymologyTag !== $wikidataTag) {
                    self::downloadEtymology($etymologyTag, $element, $outputDir, $warnings);
                }

                // Download Wikidata item defined in `wikidata` tag
                if (!is_null($wikidataTag)) {
                    self::downloadWikidata($wikidataTag, $element, $outputDir, $warnings);
                }

                // Always advance, including when a tag was invalid, so the bar reaches its maximum.
                $progressBar->advance();
            }

            $progressBar->finish();

            $output->writeln(['', ...$warnings]);

            return Command::SUCCESS;
        } catch (Exception $error) {
            $output->writeln(sprintf('<error>%s</error>', $error->getMessage()));

            return Command::FAILURE;
        }
    }

    /**
     * Read an Overpass result file and return its `elements` list.
     *
     * @param string $path Path of the Overpass JSON file.
     * @return array<mixed> Decoded elements; empty when the file can't be read/decoded or has no `elements` array.
     *
     * @throws FileException If the file doesn't exist or is not readable.
     */
    private static function readElements(string $path): array
    {
        if (!file_exists($path) || !is_readable($path)) {
            throw new FileException(sprintf('File "%s" doesn\'t exist or is not readable. You maybe need to run "overpass" command first.', $path));
        }

        $content = file_get_contents($path);
        /** @var Overpass|null */
        $overpass = $content !== false ? json_decode($content) : null;

        $elements = $overpass->elements ?? [];

        // Guard against unexpected JSON so that array_merge() in the caller never receives a non-array.
        return is_array($elements) ? $elements : [];
    }

    /**
     * Check that a value is a valid Wikidata item identifier.
     *
     * @param string $identifier Value to check.
     * @return bool
     */
    private static function isValidIdentifier(string $identifier): bool
    {
        return preg_match(self::IDENTIFIER_PATTERN, $identifier) === 1;
    }

    /**
     * Download the Wikidata item if it is not already stored in the output directory.
     *
     * @param string   $identifier Valid Wikidata item identifier.
     * @param Element  $element    OpenStreetMap element (relation/way/node) the identifier comes from.
     * @param string   $outputDir  Directory where the Wikidata items are stored.
     * @param string[] $warnings   Collected warnings (completed in place).
     * @return void
     *
     * @throws GuzzleException
     */
    private static function saveIfMissing(string $identifier, $element, string $outputDir, array &$warnings): void
    {
        $path = sprintf('%s/%s.json', $outputDir, $identifier);

        if (!file_exists($path)) {
            self::save($identifier, $element, $path, $warnings);
        }
    }

    /**
     * Download every Wikidata item listed (separated by `;`) in a `name:etymology:wikidata` tag.
     *
     * @param string   $etymologyTag Value of the `name:etymology:wikidata` tag.
     * @param Element  $element      OpenStreetMap element (relation/way/node).
     * @param string   $outputDir    Directory where the Wikidata items are stored.
     * @param string[] $warnings     Collected warnings (completed in place).
     * @return void
     *
     * @throws GuzzleException
     */
    private static function downloadEtymology(string $etymologyTag, $element, string $outputDir, array &$warnings): void
    {
        $identifiers = array_map('trim', explode(';', $etymologyTag));

        foreach ($identifiers as $identifier) {
            // Check that the value of the tag is a valid Wikidata item identifier
            if (!self::isValidIdentifier($identifier)) {
                $warnings[] = sprintf('Format of `name:etymology:wikidata` is invalid (%s) for %s(%d).', $identifier, $element->type, $element->id);
                continue;
            }

            self::saveIfMissing($identifier, $element, $outputDir, $warnings);
        }
    }

    /**
     * Download the Wikidata item defined in a `wikidata` tag and, when that item has just been
     * downloaded, the items referenced by its "named after" (P138) property.
     *
     * @param string   $wikidataTag Value of the `wikidata` tag.
     * @param Element  $element     OpenStreetMap element (relation/way/node).
     * @param string   $outputDir   Directory where the Wikidata items are stored.
     * @param string[] $warnings    Collected warnings (completed in place).
     * @return void
     *
     * @throws GuzzleException
     */
    private static function downloadWikidata(string $wikidataTag, $element, string $outputDir, array &$warnings): void
    {
        // Check that the value of the tag is a valid Wikidata item identifier
        if (!self::isValidIdentifier($wikidataTag)) {
            $warnings[] = sprintf('Format of `wikidata` is invalid (%s) for %s(%d).', $wikidataTag, $element->type, $element->id);

            return;
        }

        $path = sprintf('%s/%s.json', $outputDir, $wikidataTag);
        if (file_exists($path)) {
            // Item already on disk: nothing to download. The "named after" items are only
            // resolved when the item itself is downloaded (unchanged behaviour).
            return;
        }

        self::save($wikidataTag, $element, $path, $warnings);
        if (!file_exists($path)) {
            // The download failed (a warning has been recorded by save()): there is no file to read.
            return;
        }

        $entity = Wikidata::read($path);
        $identifiers = Wikidata::extractNamedAfter($entity);
        if (is_null($identifiers)) {
            return;
        }

        foreach ($identifiers as $identifier) {
            // Check that the value of the property is a valid Wikidata item identifier
            if (!self::isValidIdentifier($identifier)) {
                $warnings[] = sprintf('Format of `P138` (NamedAfter) property is invalid (%s) for in item "%s".', $identifier, $wikidataTag);
                continue;
            }

            self::saveIfMissing($identifier, $element, $outputDir, $warnings);
        }
    }

    /**
     * Send request and store result.
     * Add a warning if the Wikidata item doesn't exist or if the server answers with another HTTP error.
     *
     * Whatever the failure, the target file is removed so that an error body or a partially
     * written file is never mistaken for a downloaded item by the `file_exists()` checks.
     *
     * @param string   $identifier Wikidata item identifier.
     * @param Element  $element    OpenStreetMap element (relation/way/node).
     * @param string   $path       Path where to store the result.
     * @param string[] $warnings   Collected warnings (completed in place).
     * @return void
     *
     * @throws GuzzleException For any failure other than an HTTP 4xx/5xx response (rethrown after cleanup).
     */
    private static function save(string $identifier, $element, string $path, array &$warnings = []): void
    {
        $url = sprintf('%s%s.json', self::URL, $identifier);

        try {
            $client = new \GuzzleHttp\Client();
            $client->request('GET', $url, ['sink' => $path]);
        } catch (BadResponseException $exception) {
            // The sink may contain the body of the error response: discard it.
            self::removeFile($path);

            switch ($exception->getResponse()->getStatusCode()) {
                case 404:
                    $warnings[] = sprintf('<warning>Wikidata item %s for %s(%d) does not exist.</warning>', $identifier, $element->type, $element->id);
                    break;
                default:
                    $warnings[] = sprintf('<warning>Error while fetching Wikidata item %s for %s(%d): %s.</warning>', $identifier, $element->type, $element->id, $exception->getMessage());
                    break;
            }
        } catch (GuzzleException $exception) {
            // Other transfer failure: the sink may be incomplete, discard it before propagating the error.
            self::removeFile($path);

            throw $exception;
        }
    }

    /**
     * Remove a file if it exists.
     *
     * @param string $path Path of the file to remove.
     * @return void
     */
    private static function removeFile(string $path): void
    {
        if (file_exists($path)) {
            unlink($path);
        }
    }
}