Skip to content

Commit a44a596

Browse files
authored
Add enhanced URL checks (#731)
* add new warning (RSC-023) when the host of an HTTP URL is null (fixes #708); catches URL typos with missing slashes like `https:/www` or `https:www`
* also report https URLs in the list of references
1 parent 5ef4497 commit a44a596

File tree

6 files changed

+42
-1
lines changed

6 files changed

+42
-1
lines changed

src/main/java/com/adobe/epubcheck/messages/MessageDictionary.java

+1
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ static Map<MessageId, Severity> getDefaultSeverities()
329329
map.put(MessageId.RSC_020, Severity.ERROR);
330330
map.put(MessageId.RSC_021, Severity.ERROR);
331331
map.put(MessageId.RSC_022, Severity.INFO);
332+
map.put(MessageId.RSC_023, Severity.WARNING);
332333

333334
// Scripting
334335
map.put(MessageId.SCP_001, Severity.USAGE);

src/main/java/com/adobe/epubcheck/messages/MessageId.java

+1
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ public enum MessageId implements Comparable<MessageId>
292292
RSC_020("RSC-020"),
293293
RSC_021("RSC-021"),
294294
RSC_022("RSC-022"),
295+
RSC_023("RSC-023"),
295296

296297
// Messages relating to scripting
297298
SCP_001("SCP-001"),

src/main/java/com/adobe/epubcheck/ops/OPSHandler.java

+12-1
Original file line numberDiff line numberDiff line change
@@ -224,9 +224,20 @@ else if (".".equals(href))
224224
URI uri = checkURI(href);
225225
if (uri == null) return;
226226

227-
if ("http".equals(uri.getScheme()))
227+
if ("http".equals(uri.getScheme()) || "https".equals(uri.getScheme()))
228228
{
229229
report.info(path, FeatureEnum.REFERENCE, href);
230+
231+
/*
232+
* #708 report invalid HTTP/HTTPS URLs
233+
* uri.scheme may be correct, but missing a : or a / from the //
234+
* leads to uri.getHost() == null
235+
*/
236+
if (uri.getHost() == null)
237+
{
238+
int missingSlashes = uri.getSchemeSpecificPart().startsWith("/") ? 1 : 2;
239+
report.message(MessageId.RSC_023, parser.getLocation(), uri, missingSlashes, uri.getScheme());
240+
}
230241
}
231242

232243
/*

src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties

+1
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ RSC_019=EPUBs with Multiple Renditions should contain a META-INF/metadata.xml fi
293293
RSC_020='%1$s' is not a valid URI.
294294
RSC_021=A Search Key Map Document must point to Content Documents ('%1$s' was not found in the spine).
295295
RSC_022=Cannot check image details (requires Java version 7 or higher).
296+
RSC_023=The URL '%1$s' is missing %2$d slash(es) '/' after the protocol '%3$s:'
296297

297298
#Scripting
298299
SCP_001=Use of Javascript eval() function in EPUB scripts is a security risk.

src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java

+9
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,15 @@ public void testValidateXHTMLLINKInvalid()
214214
EPUBVersion.VERSION_3);
215215
}
216216

217+
@Test
218+
public void testValidateXHTMLUrlChecksInvalid()
219+
{
220+
Collections.addAll(expectedErrors, MessageId.RSC_020);
221+
Collections.addAll(expectedWarnings, MessageId.HTM_025, MessageId.RSC_023, MessageId.RSC_023);
222+
testValidateDocument("xhtml/invalid/url-checks_issue-708.xhtml", "application/xhtml+xml",
223+
EPUBVersion.VERSION_3);
224+
}
225+
217226
@Test
218227
public void testValidateXHTMLXml11()
219228
{
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<?oxygen RNGSchema="../../../src/schema/epub-xhtml-30.rnc" type="compact"?>
3+
<?oxygen SCHSchema="../../../src/schema/epub-xhtml-30.sch"?>
4+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5+
<head/>
6+
<body>
7+
<p>
8+
<a href="https://www.youtube .com/watch?v=xxxxxxxxxxx">Invalid URI (RSC-020)</a>
9+
<a href="httpf://www.youtube.com/watch?v=xxxxxxxxxxx">Unsupported URI scheme (HTM-025)</a>
10+
<a href="https:/www.youtube.com/watch?v=xxxxxxxxxxx">URL is missing slashes after protocol (RSC-023)</a>
11+
<a href="https:www.youtube.com/watch?v=xxxxxxxxxxx">URL is missing slashes after protocol (RSC-023)</a>
12+
13+
<a href="https://www.youtube.com/watch?v=xxxxxxxxxxx">Valid URI</a>
14+
<a href="https://youtube.com/watch?v=xxxxxxxxxxx">Valid URI</a>
15+
<a href="https://youtube.com/watch?v=xxxxxx%20xxxx">Valid URI</a>
16+
</p>
17+
</body>
18+
</html>

0 commit comments

Comments
 (0)