Skip to content

Commit df49f6b

Browse files
authored
Error handling (#24)
* Add test when url is invalid and panic
* Initial error handling ref #22
* Rename ErrorKind::HttpClient => ErrorKind::Http
* Implement std::error::Error and rename to Error
1 parent 1474a8c commit df49f6b

File tree

6 files changed

+73
-17
lines changed

6 files changed

+73
-17
lines changed

src/http/reqwest/async_reqwest.rs

+11 -9
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,26 @@
11
use reqwest::{Client, Request};
2-
use reqwest::{Method, Error};
2+
use reqwest::Method;
3+
use reqwest::Error as ReqwestError;
34
use reqwest::header::HeaderValue;
45
use url::{Origin, Url};
56
use reqwest::header::USER_AGENT;
67
use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
78
use crate::parser::{ParseResult, parse_fetched_robots_txt};
89
use crate::model::FetchedRobotsTxt;
10+
use crate::model::{Error, ErrorKind};
911
use std::pin::Pin;
1012
use futures::task::{Context, Poll};
1113
use futures::Future;
1214
use futures::future::TryFutureExt;
1315
use futures::future::ok as future_ok;
1416

15-
type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>;
17+
type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), ReqwestError>>>;
1618

1719
impl RobotsTxtClient for Client {
18-
type Result = RobotsTxtResponse;
20+
type Result = Result<RobotsTxtResponse, Error>;
1921
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
2022
let url = format!("{}/robots.txt", origin.unicode_serialization());
21-
let url = Url::parse(&url).expect("Unable to parse robots.txt url");
23+
let url = Url::parse(&url).map_err(|err| Error {kind: ErrorKind::Url(err)})?;
2224
let mut request = Request::new(Method::GET, url);
2325
let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
2426
let response = self
@@ -29,11 +31,11 @@ impl RobotsTxtClient for Client {
2931
return future_ok((response_info, response_text));
3032
});
3133
});
32-
let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>> = Box::pin(response);
33-
return RobotsTxtResponse {
34+
let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), ReqwestError>>>> = Box::pin(response);
35+
Ok(RobotsTxtResponse {
3436
origin,
3537
response,
36-
}
38+
})
3739
}
3840
}
3941

@@ -55,7 +57,7 @@ impl RobotsTxtResponse {
5557
}
5658

5759
impl Future for RobotsTxtResponse {
58-
type Output = Result<ParseResult<FetchedRobotsTxt>, Error>;
60+
type Output = Result<ParseResult<FetchedRobotsTxt>, ReqwestError>;
5961

6062
fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
6163
let self_mut = self.get_mut();
@@ -73,4 +75,4 @@ impl Future for RobotsTxtResponse {
7375
},
7476
}
7577
}
76-
}
78+
}

src/http/reqwest/sync_reqwest.rs

+6 -5
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,24 @@
11
use reqwest::blocking::{Client, Request};
2-
use reqwest::{Method, Error};
2+
use reqwest::Method;
33
use reqwest::header::HeaderValue;
44
use url::{Origin, Url};
55
use reqwest::header::USER_AGENT;
66
use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
77
use crate::parser::{ParseResult, parse_fetched_robots_txt};
88
use crate::model::FetchedRobotsTxt;
9+
use crate::model::{Error, ErrorKind};
910

1011
impl RobotsTxtClient for Client {
1112
type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
1213
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
1314
let url = format!("{}/robots.txt", origin.unicode_serialization());
14-
let url = Url::parse(&url).expect("Unable to parse robots.txt url");
15+
let url = Url::parse(&url).map_err(|err| Error {kind: ErrorKind::Url(err)})?;
1516
let mut request = Request::new(Method::GET, url);
1617
let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
17-
let response = self.execute(request)?;
18+
let response = self.execute(request).map_err(|err| Error {kind: ErrorKind::Http(err)})?;
1819
let status_code = response.status().as_u16();
19-
let text = response.text()?;
20+
let text = response.text().map_err(|err| Error {kind: ErrorKind::Http(err)})?;
2021
let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
2122
return Ok(robots_txt);
2223
}
23-
}
24+
}

src/model.rs

+3 -1
Original file line number | Diff line number | Diff line change
@@ -14,4 +14,6 @@ pub (crate) use self::fetched_robots_txt::FetchedRobotsTxtContainer;
1414
mod fetched_robots_txt;
1515
pub use self::robots_txt::RobotsTxt;
1616
mod path;
17-
pub (crate) use self::path::Path;
17+
pub (crate) use self::path::Path;
18+
mod errors;
19+
pub use self::errors::{Error, ErrorKind};

src/model/errors.rs

+23
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
use std::fmt;
2+
3+
#[derive(Debug)]
4+
pub struct Error {
5+
pub kind: ErrorKind,
6+
}
7+
8+
#[derive(Debug)]
9+
pub enum ErrorKind {
10+
Url(url::ParseError),
11+
Http(reqwest::Error),
12+
}
13+
14+
impl fmt::Display for Error {
15+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
16+
match self.kind {
17+
ErrorKind::Url(ref err) => err.fmt(f),
18+
ErrorKind::Http(ref err) => err.fmt(f),
19+
}
20+
}
21+
}
22+
23+
impl std::error::Error for Error {}

tests/test_reqwest_async.rs

+16 -2
Original file line number | Diff line number | Diff line change
@@ -3,14 +3,28 @@ use robotparser::service::RobotsTxtService;
33
use reqwest::Client;
44
use url::Url;
55
use tokio::runtime::Runtime;
6+
use url::{Host, Origin};
67

78
#[test]
89
fn test_reqwest_async() {
910
let mut runtime = Runtime::new().unwrap();
1011
let client = Client::new();
1112
let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
12-
let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()));
13+
let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()).unwrap());
1314
let robots_txt = robots_txt_response.unwrap().get_result();
1415
let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
1516
assert!(robots_txt.can_fetch("*", &fetch_url));
16-
}
17+
let fetch_url = Url::parse("http://www.python.org/webstats/").unwrap();
18+
assert!(!robots_txt.can_fetch("*", &fetch_url));
19+
}
20+
21+
#[test]
22+
fn test_reqwest_blocking_panic_url() {
23+
let client = Client::new();
24+
let host = Host::Domain("python.org::".into());
25+
let origin = Origin::Tuple("https".into(), host, 80);
26+
match client.fetch_robots_txt(origin) {
27+
Ok(_) => assert!(false),
28+
Err(_) => assert!(true)
29+
}
30+
}

tests/test_reqwest_blocking.rs

+14
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ use robotparser::http::RobotsTxtClient;
22
use robotparser::service::RobotsTxtService;
33
use reqwest::blocking::Client;
44
use url::Url;
5+
use url::{Host, Origin};
56

67
#[test]
78
fn test_reqwest_blocking() {
@@ -10,4 +11,17 @@ fn test_reqwest_blocking() {
1011
let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result();
1112
let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
1213
assert!(robots_txt.can_fetch("*", &fetch_url));
14+
let fetch_url = Url::parse("https://www.python.org/webstats/").unwrap();
15+
assert!(!robots_txt.can_fetch("*", &fetch_url));
16+
}
17+
18+
#[test]
19+
fn test_reqwest_blocking_panic_url() {
20+
let client = Client::new();
21+
let host = Host::Domain("python.org::".into());
22+
let origin = Origin::Tuple("https".into(), host, 80);
23+
match client.fetch_robots_txt(origin) {
24+
Ok(_) => assert!(false),
25+
Err(_) => assert!(true)
26+
}
1327
}

0 commit comments

Comments
 (0)