Commit f7314be

Initial error handling
ref #22

Parent: 21b25b4

6 files changed: +46 −17 lines


src/http/reqwest/async_reqwest.rs (+6 −5)

@@ -6,6 +6,7 @@ use reqwest::header::USER_AGENT;
 use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
 use crate::parser::{ParseResult, parse_fetched_robots_txt};
 use crate::model::FetchedRobotsTxt;
+use crate::model::{RobotparserError, ErrorKind};
 use std::pin::Pin;
 use futures::task::{Context, Poll};
 use futures::Future;
@@ -15,10 +16,10 @@ use futures::future::ok as future_ok;
 type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>;

 impl RobotsTxtClient for Client {
-    type Result = RobotsTxtResponse;
+    type Result = Result<RobotsTxtResponse, RobotparserError>;
     fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
         let url = format!("{}/robots.txt", origin.unicode_serialization());
-        let url = Url::parse(&url).expect("Unable to parse robots.txt url");
+        let url = Url::parse(&url).map_err(|err| RobotparserError {kind: ErrorKind::Url(err)})?;
         let mut request = Request::new(Method::GET, url);
         let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
         let response = self
@@ -30,10 +31,10 @@ impl RobotsTxtClient for Client {
             });
         });
         let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>> = Box::pin(response);
-        return RobotsTxtResponse {
+        Ok(RobotsTxtResponse {
             origin,
             response,
-        }
+        })
     }
 }

@@ -73,4 +74,4 @@ impl Future for RobotsTxtResponse {
             },
         }
     }
-}
+}
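
With this change the async client's fetch_robots_txt returns Result<RobotsTxtResponse, RobotparserError>: an unparseable origin now surfaces as an Err before the request future is ever polled, instead of panicking. A minimal caller sketch (assuming the trait is exposed as robotparser::http::RobotsTxtClient and the future is driven with a tokio Runtime, as the updated tests below do):

use reqwest::Client;
use robotparser::http::RobotsTxtClient;
use tokio::runtime::Runtime;
use url::Url;

fn main() {
    let mut runtime = Runtime::new().unwrap();
    let client = Client::new();
    let url = Url::parse("https://www.python.org/robots.txt").unwrap();
    // The URL-parse failure is now reported synchronously; only a valid
    // origin yields a RobotsTxtResponse future to drive.
    let response = client.fetch_robots_txt(url.origin()).expect("invalid origin");
    let robots_txt = runtime.block_on(response).unwrap().get_result();
    assert!(robots_txt.can_fetch("*", &url));
}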

src/http/reqwest/sync_reqwest.rs (+7 −6)

@@ -1,23 +1,24 @@
 use reqwest::blocking::{Client, Request};
-use reqwest::{Method, Error};
+use reqwest::Method;
 use reqwest::header::HeaderValue;
 use url::{Origin, Url};
 use reqwest::header::USER_AGENT;
 use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
 use crate::parser::{ParseResult, parse_fetched_robots_txt};
 use crate::model::FetchedRobotsTxt;
+use crate::model::{RobotparserError, ErrorKind};

 impl RobotsTxtClient for Client {
-    type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
+    type Result = Result<ParseResult<FetchedRobotsTxt>, RobotparserError>;
     fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
         let url = format!("{}/robots.txt", origin.unicode_serialization());
-        let url = Url::parse(&url).expect("Unable to parse robots.txt url");
+        let url = Url::parse(&url).map_err(|err| RobotparserError {kind: ErrorKind::Url(err)})?;
         let mut request = Request::new(Method::GET, url);
         let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
-        let response = self.execute(request)?;
+        let response = self.execute(request).map_err(|err| RobotparserError {kind: ErrorKind::HttpClient(err)})?;
         let status_code = response.status().as_u16();
-        let text = response.text()?;
+        let text = response.text().map_err(|err| RobotparserError {kind: ErrorKind::HttpClient(err)})?;
         let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
         return Ok(robots_txt);
     }
-}
+}
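
The blocking client follows the same pattern: each fallible step (parsing the URL, executing the request, reading the body) is mapped into RobotparserError via map_err rather than panicking or leaking reqwest::Error. A caller sketch under the same assumptions as above:

use reqwest::blocking::Client;
use robotparser::http::RobotsTxtClient;
use url::Url;

fn main() {
    let client = Client::new();
    let url = Url::parse("https://www.python.org/robots.txt").unwrap();
    match client.fetch_robots_txt(url.origin()) {
        // Ok carries the ParseResult; get_result() yields the parsed model.
        Ok(parse_result) => {
            let robots_txt = parse_result.get_result();
            println!("can fetch: {}", robots_txt.can_fetch("*", &url));
        }
        // Display on RobotparserError forwards to the wrapped url/reqwest error.
        Err(err) => eprintln!("failed to fetch robots.txt: {}", err),
    }
}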

src/model.rs (+3 −1)

@@ -14,4 +14,6 @@ pub (crate) use self::fetched_robots_txt::FetchedRobotsTxtContainer;
 mod fetched_robots_txt;
 pub use self::robots_txt::RobotsTxt;
 mod path;
-pub (crate) use self::path::Path;
+pub (crate) use self::path::Path;
+mod errors;
+pub use self::errors::{RobotparserError, ErrorKind};

src/model/errors.rs (+21, new file)

@@ -0,0 +1,21 @@
+use std::fmt;
+
+#[derive(Debug)]
+pub struct RobotparserError {
+    pub kind: ErrorKind,
+}
+
+#[derive(Debug)]
+pub enum ErrorKind {
+    Url(url::ParseError),
+    HttpClient(reqwest::Error),
+}
+
+impl fmt::Display for RobotparserError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self.kind {
+            ErrorKind::Url(ref err) => err.fmt(f),
+            ErrorKind::HttpClient(ref err) => err.fmt(f),
+        }
+    }
+}
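
The new type derives Debug and forwards Display to the wrapped error, which covers printing; conversions happen through explicit map_err calls, since no From impls are defined. One possible follow-up, not part of this commit, would be wiring it into std::error::Error so callers can walk the source chain. A sketch, assuming the definitions above are in scope:

use std::error::Error;

impl Error for RobotparserError {
    // Expose the underlying url/reqwest error as the source.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self.kind {
            ErrorKind::Url(ref err) => Some(err),
            ErrorKind::HttpClient(ref err) => Some(err),
        }
    }
}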

tests/test_reqwest_async.rs (+5 −3)

@@ -10,7 +10,7 @@ fn test_reqwest_async() {
     let mut runtime = Runtime::new().unwrap();
     let client = Client::new();
     let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
-    let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()));
+    let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()).unwrap());
     let robots_txt = robots_txt_response.unwrap().get_result();
     let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap();
     assert!(robots_txt.can_fetch("*", &fetch_url));
@@ -19,10 +19,12 @@ fn test_reqwest_async() {
 }

 #[test]
-#[should_panic]
 fn test_reqwest_blocking_panic_url() {
     let client = Client::new();
     let host = Host::Domain("python.org::".into());
     let origin = Origin::Tuple("https".into(), host, 80);
-    client.fetch_robots_txt(origin);
+    match client.fetch_robots_txt(origin) {
+        Ok(_) => assert!(false),
+        Err(_) => assert!(true)
+    }
 }

tests/test_reqwest_blocking.rs (+4 −2)

@@ -16,10 +16,12 @@ fn test_reqwest_blocking() {
 }

 #[test]
-#[should_panic]
 fn test_reqwest_blocking_panic_url() {
     let client = Client::new();
     let host = Host::Domain("python.org::".into());
     let origin = Origin::Tuple("https".into(), host, 80);
-    client.fetch_robots_txt(origin).unwrap().get_result();
+    match client.fetch_robots_txt(origin) {
+        Ok(_) => assert!(false),
+        Err(_) => assert!(true)
+    }
 }
