Skip to content

Commit 41916d8

Browse files
committed
Drop cached sources to reduce memory usage
1 parent 8779e7b commit 41916d8

File tree

1 file changed

+43
-23
lines changed

1 file changed

+43
-23
lines changed

src/tools/linkchecker/main.rs

+43-23
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,25 @@ struct FileEntry {
6666

6767
type Cache = HashMap<PathBuf, FileEntry>;
6868

69+
impl FileEntry {
70+
/// Records the ids found in `contents` into `self.ids`.
///
/// The scan runs only while `self.ids` is empty. Each ` id` attribute value
/// reported by `with_attrs_in_source` has any leading `#` characters removed
/// and is then inserted into `self.ids`. When `insert` returns `false`,
/// `*errors` is set to `true` and an "id is not unique" line is printed
/// using `file` and the index `i` supplied by `with_attrs_in_source`.
///
/// NOTE(review): `self.ids` is presumably a set of `String`s (its
/// declaration is not shown here) — confirm against `struct FileEntry`.
fn parse_ids(&mut self,
71+
file: &Path,
72+
contents: &str,
73+
errors: &mut bool)
74+
{
75+
// An empty `ids` is treated as "not parsed yet".
// NOTE(review): a file that contains no ids at all leaves `ids` empty,
// so it would be scanned again on every call — confirm this is acceptable.
if self.ids.is_empty() {
76+
with_attrs_in_source(contents, " id", |fragment, i| {
77+
// Store the id without leading `#` characters.
let frag = fragment.trim_left_matches("#").to_owned();
78+
// `insert` returning `false` is reported as a duplicate id.
if !self.ids.insert(frag) {
79+
*errors = true;
80+
println!("{}:{}: id is not unique: `{}`",
81+
file.display(), i, fragment);
82+
}
83+
});
84+
}
85+
}
86+
}
87+
6988
fn walk(cache: &mut Cache,
7089
root: &Path,
7190
dir: &Path,
@@ -79,7 +98,13 @@ fn walk(cache: &mut Cache,
7998
if kind.is_dir() {
8099
walk(cache, root, &path, url, errors);
81100
} else {
82-
check(cache, root, &path, url, errors);
101+
let pretty_path = check(cache, root, &path, url, errors);
102+
if let Some(pretty_path) = pretty_path {
103+
let entry = cache.get_mut(&pretty_path).unwrap();
104+
// we don't need the source anymore,
105+
// so drop it to reduce memory-usage
106+
entry.source = String::new();
107+
}
83108
}
84109
url.path_mut().unwrap().pop();
85110
}
@@ -89,42 +114,42 @@ fn check(cache: &mut Cache,
89114
root: &Path,
90115
file: &Path,
91116
base: &Url,
92-
errors: &mut bool)
117+
errors: &mut bool) -> Option<PathBuf>
93118
{
94119
// ignore js files as they are not prone to errors as the rest of the
95120
// documentation is and they otherwise bring up false positives.
96121
if file.extension().and_then(|s| s.to_str()) == Some("js") {
97-
return
122+
return None;
98123
}
99124

100125
// Unfortunately we're not 100% full of valid links today so we need a few
101126
// whitelists to get this past `make check` today.
102127
// FIXME(#32129)
103128
if file.ends_with("std/string/struct.String.html") ||
104129
file.ends_with("collections/string/struct.String.html") {
105-
return
130+
return None;
106131
}
107132
// FIXME(#32130)
108133
if file.ends_with("btree_set/struct.BTreeSet.html") ||
109134
file.ends_with("collections/struct.BTreeSet.html") ||
110135
file.ends_with("collections/btree_map/struct.BTreeMap.html") ||
111136
file.ends_with("collections/hash_map/struct.HashMap.html") {
112-
return
137+
return None;
113138
}
114139

115140
if file.ends_with("std/sys/ext/index.html") {
116-
return
141+
return None;
117142
}
118143

119144
if let Some(file) = file.to_str() {
120145
// FIXME(#31948)
121146
if file.contains("ParseFloatError") {
122-
return
147+
return None;
123148
}
124149
// weird reexports, but this module is on its way out, so chalk it up to
125150
// "rustdoc weirdness" and move on from there
126151
if file.contains("scoped_tls") {
127-
return
152+
return None;
128153
}
129154
}
130155

@@ -134,8 +159,12 @@ fn check(cache: &mut Cache,
134159
let res = load_file(cache, root, PathBuf::from(file), false, false);
135160
let (pretty_file, contents) = match res {
136161
Ok(res) => res,
137-
Err(_) => return,
162+
Err(_) => return None,
138163
};
164+
{
165+
cache.get_mut(&pretty_file).unwrap()
166+
.parse_ids(&pretty_file, &contents, errors);
167+
}
139168

140169
// Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
141170
with_attrs_in_source(&contents, " href", |url, i| {
@@ -172,19 +201,10 @@ fn check(cache: &mut Cache,
172201
return;
173202
}
174203

175-
let ids = &mut cache.get_mut(&pretty_path).unwrap().ids;
176-
if ids.is_empty() {
177-
// Search for anything that's the regex 'id[ ]*=[ ]*".*?"'
178-
with_attrs_in_source(&contents, " id", |fragment, i| {
179-
let frag = fragment.trim_left_matches("#").to_owned();
180-
if !ids.insert(frag) {
181-
*errors = true;
182-
println!("{}:{}: id is not unique: `{}`",
183-
pretty_file.display(), i, fragment);
184-
}
185-
});
186-
}
187-
if !ids.contains(fragment) {
204+
let entry = &mut cache.get_mut(&pretty_path).unwrap();
205+
entry.parse_ids(&pretty_path, &contents, errors);
206+
207+
if !entry.ids.contains(fragment) {
188208
*errors = true;
189209
print!("{}:{}: broken link fragment ",
190210
pretty_file.display(), i + 1);
@@ -199,14 +219,14 @@ fn check(cache: &mut Cache,
199219
println!("{}", pretty_path.display());
200220
}
201221
});
222+
Some(pretty_file)
202223
}
203224

204225
fn load_file(cache: &mut Cache,
205226
root: &Path,
206227
file: PathBuf,
207228
follow_redirects: bool,
208229
is_redirect: bool) -> Result<(PathBuf, String), LoadError> {
209-
210230
let mut contents = String::new();
211231
let pretty_file = PathBuf::from(file.strip_prefix(root).unwrap_or(&file));
212232

0 commit comments

Comments
 (0)