forked from psf/cachecontrol
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathheuristics.py
153 lines (116 loc) · 4.7 KB
/
heuristics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
import calendar
import time
from datetime import datetime, timedelta, timezone
from email.utils import formatdate, parsedate, parsedate_tz
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional
if TYPE_CHECKING:
from urllib3 import HTTPResponse
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
def expire_after(delta: timedelta, date: Optional[datetime] = None) -> datetime:
date = date or datetime.now(timezone.utc)
return date + delta
def datetime_to_header(dt: datetime) -> str:
return formatdate(calendar.timegm(dt.timetuple()))
class BaseHeuristic(object):
def warning(self, response: "HTTPResponse") -> Optional[str]:
"""
Return a valid 1xx warning header value describing the cache
adjustments.
The response is provided too allow warnings like 113
http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
to explicitly say response is over 24 hours old.
"""
return '110 - "Response is Stale"'
def update_headers(self, response: "HTTPResponse") -> Dict[str, str]:
"""Update the response headers with any new headers.
NOTE: This SHOULD always include some Warning header to
signify that the response was cached by the client, not
by way of the provided headers.
"""
return {}
def apply(self, response: "HTTPResponse") -> "HTTPResponse":
updated_headers = self.update_headers(response)
if updated_headers:
response.headers.update(updated_headers)
warning_header_value = self.warning(response)
if warning_header_value is not None:
response.headers.update({"Warning": warning_header_value})
return response
class OneDayCache(BaseHeuristic):
"""
Cache the response by providing an expires 1 day in the
future.
"""
def update_headers(self, response: "HTTPResponse") -> Dict[str, str]:
headers = {}
if "expires" not in response.headers:
date = parsedate(response.headers["date"])
expires = expire_after(timedelta(days=1), date=datetime(*date[:6], tzinfo=timezone.utc))
headers["expires"] = datetime_to_header(expires)
headers["cache-control"] = "public"
return headers
class ExpiresAfter(BaseHeuristic):
"""
Cache **all** requests for a defined time period.
"""
def __init__(self, **kw: Any) -> None:
self.delta = timedelta(**kw)
def update_headers(self, response: "HTTPResponse") -> Dict[str, str]:
expires = expire_after(self.delta)
return {"expires": datetime_to_header(expires), "cache-control": "public"}
def warning(self, response: "HTTPResponse") -> Optional[str]:
tmpl = "110 - Automatically cached for %s. Response might be stale"
return tmpl % self.delta
class LastModified(BaseHeuristic):
"""
If there is no Expires header already, fall back on Last-Modified
using the heuristic from
http://tools.ietf.org/html/rfc7234#section-4.2.2
to calculate a reasonable value.
Firefox also does something like this per
https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
Unlike mozilla we limit this to 24-hr.
"""
cacheable_by_default_statuses = {
200,
203,
204,
206,
300,
301,
404,
405,
410,
414,
501,
}
def update_headers(self, resp: "HTTPResponse") -> Dict[str, str]:
headers: Mapping[str, str] = resp.headers
if "expires" in headers:
return {}
if "cache-control" in headers and headers["cache-control"] != "public":
return {}
if resp.status not in self.cacheable_by_default_statuses:
return {}
if "date" not in headers or "last-modified" not in headers:
return {}
time_tuple = parsedate_tz(headers["date"])
assert time_tuple is not None
date = calendar.timegm(time_tuple[:6])
last_modified = parsedate(headers["last-modified"])
if last_modified is None:
return {}
now = time.time()
current_age = max(0, now - date)
delta = date - calendar.timegm(last_modified)
freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
if freshness_lifetime <= current_age:
return {}
expires = date + freshness_lifetime
return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))}
def warning(self, resp: "HTTPResponse") -> Optional[str]:
return None