In Python 3.3+, email
message can parse the headers for you:
import email
import email.policy
headers = email.message_from_file(file, policy=email.policy.default)
print(headers.get( date ).datetime)
# -> 2009-11-16 13:32:02+01:00
Since Python 3.2+, it works if you replace %Z
with %z
:
>>> from datetime import datetime
>>> datetime.strptime("Mon, 16 Nov 2009 13:32:02 +0100",
... "%a, %d %b %Y %H:%M:%S %z")
datetime.datetime(2009, 11, 16, 13, 32, 2,
tzinfo=datetime.timezone(datetime.timedelta(0, 3600)))
Or using email
package (Python 3.3+):
>>> from email.utils import parsedate_to_datetime
>>> parsedate_to_datetime("Mon, 16 Nov 2009 13:32:02 +0100")
datetime.datetime(2009, 11, 16, 13, 32, 2,
tzinfo=datetime.timezone(datetime.timedelta(0, 3600)))
if UTC offset is specified as -0000
then it returns a naive datetime object that represents time in UTC otherwise it returns an aware datetime object with the corresponding tzinfo
set.
To parse rfc 5322 date-time string on earlier Python versions (2.6+):
from calendar import timegm
from datetime import datetime, timedelta, tzinfo
from email.utils import parsedate_tz
ZERO = timedelta(0)
time_string = Mon, 16 Nov 2009 13:32:02 +0100
tt = parsedate_tz(time_string)
#NOTE: mktime_tz is broken on Python < 2.7.4,
# see https://bugs.python.org/issue21267
timestamp = timegm(tt) - tt[9] # local time - utc offset == utc time
naive_utc_dt = datetime(1970, 1, 1) + timedelta(seconds=timestamp)
aware_utc_dt = naive_utc_dt.replace(tzinfo=FixedOffset(ZERO, UTC ))
aware_dt = aware_utc_dt.astimezone(FixedOffset(timedelta(seconds=tt[9])))
print(aware_utc_dt)
print(aware_dt)
# -> 2009-11-16 12:32:02+00:00
# -> 2009-11-16 13:32:02+01:00
where FixedOffset
is based on tzinfo
subclass from the datetime
documentation:
class FixedOffset(tzinfo):
"""Fixed UTC offset: `time = utc_time + utc_offset`."""
def __init__(self, offset, name=None):
self.__offset = offset
if name is None:
seconds = abs(offset).seconds
assert abs(offset).days == 0
hours, seconds = divmod(seconds, 3600)
if offset < ZERO:
hours = -hours
minutes, seconds = divmod(seconds, 60)
assert seconds == 0
#NOTE: the last part is to remind about deprecated POSIX
# GMT+h timezones that have the opposite sign in the
# name; the corresponding numeric value is not used e.g.,
# no minutes
self.__name = <%+03d%02d>GMT%+d % (hours, minutes, -hours)
else:
self.__name = name
def utcoffset(self, dt=None):
return self.__offset
def tzname(self, dt=None):
return self.__name
def dst(self, dt=None):
return ZERO
def __repr__(self):
return FixedOffset(%r, %r) % (self.utcoffset(), self.tzname())