"Amy G" <amy-g-art at cox.net> wrote in message

news:4GfQb.16187$AA6.14368 at fed1read03...

I have seen something about this before on this forum, but my Google search

didn't come up with the answer I am looking for.

I have a list of tuples. Each tuple is in the following format:

("data", "moredata", "evenmoredata", "date string")

The date string is my concern. This is the date stamp from an email.

The problem is that I have a whole bunch of variations when it comes to the

format that the date string is in. For example I could have the following

two tuples:

("data", "moredata", "evenmoredata", "Fri, 23 Jan 2004 00:06:15")

("data", "moredata", "evenmoredata", "Thursday, 22 January 2004 03:15:06")

I know there is some way to use the date string from each of these to get a

date usable by python, but I cannot figure it out.

I was trying to use time.strptime but have been unsuccessful thus far.

Any help is appreciated.

This is what I use to parse dates of unknown provenance.

It's laughably overengineered, and I don't include the day

of the week or the time. Given your examples, though,

those should be easy enough to deal with.

HTH

John Roth

class DateContainer(object):
    """Hold a calendar date (year, month, day, optional epoch).

    The date can be read or assigned through two properties:

    * ``stringValue`` -- a "user friendly" date whose components may
      appear in several orders (see ``_checkUserFriendlyDate``).
    * ``value`` -- an ISO style ``YYYY-MM-DD`` string, optionally followed
      by a space and an epoch marker (``AD``, ``BC``, ``CE`` or ``BCE``).

    Day of week and time of day are not handled.  Every validation
    failure is reported as ``ValueError``.
    """

    # Property-name -> type-name table.  Nothing in this class reads it;
    # presumably it is metadata for an external framework -- TODO confirm.
    _typeDict = {"stringValue": "String", "value": "String"}

    _stringValue = ""
    _value = ""

    # Defaults used until a date has been assigned.
    year = 1
    month = 1
    day = 1
    bc = ""

    # Recognised epoch markers and three-letter month abbreviations.
    _EPOCHS = ("AD", "BC", "CE", "BCE")
    _MONTH_ABBREVS = ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
                      'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC')

    def _checkUserFriendlyDate(self, date):
        """Parse a user friendly date and store its components on self.

        The rules for a user friendly date are:

        1. The year must be at least three digits, including leading
           zeroes if necessary.  Day and numeric month may be no longer
           than 2 digits.
        2. The month may be alphabetic or numeric.  If it's alphabetic,
           it must be at least three letters long.
        3. The epoch may be ad, bc, bce or ce.  If omitted, it's assumed
           to be ad.
        4. After removing the year, epoch and an alphabetic month, the
           remaining single piece is the day, or the piece that is
           greater than 12.
        5. If two pieces remain, the first is the month, the second is
           the day.  Both are between 1 and 12, inclusive.

        Returns True on success.  Raises ValueError when the date cannot
        be interpreted; in that case self is left unmodified.
        """
        # NOTE(review): dateTimeParse is defined outside this class.  The
        # code below assumes it returns a mutable list of string tokens,
        # upper-cased (the epoch and month tables are upper case) --
        # confirm against its definition.
        partList = dateTimeParse(date)
        if not (2 < len(partList) < 5):
            raise ValueError("incorrect part list: %s" % (partList,))

        bc = self._findBC(partList)
        if len(partList) != 3:
            # This used to *return* the message, which the stringValue
            # setter ignored, so an invalid date was silently stored.
            raise ValueError("too many components in date: '%s'" % date)

        year = self._findYear(partList)
        month = self._findAlphaMonth(partList)
        if month != 0:
            # Alphabetic month found: the one remaining piece is the day.
            day = partList[0]
        else:
            day = self._findDay(partList)
            if day:
                # A piece > 12 can only be the day; the other is the month.
                month = partList[0]
            else:
                # Ambiguous: fall back to month-then-day order.
                month, day = partList

        year = self._checkNum(year, 4712)
        day = self._checkNum(day, 31)
        month = self._checkNum(month, 12)

        # AD/CE is the default epoch and is stored as the empty string.
        if bc in ("AD", "CE"):
            bc = ""
        self.year, self.month, self.day, self.bc = year, month, day, bc
        return True

    def _checkNum(self, num, limit):
        """Convert num to int and check that it does not exceed limit.

        Raises ValueError if num is not an integer literal or if the
        converted value is greater than limit.
        """
        result = int(num)
        if result > limit:
            raise ValueError("number '%s' out of range '%s'" % (num, limit))
        return result

    def _findBC(self, partList):
        """Remove and return the first epoch marker in partList.

        Returns "" (leaving partList untouched) when none is present.
        """
        for i, word in enumerate(partList):
            if word in self._EPOCHS:
                del partList[i]
                return word
        # XXX if len(partList > 3): error
        return ""

    def _findYear(self, partList):
        """Remove and return the first all-digit piece longer than 2 chars.

        Raises ValueError when partList contains no such piece.
        """
        for i, word in enumerate(partList):
            if len(word) > 2 and word.isdigit():
                del partList[i]
                return word
        raise ValueError("no year found in %s" % (partList,))

    def _findAlphaMonth(self, partList):
        """Remove the first alphabetic piece and return its month number.

        The month is identified by the first three letters of the piece,
        compared against upper-case abbreviations.  Returns 0 when
        partList has no alphabetic piece.  Raises ValueError when the
        alphabetic piece is not a month name.
        """
        for i, word in enumerate(partList):
            if word.isalpha():
                try:
                    month = self._MONTH_ABBREVS.index(word[:3]) + 1
                except ValueError:
                    raise ValueError("unrecognized month name: '%s'" % word)
                del partList[i]
                return month
        return 0

    def _findDay(self, partList):
        """Remove and return the first numeric piece greater than 12.

        Such a piece cannot be a month, so it must be the day.  Returns ""
        (leaving partList untouched) when there is none.
        """
        for i, word in enumerate(partList):
            if word.isdigit() and int(word) > 12:
                del partList[i]
                return word
        return ""

    def _getStringValue(self):
        """Return the user friendly date string most recently assigned."""
        return self._stringValue

    def _setStringValue(self, value):
        """Validate value as a user friendly date, then remember it.

        Raises ValueError (from _checkUserFriendlyDate) if it is invalid.
        """
        self._checkUserFriendlyDate(value)
        self._stringValue = value

    stringValue = property(_getStringValue, _setStringValue,
                           doc="User Friendly Date")

    def _getValue(self):
        """Return the date as 'YYYY-MM-DD', plus ' <epoch>' if one is set."""
        isoDate = "%04u-%02u-%02u %s" % (self.year, self.month, self.day,
                                         self.bc)
        # strip() drops the trailing blank left by an empty epoch.
        return isoDate.strip()

    def checkISODate(self, value):
        """Parse an ISO style date string and store its components on self.

        value is read positionally: characters 0-3 are the year, 5-6 the
        month, 8-9 the day, and anything from index 11 on is the epoch
        marker (matched case-insensitively).  Raises ValueError for a
        non-numeric or out-of-range field or an unknown epoch.
        """
        year = self._checkNum(value[:4], 4712)
        month = self._checkNum(value[5:7], 12)
        day = self._checkNum(value[8:10], 31)

        # Default to "no epoch" so a bare 'YYYY-MM-DD' no longer hits an
        # unbound local below.
        bc = ""
        if len(value) > 10:
            # Normalise case so the stored marker matches what was validated.
            bc = value[11:].upper()
            if bc not in self._EPOCHS:
                raise ValueError("unknown epoch in ISO date: '%s'" % value)
        if bc in ("AD", "CE"):
            bc = ""
        self.year, self.month, self.day, self.bc = year, month, day, bc
        return

    def _setValue(self, value):
        """Assign the date from an ISO style string.

        The string is validated with checkISODate, then the normalised
        ISO form is assigned to stringValue (which re-validates it as a
        user friendly date).
        """
        # The original called the non-existent self._checkISODate.
        self.checkISODate(value)
        self.stringValue = self._getValue()
        return None

    value = property(_getValue, _setValue,
                     doc="ISO Standard Format Date")