~earboxer/zachwalk

17d4fa5740e8b005f4b90fe55ba4c4b24125c07a — Zach DeCook 5 months ago 5e2cc3f
date parsing: Better accept standard subscribable pages spec
2 files changed, 11 insertions(+), 6 deletions(-)

M test_zachwalk.py
M zachwalk.py
M test_zachwalk.py => test_zachwalk.py +4 -2
@@ 7,12 7,14 @@ def main():
    assert zachwalk.gnd([b'2021-01-31']) == zachwalk.DEFAULT
    assert zachwalk.gnd([b'=> path.gmi 2021-01-31 - my post']) == parse('2021-01-31').date()
    assert zachwalk.gnd([b'=> 2020/11/25/hello-gemini.gmi 2020-11-25 - Hello, Gemini!']) == parse('2020-11-25').date()
    assert zachwalk.gnd([b'=> gemini://drewdevault.com/2020/09/21/Gemini-TOFU.gmi September 21, 2020: TOFU recommendations for Gemini']) == parse('2020-09-21').date()
    assert zachwalk.gnd(['=> gemini://drewdevault.com/2021/02/15/Status-update-February-2021.gmi February 15, 2021: Status update, February 2021']) == parse('2021-02-15').date()
    assert zachwalk.gnd([b'=> gemini://drewdevault.com/2020/09/21/Gemini-TOFU.gmi 2020-09-21: TOFU recommendations for Gemini']) == parse('2020-09-21').date()
    assert zachwalk.gnd(['=> gemini://drewdevault.com/2021/02/15/Status-update-February-2021.gmi 2021-02-15: Status update, February 2021']) == parse('2021-02-15').date()
    assert zachwalk.gnd(['=>2021-01-28.gmi 2021-01-28 - RE ew0k: Your Gemini Browser and Server are Probably Doing Certificates Wrong']) == parse('2021-01-28').date()
    assert zachwalk.gnd(['=> atom.xml     Atom Feed']) == zachwalk.DEFAULT
    assert zachwalk.gnd(['=> geminitoepub.gmi     2021-02-27 Gemini to Epub']) == parse('2021-02-27').date()
    assert zachwalk.gnd(['=> m5paper.gmi	2021-01-31 M5Paper']) == parse('2021-01-31').date()
    assert zachwalk.gnd(['=> gemini://fossphones.com/03-29-22.gmi 2022-03-29 Linux Phone News - March 29, 2022']) == parse('2022-03-29').date()


    assert zachwalk.getdesc('=> m5paper.gmi	2021-01-31 M5Paper') == '2021-01-31 M5Paper'


M zachwalk.py => zachwalk.py +7 -4
@@ 37,13 37,16 @@ def gnd(fp):
     if type(line) != str:
       line=line.decode('UTF-8')
     if line.strip()[0:2] == '=>':
      desc =getdesc(line)
      try:
        desc =getdesc(line)
        desc=desc.split(':')[0] #this should only have 1 'datey' part
        date = parse(desc,fuzzy=True).date()
        # todo: read lots of these and compare them
        date = parse(desc[0:10],fuzzy=True).date()
        return date
      except:
        try:
          date = parse(desc,fuzzy=True).date()
          return date
        except:
          pass
        pass
    return DEFAULT