#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# vim:syntax=python
# http://holidata.net/en-US/2016.json
#
# Provenance: script "refresh", reconstructed from the patch of commit
#   1d1d3a5add792024377206672d97af164a7f207d
#   Author:  Harald Pfeiffer
#   Date:    Mon, 24 Sep 2018 16:16:03 +0200
#   Subject: Initial commit: alpha version of my own, working refresh script
# The same commit also added the entry "2018_01_06 = Heilige drei Könige" to the
# "de-ST:" section of holidays.de-ST.

"""
This script downloads data from holidata.net and writes it into files in the current directory.
Its output formats are parseable by timewarrior (https://taskwarrior.org/docs/timewarrior/).

This is very alpha. Since holidata.net has a few minor mistakes for Germany the output is written
to test.xx-YY for now (the mistakes have been reported upstream). The files are still sourcable
with that name.

Usage: refresh [LOCODE [YEAR]]
Without arguments the script downloads de-DE for the current year.

The ISO-3166-2 violation for North Rhine Westphalia is fixed on the fly - holidata.net has
de-NRW, this script writes de-NW.

tl;dr
SCRIPT IS NOT FINISHED, THIS IS A GIT - USE ONLY IF YOU UNDERSTAND AND IF YOU ARE FROM ZE GERMANY
"""

import argparse
import io
import json
import sys
from contextlib import closing
from datetime import datetime
from time import strftime

if sys.version_info >= (3, 0):
    from urllib.request import urlopen
    from urllib.error import HTTPError, URLError
else:
    from urllib2 import urlopen, HTTPError, URLError
    # Python 2 only: keep implicit str/unicode conversions (e.g. when printing holiday names
    # to a pipe) from failing on non-ASCII characters. Neither name exists on Python 3.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf8')

# Base URL of the data provider; "<base>/<locode>/<year>.json" yields one JSON object per line.
_BASE_URL = "https://holidata.net"
# Seconds to wait for holidata.net before giving up.
_TIMEOUT = 30
# Output files are named "<prefix><country>-<region>" (see the module docstring for the reason).
_OUTPUT_PREFIX = "test."
# Date written for entries that come without a date.
_PLACEHOLDER_DATE = u"1970_01_01"
# Region codes of holidata.net that violate ISO-3166-2, mapped to the correct code.
_REGION_FIXES = {"NRW": "NW"}  # or Neustadt/Weinstr., Palz > Pott

# time.strftime() is used instead of a naive datetime.now() because the latter renders "%z"
# as an empty string. The u"" literals keep the header a text string on Python 2 as well,
# which io.open() requires.
__hheader__ = (
    u"# File auto-generated by lirion.de's refresh,\n"
    u"# Date: " + strftime("%Y-%m-%d %H:%M:%S %z") + u"\n"
    u"# data provided by holidata.net\n\n"
    u"define holidays:\n"
)


def headwrite(file):
    """
    Create (or truncate) the given file and write the default header to it. Content see
    __hheader__ right above.

    file -- path of the file to (over)write.

    Prints a message and exits the interpreter with status 2 if the file cannot be written.
    """
    try:
        with io.open(file, "w", encoding="utf-8") as ofile:
            ofile.write(__hheader__)
    except IOError:
        print("Cannot write to %s!" % file)
        sys.exit(2)


def hfetch(locode, year):
    """
    This function downloads the holiday data for a given locale code from holidata.net.

    locode -- locale code as used by holidata.net, e.g. "de-DE".
    year   -- year to download, as string or number.

    Returns a single string containing all of the JSON (one object per line). If the download
    fails, a message is printed and an empty string is returned.
    """
    url = "%s/%s/%s.json" % (_BASE_URL, locode, year)
    try:
        # closing() because Python 2 response objects are not context managers themselves.
        with closing(urlopen(url, timeout=_TIMEOUT)) as response:
            return response.read().decode('utf-8')
    except HTTPError as httpe:
        if httpe.code == 404:
            print("holidata.net does not have data for %s, for %s." % (locode, year))
        else:
            body = httpe.read().decode('utf-8', 'replace')
            print("holidata.net answered with HTTP %s: %s" % (httpe.code, body))
    except URLError as urle:
        # HTTPError is a subclass of URLError, so this only sees connection-level failures.
        print("Cannot reach holidata.net: %s" % (urle.reason,))
    return ""


def hparse(lines, locode, year):
    """
    This function parses the JSON string we expect from holidata.net and writes its fragments to
    the specific region files, if a region is given. So, e.g.: a holiday for all of Germany has
    an empty region and is written to test.de-DE, whereas a holiday that is only valid for
    Saxony-Anhalt (region "ST") is written to test.de-ST.

    lines  -- the downloaded data, one JSON object per line; blank lines are skipped.
    locode -- locale code the data was requested for (currently unused, kept for callers).
    year   -- year the data was requested for (currently unused, kept for callers).

    Every output file is truncated and rewritten. Returns the list of "<country>-<region>"
    names that were written, in order of first appearance.
    """
    entries = {}  # "<country>-<region>" -> list of ready-to-write holiday lines
    order = []    # keys of `entries` in order of first appearance

    for line in lines.splitlines():
        line = line.strip()
        if not line:
            continue
        jdata = json.loads(line)
        locale = jdata['locale']
        country, _, default_region = locale.partition('-')
        # Nationwide holidays carry an empty region; file them under the locale's own region.
        region = jdata.get('region') or default_region
        region = _REGION_FIXES.get(region, region)
        hlocale = country + "-" + region

        if jdata.get('date'):
            parsed = datetime.strptime(jdata['date'], '%Y-%m-%d')
            date = parsed.strftime('%Y_%m_%d')
        else:
            print("%s has no date given for %s!" % (jdata['description'], locale))
            date = _PLACEHOLDER_DATE

        if hlocale not in entries:
            entries[hlocale] = []
            order.append(hlocale)
        # Four spaces: holiday entries are nested below the two-space locale line.
        entries[hlocale].append(u"    %s = %s\n" % (date, jdata['description']))

    for hlocale in order:
        path = _OUTPUT_PREFIX + hlocale
        headwrite(path)
        with io.open(path, "a", encoding="utf-8") as ofile:
            ofile.write(u"  %s:\n" % hlocale)
            ofile.writelines(entries[hlocale])
    return order


def main(argv=None):
    """
    Fetch the holiday data for one locale and year and write the timewarrior files.

    argv -- command line arguments without the program name; defaults to sys.argv[1:].
            Accepts an optional locale code (default "de-DE") and an optional year
            (default: the current year).

    Returns the process exit status: 0 on success, 3 if no data could be downloaded.
    """
    parser = argparse.ArgumentParser(
        description="Download holiday data from holidata.net for timewarrior.")
    parser.add_argument("locode", nargs="?", default="de-DE",
                        help="locale code as used by holidata.net (default: de-DE)")
    parser.add_argument("year", nargs="?", default=None,
                        help="year to download (default: current year)")
    args = parser.parse_args(argv)
    year = args.year or str(datetime.now().year)

    sys.stdout.write("Fetching holiday data from holidata.net...")
    sys.stdout.flush()
    lines = hfetch(args.locode, year)
    if not lines:
        print(" failed.")
        print("No lines returned from holidata.net for %s!" % args.locode)
        return 3
    print(" done.")

    sys.stdout.write("Parsing data")
    sys.stdout.flush()
    written = hparse(lines, args.locode, year)
    print(" done.")
    print("Wrote: %s" % ", ".join(_OUTPUT_PREFIX + name for name in written))
    return 0


if __name__ == "__main__":
    sys.exit(main())