from collections import defaultdict
import csv
import math
import sys


def timeName(start):
    """
    Convert a 24-hour time into a coarse, human-readable period label.

    Values above 24 are wrapped back into a single day before being
    classified.  Values below 3 (including negative values, which are not
    wrapped) and values of 23 or more map to "LATENIGHT".

    Args:
        start: Hour of day as a number; may exceed 24 (for example a start
            hour plus a duration that runs past midnight).

    Returns:
        One of "LATENIGHT", "EARLYMORN", "LATEMORN", "AFTERNOON",
        "EVENING" or "NIGHT".

    Raises:
        ValueError: If ``start`` is NaN or infinite.
    """
    # float() happily parses "nan" and "inf"; reject them here so callers
    # that already handle ValueError treat them as unparseable hours.
    if not math.isfinite(start):
        raise ValueError("hour must be a finite number, got %r" % (start,))

    # Modulo instead of repeated subtraction: subtracting 24 from a very
    # large float leaves it unchanged, so a subtraction loop never ends.
    # Exact multiples of 24 become 0 rather than 24; both are "LATENIGHT".
    if start > 24:
        start %= 24

    if start < 3:
        return "LATENIGHT"
    elif start < 7:
        return "EARLYMORN"
    elif start < 11:
        return "LATEMORN"
    elif start < 15:
        return "AFTERNOON"
    elif start < 19:
        return "EVENING"
    elif start < 23:
        return "NIGHT"
    else:
        return "LATENIGHT"


def coords(loc):
    """
    Turn a "lat:long:elev" string into a (lat, long) tuple of floats.

    The string must contain exactly three colon-separated fields; the third
    field is discarded without being converted.

    Raises:
        ValueError: If ``loc`` does not split into exactly three fields, or
            if either of the first two fields is not a valid float.
    """
    lat, long, _elev = loc.split(":")
    return (float(lat), float(long))


def _tally_trips(rows):
    """
    Count departures and returns per station and time period.

    Args:
        rows: Iterable of mappings with the keys "startStation",
            "endStation", "startHour", "duration", "startPos" and "endPos".

    Returns:
        A ``(starts, ends, locs)`` tuple: ``starts[station][period]`` and
        ``ends[station][period]`` hold trip counts, and ``locs[station]``
        holds the (lat, long) of the first position seen for that station.
    """
    starts = defaultdict(dict)
    ends = defaultdict(dict)
    locs = dict()

    for row in rows:
        origin = row["startStation"]
        dest = row["endStation"]

        # Rows with unparseable times still contribute station locations;
        # they are only left out of the counts.
        try:
            leave_period = timeName(float(row["startHour"]))
        except ValueError:
            leave_period = None

        try:
            return_period = timeName(float(row["startHour"]) +
                                     float(row["duration"]))
        except ValueError:
            return_period = None

        if origin not in locs:
            locs[origin] = coords(row["startPos"])
        if dest not in locs:
            locs[dest] = coords(row["endPos"])

        if leave_period:
            by_period = starts[origin]
            by_period[leave_period] = by_period.get(leave_period, 0) + 1
        if return_period:
            by_period = ends[dest]
            by_period[return_period] = by_period.get(return_period, 0) + 1

    return starts, ends, locs


def _write_counts(writer, trip_type, stations, locs):
    """Write one output row per (station, time period) for ``trip_type``."""
    for station, by_period in stations.items():
        lat, long = locs[station]
        for period, count in by_period.items():
            writer.writerow({
                "station": station,
                "type": trip_type,
                "count": count,
                "time": period,
                "lat": lat,
                "long": long,
            })


def main(argv):
    """
    Read the trip CSV named by ``argv[1]`` and write per-station counts
    to the CSV named by ``argv[2]``.
    """
    # Fail before doing any work rather than after reading the whole input.
    if len(argv) < 3:
        sys.exit("usage: %s INPUT_CSV OUTPUT_CSV" % argv[0])

    input_filename = argv[1]
    output_filename = argv[2]

    # Go through all the lines in the original file
    print("Reading from %s" % input_filename)
    # newline='' is what the csv module expects for files it reads/writes.
    with open(input_filename, 'r', newline='') as source:
        starts, ends, locs = _tally_trips(csv.DictReader(source))

    header = ["station", "type", "count", "time", "lat", "long"]
    with open(output_filename, 'w', newline='') as sink:
        writer = csv.DictWriter(sink, fieldnames=header)
        writer.writeheader()
        _write_counts(writer, "Leave", starts, locs)
        _write_counts(writer, "Return", ends, locs)


if __name__ == "__main__":
    main(sys.argv)