#!/usr/bin/python3 import sys import pandas as pd import numpy as np import os # infile = 'wa.csv' infile = sys.argv[1] f = open(infile) l = f.readlines() h = l[4] maxPossible = l[6] f.close() # clean up of headers h = h.strip() h = h.replace('"', "") headers = h.split(",") headers[0] = "FullName" headers[1] = "UserName" headers[2] = "SID" headers[3] = "TotalPcnt" headers[4] = "TotalScore" d = pd.read_csv(infile, skiprows=[0, 1, 2, 3, 4, 5, 7, 8], header=None, names=headers) d.loc[0, "FullName"] = "MaxScore" d.loc[0, "UserName"] = "MaxScore" # cleanup c = d.columns c = c.drop(["FullName", "UserName", "SID"]) index = d[c] == "ND" d[index] = np.nan index = d[c] == "NS" d[index] = np.nan d["UserName"].replace("@wm$", "", regex=True, inplace=True) d["UserName"].replace("@email.wm.edu$", "@wm.edu", regex=True, inplace=True) # hand tuned fixes d["UserName"].replace( "phanng@hotmail.com@tj.va$", "kphan@wm.edu", regex=True, inplace=True ) d["UserName"].replace( "chipkd2001@gmail.com$", "ckdangerio@wm.edu", regex=True, inplace=True ) d["UserName"].replace( "eliasarivera@live.com@schs.va$", "earivera@wm.edu", regex=True, inplace=True ) d.to_csv("WebAssign.csv", index=False) # now import to sqlite3 os.popen("rm -f WebAssign.db") p = os.popen( 'printf ".mode csv\n.import "WebAssign.csv" export_table\n.q" | sqlite3 WebAssign.db' ) p.close()