diff options
-rwxr-xr-x | regenTheExpertTA.py | 66 |
1 files changed, 66 insertions, 0 deletions
#!/usr/bin/python3
"""Convert a TheExpertTA gradebook export into a clean CSV and SQLite table.

Usage: regenTheExpertTA.py INFILE

TheExpertTA keeps its headers in two lines: the first carries the column
titles, the second carries the identity labels ('Last', 'First', 'Email',
'Student No') mixed with the maximum possible points.  This script merges
the two into a single header, drops the 'Averages' column and the
'Averages' summary row, writes the result to ``TheExperTA.csv`` and imports
that file into the ``export_table`` table of ``TheExperTA.db`` using the
``sqlite3`` command-line shell.
"""

import contextlib
import csv
import os
import subprocess
import sys

import numpy as np
import pandas as pd

CSV_PATH = 'TheExperTA.csv'
DB_PATH = 'TheExperTA.db'
TABLE_NAME = 'export_table'

# Label found in the second header line -> column name used in the output.
IDENTITY_COLUMNS = {
    'Last': 'LastName',
    'First': 'FirstName',
    'Email': 'UserName',
    'Student No': 'SID',
}


def _parse_header_line(line):
    """Return the fields of one raw CSV header line.

    The csv module is used instead of ``str.split(',')`` so that a quoted
    field containing a comma stays a single field, which keeps the names
    aligned with the columns pandas parses from the data rows.
    """
    return next(csv.reader([line.strip()]), [])


def build_headers(header_line, subheader_line):
    """Merge the two TheExpertTA header lines into one list of column names.

    Args:
        header_line: first line of the export (column titles).
        subheader_line: second line of the export (identity labels mixed
            with maximum possible points).

    Returns:
        The fields of ``header_line`` with the positions of the identity
        labels replaced by the names in ``IDENTITY_COLUMNS``.

    Raises:
        ValueError: if one of the identity labels is missing from
            ``subheader_line``; the export is unusable without them.
    """
    headers = _parse_header_line(header_line)
    subheaders = _parse_header_line(subheader_line)
    for label, column_name in IDENTITY_COLUMNS.items():
        try:
            position = subheaders.index(label)
        except ValueError as err:
            # Fail hard: without these columns the output is meaningless.
            raise ValueError(
                'required column {!r} not found in the second header line'
                .format(label)
            ) from err
        headers[position] = column_name
    return headers


def load_gradebook(infile):
    """Read a TheExpertTA export and return the cleaned DataFrame.

    Args:
        infile: path of the CSV file exported from TheExpertTA.

    Returns:
        A DataFrame with the merged header as column names, without the
        'Averages' column and without the 'Averages' summary row.  The
        original row labels are kept.

    Raises:
        ValueError: if a required identity column is missing.
        KeyError: if the export has no 'Averages' column.
    """
    # utf-8-sig drops a leading byte-order mark so that it cannot end up
    # inside the first column name; only the two header lines are needed.
    with open(infile, encoding='utf-8-sig') as source:
        header_line = source.readline()
        subheader_line = source.readline()

    headers = build_headers(header_line, subheader_line)
    grades = pd.read_csv(infile, skiprows=[0, 1], header=None, names=headers)

    # The 'Averages' column holds per-student averages, which are not needed.
    grades = grades.drop(columns=['Averages'])

    # The 'Averages' row holds per-assignment averages; it is recognised by
    # its label in 'LastName' together with empty first name and e-mail.
    is_summary_row = (
        (grades['LastName'] == 'Averages')
        & grades['FirstName'].isna()
        & grades['UserName'].isna()
    )
    return grades[~is_summary_row]


def export_to_sqlite(csv_path=CSV_PATH, db_path=DB_PATH, table=TABLE_NAME):
    """Import ``csv_path`` into a freshly created SQLite database.

    Args:
        csv_path: CSV file to import.
        db_path: database file to (re)create.
        table: name of the table that receives the rows.

    Raises:
        FileNotFoundError: if the ``sqlite3`` executable is not installed.
        subprocess.CalledProcessError: if ``sqlite3`` exits with an error.
    """
    # Remove the old database synchronously, before sqlite3 starts, so the
    # removal can never race with the creation of the new file.
    with contextlib.suppress(FileNotFoundError):
        os.remove(db_path)

    commands = '.mode csv\n.import "{}" {}\n.q\n'.format(csv_path, table)
    # The commands are fed on stdin and no shell is involved, so there is
    # no shell quoting to get wrong.
    subprocess.run(['sqlite3', db_path], input=commands, text=True, check=True)


def main(argv=None):
    """Run the conversion; return the process exit status.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  Only the first argument (the input file) is
            used.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print('usage: {} INFILE'.format(os.path.basename(sys.argv[0])),
              file=sys.stderr)
        return 2

    grades = load_gradebook(args[0])

    # Hand-tuned per-student fixes (for example correcting a mistyped
    # e-mail address in 'UserName') belong here, before the CSV is written.

    # The DataFrame index is written as an unnamed first column.
    grades.to_csv(CSV_PATH)
    export_to_sqlite()
    return 0


if __name__ == '__main__':
    sys.exit(main())