#!/usr/bin/python3 import sys import pandas as pd import numpy as np import os # infile = 'wa.csv' infile = sys.argv[1] f = open(infile) l= f.readlines() h= l[0] hsub=l[1] # TheExpertTA keep headers in 2 lines (mixed with max possible points) maxPossible=l[1] f.close() # clean up of headers h=h.strip() h = h.replace('"', '') hsub = hsub.strip() hsub = hsub.replace('"', '') hsub = hsub.split(',') headers = h.split(',') # we should fail hard if this column names are not present headers[hsub.index('Last')]='LastName' headers[hsub.index('First')]='FirstName' headers[hsub.index('Email')]='UserName' headers[hsub.index('Student No')]='SID' # headers[0]='FullName' # headers[1]='UserName' # headers[2]='SID' # headers[3]='TotalPcnt' # headers[4]='TotalScore' d = pd.read_csv(infile, skiprows=[0], header=None, names=headers) # First row contains max points information, so let's reassign it d.loc[0, 'UserName']='_Max_Points_' d.loc[0, 'LastName']='MaxScore' d.loc[0, 'FirstName']='MaxScore' d.loc[0, 'SID']=pd.NA specialUsers=[] specialUsers.append('_Max_Points_') # lets add row which will be in charge of the column type d=pd.concat([d, pd.DataFrame({'UserName': ['_Col_Category_']}, columns=d.columns)], ignore_index=True) specialUsers.append('_Col_Category_') # cleanup # c = d.columns # c=c.drop(['FullName', 'UserName', 'SID']) # index = d[c] == 'ND' # d[index] = np.nan # index = d[c] == 'NS' # d[index] = np.nan # TheExperTA last column contains 'Averages' per student which we do not need d.drop(columns=['Averages'], inplace=True) # TheExperTA last row contains Averages per assignment which we do not need row = d[(d['LastName']=='Averages') & (d['FirstName'].isna()) & (d['UserName'].isna())] d.drop(row.index, inplace=True) # hand tuned fixes # d['UserName'].replace('phanng@hotmail.com@tj.va$', 'kphan@wm.edu', regex=True, inplace=True) # Now let's convert percentage which TheExperTA reports to points as GradeTable expects for c in d.columns: if c in ['LastName', 'FirstName', 'UserName', 'SID']: continue maxP = d.loc[(d['UserName'] == '_Max_Points_')][c].values[0] d.loc[(d['UserName'] == '_Max_Points_')][c] index = ~d['UserName'].isin( specialUsers ) d.loc[index,c] *= maxP/100 # convert percentage to points # now we are trying to guess column category if 'hw' in c.lower(): d.loc[(d['UserName'] == '_Col_Category_'), c] = 'HomeWork' d.to_csv('TheExperTA.csv', index=False) # now import to sqlite3 os.popen('rm -f TheExperTA.db') p = os.popen('printf ".mode csv\n.import \"TheExperTA.csv\" export_table\n.q" | sqlite3 TheExperTA.db') p.close()