#!/usr/bin/python3 import sys import pandas as pd import numpy as np import os # infile = 'wa.csv' infile = sys.argv[1] f = open(infile) l = f.readlines() h = l[0] hsub = l[1] # TheExpertTA keep headers in 2 lines (mixed with max possible points) maxPossible = l[1] f.close() # clean up of headers h = h.strip() h = h.replace('"', "") hsub = hsub.strip() hsub = hsub.replace('"', "") hsub = hsub.split(",") headers = h.split(",") # we should fail hard if this column names are not present headers[hsub.index("Last")] = "LastName" headers[hsub.index("First")] = "FirstName" headers[hsub.index("Email")] = "UserName" headers[hsub.index("Student No")] = "SID" # headers[0]='FullName' # headers[1]='UserName' # headers[2]='SID' # headers[3]='TotalPcnt' # headers[4]='TotalScore' d = pd.read_csv(infile, skiprows=[0], header=None, names=headers) # First row contains max points information, so let's reassign it d.loc[0, "UserName"] = "_Max_Points_" d.loc[0, "LastName"] = "MaxScore" d.loc[0, "FirstName"] = "MaxScore" d.loc[0, "SID"] = pd.NA specialUsers = [] specialUsers.append("_Max_Points_") # lets add row which will be in charge of the column type d = pd.concat( [d, pd.DataFrame({"UserName": ["_Col_Category_"]}, columns=d.columns)], ignore_index=True, ) specialUsers.append("_Col_Category_") # cleanup # c = d.columns # c=c.drop(['FullName', 'UserName', 'SID']) # index = d[c] == 'ND' # d[index] = np.nan # index = d[c] == 'NS' # d[index] = np.nan # TheExperTA last column contains 'Averages' per student which we do not need d.drop(columns=["Averages"], inplace=True) # TheExperTA last row contains Averages per assignment which we do not need row = d[ (d["LastName"] == "Averages") & (d["FirstName"].isna()) & (d["UserName"].isna()) ] d.drop(row.index, inplace=True) # hand tuned fixes d["UserName"].replace("@email.wm.edu$", "@wm.edu", regex=True, inplace=True) # d['UserName'].replace('phanng@hotmail.com@tj.va$', 'kphan@wm.edu', regex=True, inplace=True) # Now let's convert percentage which TheExperTA reports to points as GradeTable expects for c in d.columns: if c in ["LastName", "FirstName", "UserName", "SID"]: continue maxP = d.loc[(d["UserName"] == "_Max_Points_")][c].values[0] index = ~d["UserName"].isin(specialUsers) d.loc[index, c] *= maxP / 100 # convert percentage to points # TheExpertTA due to their percentage system generates +/- 0.0000001 scores # which is useless and make very long numbers in tables. # So we are rounding here. d.loc[index, c] = d.loc[index, c].round(4) # now we are trying to guess column category if "hw" in c.lower(): d.loc[(d["UserName"] == "_Col_Category_"), c] = "HomeWork" d.to_csv("TheExpertTA.csv", index=False) # now import to sqlite3 os.popen("rm -f TheExpertTA.db") p = os.popen( 'printf ".mode csv\n.import "TheExpertTA.csv" export_table\n.q" | sqlite3 TheExpertTA.db' ) p.close()