diff options
author | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2024-11-11 09:29:18 -0500 |
---|---|---|
committer | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2024-11-11 09:29:18 -0500 |
commit | fb11e4ced498d2ee0b6c66cfdb4d8259f9698715 (patch) | |
tree | 12145a1ae039d4ae1eabeb2ca58b109ae70f0404 /regenTheExpertTA.py | |
parent | d1892ef5c9a08049a3be73e0e9903c605bfedcd9 (diff) | |
parent | e3ff0c17fe50ef42303f49257b683f6e057ca5c1 (diff) | |
download | GradeBook-master.tar.gz GradeBook-master.zip |
Diffstat (limited to 'regenTheExpertTA.py')
-rwxr-xr-x | regenTheExpertTA.py | 74 |
1 files changed, 40 insertions, 34 deletions
diff --git a/regenTheExpertTA.py b/regenTheExpertTA.py index d0277bc..34c7f8e 100755 --- a/regenTheExpertTA.py +++ b/regenTheExpertTA.py @@ -9,25 +9,25 @@ import os infile = sys.argv[1] f = open(infile) -l= f.readlines() -h= l[0] -hsub=l[1] # TheExpertTA keep headers in 2 lines (mixed with max possible points) -maxPossible=l[1] +l = f.readlines() +h = l[0] +hsub = l[1] # TheExpertTA keep headers in 2 lines (mixed with max possible points) +maxPossible = l[1] f.close() # clean up of headers -h=h.strip() -h = h.replace('"', '') +h = h.strip() +h = h.replace('"', "") hsub = hsub.strip() -hsub = hsub.replace('"', '') -hsub = hsub.split(',') -headers = h.split(',') +hsub = hsub.replace('"', "") +hsub = hsub.split(",") +headers = h.split(",") # we should fail hard if this column names are not present -headers[hsub.index('Last')]='LastName' -headers[hsub.index('First')]='FirstName' -headers[hsub.index('Email')]='UserName' -headers[hsub.index('Student No')]='SID' +headers[hsub.index("Last")] = "LastName" +headers[hsub.index("First")] = "FirstName" +headers[hsub.index("Email")] = "UserName" +headers[hsub.index("Student No")] = "SID" # headers[0]='FullName' # headers[1]='UserName' @@ -38,16 +38,19 @@ headers[hsub.index('Student No')]='SID' d = pd.read_csv(infile, skiprows=[0], header=None, names=headers) # First row contains max points information, so let's reassign it -d.loc[0, 'UserName']='_Max_Points_' -d.loc[0, 'LastName']='MaxScore' -d.loc[0, 'FirstName']='MaxScore' -d.loc[0, 'SID']=pd.NA +d.loc[0, "UserName"] = "_Max_Points_" +d.loc[0, "LastName"] = "MaxScore" +d.loc[0, "FirstName"] = "MaxScore" +d.loc[0, "SID"] = pd.NA -specialUsers=[] -specialUsers.append('_Max_Points_') +specialUsers = [] +specialUsers.append("_Max_Points_") # lets add row which will be in charge of the column type -d=pd.concat([d, pd.DataFrame({'UserName': ['_Col_Category_']}, columns=d.columns)], ignore_index=True) -specialUsers.append('_Col_Category_') +d = pd.concat( + [d, pd.DataFrame({"UserName": ["_Col_Category_"]}, columns=d.columns)], + ignore_index=True, +) +specialUsers.append("_Col_Category_") # cleanup # c = d.columns @@ -58,22 +61,24 @@ specialUsers.append('_Col_Category_') # d[index] = np.nan # TheExperTA last column contains 'Averages' per student which we do not need -d.drop(columns=['Averages'], inplace=True) +d.drop(columns=["Averages"], inplace=True) # TheExperTA last row contains Averages per assignment which we do not need -row = d[(d['LastName']=='Averages') & (d['FirstName'].isna()) & (d['UserName'].isna())] +row = d[ + (d["LastName"] == "Averages") & (d["FirstName"].isna()) & (d["UserName"].isna()) +] d.drop(row.index, inplace=True) # hand tuned fixes -d['UserName'].replace('@email.wm.edu$', '@wm.edu', regex=True, inplace=True) +d["UserName"].replace("@email.wm.edu$", "@wm.edu", regex=True, inplace=True) # d['UserName'].replace('phanng@hotmail.com@tj.va$', 'kphan@wm.edu', regex=True, inplace=True) # Now let's convert percentage which TheExperTA reports to points as GradeTable expects for c in d.columns: - if c in ['LastName', 'FirstName', 'UserName', 'SID']: + if c in ["LastName", "FirstName", "UserName", "SID"]: continue - maxP = d.loc[(d['UserName'] == '_Max_Points_')][c].values[0] - index = ~d['UserName'].isin( specialUsers ) - d.loc[index,c] *= maxP/100 # convert percentage to points + maxP = d.loc[(d["UserName"] == "_Max_Points_")][c].values[0] + index = ~d["UserName"].isin(specialUsers) + d.loc[index, c] *= maxP / 100 # convert percentage to points # TheExpertTA due to their percentage system generates +/- 0.0000001 scores # which is useless and make very long numbers in tables. @@ -81,15 +86,16 @@ for c in d.columns: d.loc[index, c] = d.loc[index, c].round(4) # now we are trying to guess column category - if 'hw' in c.lower(): - d.loc[(d['UserName'] == '_Col_Category_'), c] = 'HomeWork' + if "hw" in c.lower(): + d.loc[(d["UserName"] == "_Col_Category_"), c] = "HomeWork" -d.to_csv('TheExpertTA.csv', index=False) +d.to_csv("TheExpertTA.csv", index=False) # now import to sqlite3 -os.popen('rm -f TheExpertTA.db') -p = os.popen('printf ".mode csv\n.import \"TheExpertTA.csv\" export_table\n.q" | sqlite3 TheExpertTA.db') +os.popen("rm -f TheExpertTA.db") +p = os.popen( + 'printf ".mode csv\n.import "TheExpertTA.csv" export_table\n.q" | sqlite3 TheExpertTA.db' +) p.close() - |