From df5a16c2f4e29bc2ab28a7bb04ba62279da9ca5a Mon Sep 17 00:00:00 2001
From: "Eugeniy E. Mikhailov"
Date: Sun, 15 Oct 2023 19:33:55 -0400
Subject: improve table conversion from TheExperTA

---
Review notes (placed between the "---" separator and the first "diff --git"
line, so they are not part of the commit message or of the change itself):

* Layout: this copy of the patch had been flattened onto two physical lines.
  Line breaks, Python indentation (4 spaces assumed) and the position of the
  blank context lines were reconstructed from the hunk line counts, assuming
  the text following each "@@ ... @@" marker is the hunk heading. Confirm
  against blob 335fd7c before applying.
* Third hunk: the added statement
      d.loc[(d['UserName'] == '_Max_Points_')][c]
  right after the maxP assignment only evaluates a selection and discards the
  result, so it has no effect. It looks like a leftover and can be dropped in
  a follow-up change.

 regenTheExpertTA.py | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/regenTheExpertTA.py b/regenTheExpertTA.py
index 335fd7c..81ea437 100755
--- a/regenTheExpertTA.py
+++ b/regenTheExpertTA.py
@@ -23,7 +23,7 @@ hsub = hsub.replace('"', '')
 hsub = hsub.split(',')
 headers = h.split(',')
 
-# we should fail hard if this column namess are not present
+# we should fail hard if this column names are not present
 headers[hsub.index('Last')]='LastName'
 headers[hsub.index('First')]='FirstName'
 headers[hsub.index('Email')]='UserName'
@@ -35,9 +35,19 @@ headers[hsub.index('Student No')]='SID'
 # headers[3]='TotalPcnt'
 # headers[4]='TotalScore'
 
-d = pd.read_csv(infile, skiprows=[0,1], header=None, names=headers)
-# d.loc[0, 'FullName']='MaxScore'
-# d.loc[0, 'UserName']='MaxScore'
+d = pd.read_csv(infile, skiprows=[0], header=None, names=headers)
+
+# First row contains max points information, so let's reassign it
+d.loc[0, 'UserName']='_Max_Points_'
+d.loc[0, 'LastName']='MaxScore'
+d.loc[0, 'FirstName']='MaxScore'
+d.loc[0, 'SID']=pd.NA
+
+specialUsers=[]
+specialUsers.append('_Max_Points_')
+# lets add row which will be in charge of the column type
+d=pd.concat([d, pd.DataFrame({'UserName': ['_Col_Category_']}, columns=d.columns)], ignore_index=True)
+specialUsers.append('_Col_Category_')
 
 # cleanup
 # c = d.columns
@@ -56,6 +66,21 @@ d.drop(row.index, inplace=True)
 
 # hand tuned fixes
 # d['UserName'].replace('phanng@hotmail.com@tj.va$', 'kphan@wm.edu', regex=True, inplace=True)
+# Now let's convert percentage which TheExperTA reports to points as GradeTable expects
+for c in d.columns:
+    if c in ['LastName', 'FirstName', 'UserName', 'SID']:
+        continue
+    maxP = d.loc[(d['UserName'] == '_Max_Points_')][c].values[0]
+    d.loc[(d['UserName'] == '_Max_Points_')][c]
+    index = ~d['UserName'].isin( specialUsers )
+    d.loc[index,c] *= maxP/100 # convert percentage to points
+
+    # now we are trying to guess column category
+    if 'hw' in c.lower():
+        d.loc[(d['UserName'] == '_Col_Category_'), c] = 'HomeWork'
+
+
+
 d.to_csv('TheExperTA.csv')
 
 # now import to sqlite3
-- 
cgit v1.2.3