From df5a16c2f4e29bc2ab28a7bb04ba62279da9ca5a Mon Sep 17 00:00:00 2001
From: "Eugeniy E. Mikhailov" <evgmik@gmail.com>
Date: Sun, 15 Oct 2023 19:33:55 -0400
Subject: improve table conversion from TheExperTA

---
 regenTheExpertTA.py | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/regenTheExpertTA.py b/regenTheExpertTA.py
index 335fd7c..81ea437 100755
--- a/regenTheExpertTA.py
+++ b/regenTheExpertTA.py
@@ -23,7 +23,7 @@ hsub = hsub.replace('"', '')
 hsub = hsub.split(',')
 headers = h.split(',')
 
-# we should fail hard if this column namess are not present
+# we should fail hard if these column names are not present
 headers[hsub.index('Last')]='LastName'
 headers[hsub.index('First')]='FirstName'
 headers[hsub.index('Email')]='UserName'
@@ -35,9 +35,19 @@ headers[hsub.index('Student No')]='SID'
 # headers[3]='TotalPcnt'
 # headers[4]='TotalScore'
 
-d = pd.read_csv(infile, skiprows=[0,1], header=None, names=headers)
-# d.loc[0, 'FullName']='MaxScore'
-# d.loc[0, 'UserName']='MaxScore'
+d = pd.read_csv(infile, skiprows=[0], header=None, names=headers)
+
+# First row contains max points information, so let's reassign it
+d.loc[0, 'UserName']='_Max_Points_'
+d.loc[0, 'LastName']='MaxScore'
+d.loc[0, 'FirstName']='MaxScore'
+d.loc[0, 'SID']=pd.NA
+
+specialUsers=[]
+specialUsers.append('_Max_Points_')
+# let's add a row which will hold the category (type) of each column
+d=pd.concat([d, pd.DataFrame({'UserName': ['_Col_Category_']}, columns=d.columns)], ignore_index=True)
+specialUsers.append('_Col_Category_')
 
 # cleanup
 # c = d.columns
@@ -56,6 +66,21 @@ d.drop(row.index, inplace=True)
 # hand tuned fixes
 # d['UserName'].replace('phanng@hotmail.com@tj.va$', 'kphan@wm.edu', regex=True, inplace=True)
 
+# Now let's convert the percentages which TheExpertTA reports to points, as GradeTable expects
+for c in d.columns:
+    if c in ['LastName', 'FirstName', 'UserName', 'SID']:
+        continue
+    maxP = d.loc[(d['UserName'] == '_Max_Points_')][c].values[0]
+    d.loc[(d['UserName'] == '_Max_Points_')][c]
+    index = ~d['UserName'].isin( specialUsers )
+    d.loc[index,c] *= maxP/100  # convert percentage to points
+
+    # now try to guess the column category from the column name
+    if 'hw' in c.lower():
+        d.loc[(d['UserName'] == '_Col_Category_'), c] = 'HomeWork'
+    
+
+
 d.to_csv('TheExperTA.csv')
 
 # now import to sqlite3
-- 
cgit v1.2.3