template for reflow/postprocessing table function

author: Eugeniy E. Mikhailov <evgmik@gmail.com> 2024-05-28 23:50:32 -0400
committer: Eugeniy E. Mikhailov <evgmik@gmail.com> 2024-05-28 23:50:32 -0400
commit: 593e395eb7d17873ddddd5c57bce35d7d0c30e1d (patch)
tree: 254e14da610f2300572445db1ef4400ddf0ce9e4
parent: a68804ebc415b1642fbbeaf10702880da4ab3c88 (diff)
download: qolab-593e395eb7d17873ddddd5c57bce35d7d0c30e1d.tar.gz
qolab-593e395eb7d17873ddddd5c57bce35d7d0c30e1d.zip
3 files changed, 48 insertions, 1 deletions
diff --git a/qolab/tableflow/__init__.py b/qolab/tableflow/__init__.py
index a83120e..ea380c3 100644
--- a/qolab/tableflow/__init__.py
+++ b/qolab/tableflow/__init__.py
@@ -13,6 +13,7 @@ Super handy for bulk processing data files where only a few parameters changed.
 """
 
 import pandas as pd
+import warnings
 
 def loadInOutTables(inputFileName=None, outputFileName=None, comment=None):
     if not inputFileName:
@@ -52,7 +53,7 @@ def updateTblRowAt(tbl, i, row):
     return
 
 def isRedoNeeded(row, cols2check):
-    # redo is required if all required entries in cols2check are NA
+    # redo is required if *all* required entries in cols2check are NA
     # or we are missing columns in cols2check list
     for c in cols2check:
         if c not in row.keys():
@@ -61,3 +62,29 @@ def isRedoNeeded(row, cols2check):
         return True
     return False
 
+def reflowTable(tIn, tOut, process_row_func=None, postProcessedColums=None, extraInfo=None, redo=False):
+    # Update tOut in place based on the inputs specified in tIn;
+    # effectively maps unprocessed rows through process_row_func. Returns nothing.
+    # - postProcessedColums is a list of column names which need to be generated
+    # - extraInfo is a dictionary of additional parameters passed to process_row_func
+    # - process_row_func is expected to behave like:
+    #   rowOut = process_row_func(rowIn, extraInfo=userInfo)
+    # - redo forces the reflow of every row unconditionally (i.e. force reflow)
+    if not process_row_func:
+        warnings.warn("process_row_func is not provided, exiting reflowTable")
+        return
+    if not postProcessedColums:
+        warnings.warn("postProcessedColums are not provided, exiting reflowTable")
+        return
+
+    for index, rowIn in tIn.iterrows():
+        iOut = ilocRowOrAdd(tOut, rowIn)
+        rowOutBefore = tOut.iloc[iOut]
+        # check the row as it currently is in tOut: rowOut is not assigned until after processing
+        if not (redo or isRedoNeeded(rowOutBefore, postProcessedColums)):
+            continue
+
+        # processing data describing row
+        rowOut = process_row_func(rowOutBefore, extraInfo=extraInfo)
+        updateTblRowAt(tOut, iOut, rowOut)
+
diff --git a/tests/tableflow_test_data/tableOut1pariallyProcessed.csv b/tests/tableflow_test_data/tableOut1pariallyProcessed.csv
new file mode 100644
index 0000000..250a55e
--- /dev/null
+++ b/tests/tableflow_test_data/tableOut1pariallyProcessed.csv
@@ -0,0 +1,6 @@
+# this is comment line1
+# this is comment line2
+# make sure that the very first column has numbers in it
+x,y,z,out1,out2
+2,3,4,4,9
+
diff --git a/tests/test_tableflow.py b/tests/test_tableflow.py
index 9d6bb3e..0ab8e76 100644
--- a/tests/test_tableflow.py
+++ b/tests/test_tableflow.py
@@ -59,3 +59,17 @@ def test_for_nonexisting_row_and_its_insertion():
     assert tblfl.ilocRowOrAdd(tbl1, r) == 3
     assert len(tbl1) == 4
 
+def test_isRedoNeeded():  # checks isRedoNeeded on filled, NA, and missing columns
+    r = pd.Series({'a':2, 'b':4, 'c':pd.NA})
+    assert not tblfl.isRedoNeeded(r, ['a','b'])  # both checked entries are filled in
+    assert tblfl.isRedoNeeded(r, ['c'])  # the only checked entry is NA
+    assert tblfl.isRedoNeeded(r, ['non_existing'])  # checked column is absent from the row
+    assert not tblfl.isRedoNeeded(r, ['b', 'c'])  # not *all* checked entries are NA
+
+def test_reflowTable():  # reflowTable without process_row_func must warn and exit
+    tIn,tOut = tblfl.loadInOutTables(inputFileName='tests/tableflow_test_data/tableIn1.csv', outputFileName='tests/tableflow_test_data/tableOut1pariallyProcessed.csv', comment='#')
+    tOutRef = tOut.copy()  # NOTE(review): not used by any assertion visible in this test
+    with pytest.warns(UserWarning):  # warnings.warn without a category emits UserWarning
+        tblfl.reflowTable(tIn,tOut)
+    
+
author	Eugeniy E. Mikhailov <evgmik@gmail.com>	2024-05-28 23:50:32 -0400
committer	Eugeniy E. Mikhailov <evgmik@gmail.com>	2024-05-28 23:50:32 -0400
commit	593e395eb7d17873ddddd5c57bce35d7d0c30e1d (patch)
tree	254e14da610f2300572445db1ef4400ddf0ce9e4
parent	a68804ebc415b1642fbbeaf10702880da4ab3c88 (diff)
download	qolab-593e395eb7d17873ddddd5c57bce35d7d0c30e1d.tar.gz qolab-593e395eb7d17873ddddd5c57bce35d7d0c30e1d.zip