aboutsummaryrefslogtreecommitdiff
path: root/qolab/tableflow
diff options
context:
space:
mode:
authorEugeniy E. Mikhailov <evgmik@gmail.com>2024-05-28 23:50:32 -0400
committerEugeniy E. Mikhailov <evgmik@gmail.com>2024-05-28 23:50:32 -0400
commit593e395eb7d17873ddddd5c57bce35d7d0c30e1d (patch)
tree254e14da610f2300572445db1ef4400ddf0ce9e4 /qolab/tableflow
parenta68804ebc415b1642fbbeaf10702880da4ab3c88 (diff)
downloadqolab-593e395eb7d17873ddddd5c57bce35d7d0c30e1d.tar.gz
qolab-593e395eb7d17873ddddd5c57bce35d7d0c30e1d.zip
template for reflow/postprocessing table function
Diffstat (limited to 'qolab/tableflow')
-rw-r--r--qolab/tableflow/__init__.py29
1 files changed, 28 insertions, 1 deletions
diff --git a/qolab/tableflow/__init__.py b/qolab/tableflow/__init__.py
index a83120e..ea380c3 100644
--- a/qolab/tableflow/__init__.py
+++ b/qolab/tableflow/__init__.py
@@ -13,6 +13,7 @@ Super handy for bulk processing data files where only a few parameters changed.
"""
import pandas as pd
+import warnings
def loadInOutTables(inputFileName=None, outputFileName=None, comment=None):
if not inputFileName:
@@ -52,7 +53,7 @@ def updateTblRowAt(tbl, i, row):
return
def isRedoNeeded(row, cols2check):
- # redo is required if all required entries in cols2check are NA
+ # redo is required if *all* required entries in cols2check are NA
# or we are missing columns in cols2check list
for c in cols2check:
if c not in row.keys():
@@ -61,3 +62,29 @@ def isRedoNeeded(row, cols2check):
return True
return False
+def reflowTable(tIn, tOut, process_row_func=None, postProcessedColums=None, extraInfo=None, redo=False):
+    # Update tOut in place based on the inputs specified in tIn:
+    # effectively maps the not yet processed rows through process_row_func.
+    # - postProcessedColums is a list of column names which need to be generated
+    # - extraInfo is a dictionary of additional parameters passed to process_row_func
+    # - process_row_func is expected to behave like:
+    #     rowOut = process_row_func(rowIn, extraInfo=userInfo)
+    # - redo controls if reflow is needed unconditionally (i.e. force reflow)
+    if not process_row_func:
+        warnings.warn("process_row_func is not provided, exiting reflowTable")
+        return
+    if not postProcessedColums:
+        warnings.warn("postProcessedColums are not provided, exiting reflowTable")
+        return
+
+    for _, rowIn in tIn.iterrows():
+        iOut = ilocRowOrAdd(tOut, rowIn)
+        rowOutBefore = tOut.iloc[iOut]
+
+        # check the row as it currently is in tOut: rowOut is not assigned yet here
+        if not (redo or isRedoNeeded(rowOutBefore, postProcessedColums)):
+            continue
+
+        # processing data describing row
+        rowOut = process_row_func(rowOutBefore, extraInfo=extraInfo)
+        updateTblRowAt(tOut, iOut, rowOut)