diff options
author | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2024-05-28 23:50:32 -0400 |
---|---|---|
committer | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2024-05-28 23:50:32 -0400 |
commit | 593e395eb7d17873ddddd5c57bce35d7d0c30e1d (patch) | |
tree | 254e14da610f2300572445db1ef4400ddf0ce9e4 | |
parent | a68804ebc415b1642fbbeaf10702880da4ab3c88 (diff) | |
download | qolab-593e395eb7d17873ddddd5c57bce35d7d0c30e1d.tar.gz qolab-593e395eb7d17873ddddd5c57bce35d7d0c30e1d.zip |
template for reflow/postprocessing table function
-rw-r--r-- | qolab/tableflow/__init__.py | 29 | ||||
-rw-r--r-- | tests/tableflow_test_data/tableOut1pariallyProcessed.csv | 6 | ||||
-rw-r--r-- | tests/test_tableflow.py | 14 |
3 files changed, 48 insertions, 1 deletions
diff --git a/qolab/tableflow/__init__.py b/qolab/tableflow/__init__.py
index a83120e..ea380c3 100644
--- a/qolab/tableflow/__init__.py
+++ b/qolab/tableflow/__init__.py
@@ -13,6 +13,7 @@ Super handy for bulk processing data files where only a few parameters changed.
 """
 
 import pandas as pd
+import warnings
 
 def loadInOutTables(inputFileName=None, outputFileName=None, comment=None):
     if not inputFileName:
@@ -52,7 +53,7 @@ def updateTblRowAt(tbl, i, row):
     return
 
 def isRedoNeeded(row, cols2check):
-    # redo is required if all required entries in cols2check are NA
+    # redo is required if *all* required entries in cols2check are NA
     # or we are missing columns in cols2check list
     for c in cols2check:
         if c not in row.keys():
@@ -61,3 +62,29 @@ def isRedoNeeded(row, cols2check):
         return True
     return False
 
+def reflowTable(tIn, tOut, process_row_func=None, postProcessedColums=None, extraInfo=None, redo=False):
+    # update tOut in place based on the inputs specified in tIn
+    # effectively maps unprocessed rows into process_row_func
+    # - postProcessedColums is a list of column names which need to be generated
+    # - extraInfo is a dictionary of additional parameters supplied to process_row_func
+    # - process_row_func is expected to behave like:
+    #      rowOut = process_row_func(rowIn, extraInfo=userInfo)
+    # - redo controls if reflow is needed unconditionally (i.e. force reflow)
+    if not process_row_func:
+        warnings.warn("process_row_func is not provided, exiting reflowTable")
+        return
+    if not postProcessedColums:
+        warnings.warn("postProcessedColums are not provided, exiting reflowTable")
+        return
+
+    for index, rowIn in tIn.iterrows():
+        iOut = ilocRowOrAdd(tOut, rowIn)
+        rowOutBefore = tOut.iloc[iOut]
+
+        if not (redo or isRedoNeeded(rowOutBefore, postProcessedColums)):
+            continue
+
+        # processing data describing row
+        rowOut = process_row_func(rowOutBefore, extraInfo=extraInfo)
+        updateTblRowAt(tOut, iOut, rowOut)
+
diff --git a/tests/tableflow_test_data/tableOut1pariallyProcessed.csv b/tests/tableflow_test_data/tableOut1pariallyProcessed.csv
new file mode 100644
index 0000000..250a55e
--- /dev/null
+++ b/tests/tableflow_test_data/tableOut1pariallyProcessed.csv
@@ -0,0 +1,6 @@
+# this is comment line1
+# this is comment line2
+# make sure that the very first column has numbers in it
+x,y,z,out1,out2
+2,3,4,4,9
+
diff --git a/tests/test_tableflow.py b/tests/test_tableflow.py
index 9d6bb3e..0ab8e76 100644
--- a/tests/test_tableflow.py
+++ b/tests/test_tableflow.py
@@ -59,3 +59,17 @@ def test_for_nonexisting_row_and_its_insertion():
     assert tblfl.ilocRowOrAdd(tbl1, r) == 3
     assert len(tbl1) == 4
 
+def test_isRedoNeeded():
+    r = pd.Series({'a':2, 'b':4, 'c':pd.NA})
+    assert not tblfl.isRedoNeeded(r, ['a','b'])
+    assert tblfl.isRedoNeeded(r, ['c'])
+    assert tblfl.isRedoNeeded(r, ['non_existing'])
+    assert not tblfl.isRedoNeeded(r, ['b', 'c'])
+
+def test_reflowTable():
+    tIn,tOut = tblfl.loadInOutTables(inputFileName='tests/tableflow_test_data/tableIn1.csv', outputFileName='tests/tableflow_test_data/tableOut1pariallyProcessed.csv', comment='#')
+    tOutRef = tOut.copy()
+    with pytest.warns(UserWarning):
+        tblfl.reflowTable(tIn,tOut)
+
+