| author | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2024-07-13 18:56:04 -0400 |
|---|---|---|
| committer | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2024-07-13 18:56:04 -0400 |
| commit | 898a3b65d90b3f4b27757504bf2b405e87179e03 (patch) | |
| tree | 690638932cb56382841390d1941c1b10401c1a88 | /qolab/tableflow |
| parent | 0a731c74d16343c87b660f0f7908c97c044fe238 (diff) | |
| download | qolab-898a3b65d90b3f4b27757504bf2b405e87179e03.tar.gz, qolab-898a3b65d90b3f4b27757504bf2b405e87179e03.zip | |
black formatter
Diffstat (limited to 'qolab/tableflow')
| -rw-r--r-- | qolab/tableflow/__init__.py | 36 |
1 file changed, 24 insertions, 12 deletions
```diff
diff --git a/qolab/tableflow/__init__.py b/qolab/tableflow/__init__.py
index d185d6f..23e78a9 100644
--- a/qolab/tableflow/__init__.py
+++ b/qolab/tableflow/__init__.py
@@ -16,6 +16,7 @@ Super handy for bulk processing data files where only a few parameters changed.
 import pandas as pd
 import warnings
 
+
 def loadInOutTables(inputFileName=None, outputFileName=None, comment=None):
     """Load the input and the output tables from files.
 
@@ -38,42 +39,46 @@ def loadInOutTables(inputFileName=None, outputFileName=None, comment=None):
         return None, None
 
     if not comment:
-        comment = '#'
+        comment = "#"
 
     tIn = pd.read_csv(inputFileName, comment=comment)
-    tIn.columns = tIn.columns.str.removeprefix(' '); # clean up leading white space in columns names
+    tIn.columns = tIn.columns.str.removeprefix(" ")
+    # clean up leading white space in columns names
 
     try:
-        tOut=pd.read_csv(outputFileName, comment=comment)
+        tOut = pd.read_csv(outputFileName, comment=comment)
     except Exception:
-        tOut=tIn.copy(deep=True)
+        tOut = tIn.copy(deep=True)
 
     return tIn, tOut
 
+
 def ilocRowOrAdd(tbl, row):
     """Find a row in a table (`tbl`) similar to a provided `row`.
 
     NA in both sets treated as a match.
-    If similar 'row' not found in the table, insert it. 
+    If similar 'row' not found in the table, insert it.
     """
     tSub = tbl[row.keys()]
-    res = (tSub == row) | (tSub.isna() & row.isna() )
-    res = res.all(axis=1) # which rows coincide
+    res = (tSub == row) | (tSub.isna() & row.isna())
+    res = res.all(axis=1)  # which rows coincide
     if res.any():
         # we have a similar row
         i = res[res].index[0]
     else:
         # we need to create new row since tbl does not has it
-        i=len(tbl)
+        i = len(tbl)
         updateTblRowAt(tbl, i, row)
     return i
 
+
 def updateTblRowAt(tbl, i, row):
     """Update row with position 'i' in the table ('tbl') with values from 'row'."""
     for k in row.keys():
         tbl.at[i, k] = row[k]
     return
 
+
 def isRedoNeeded(row, cols2check):
     """Check is Redo required in a given row.
 
@@ -94,11 +99,19 @@ def isRedoNeeded(row, cols2check):
             return True
     return False
 
-def reflowTable(tIn, tOut, process_row_func=None, postProcessedColums=None, extraInfo=None, redo=False):
+
+def reflowTable(
+    tIn,
+    tOut,
+    process_row_func=None,
+    postProcessedColums=None,
+    extraInfo=None,
+    redo=False,
+):
     """Reflow/update table tOut in place based on the inputs specified in table tIn.
 
     Effectively maps unprocessed rows to ``process_row_func``.
-    
+
     Parameters
     ==========
     postProcessedColums : list of strings
@@ -125,10 +138,9 @@ def reflowTable(tIn, tOut, process_row_func=None, postProcessedColums=None, extr
         iOut = ilocRowOrAdd(tOut, rowIn)
         rowOutBefore = tOut.iloc[iOut]
 
-        if not (redo or isRedoNeeded(rowOutBefore, postProcessedColums) ):
+        if not (redo or isRedoNeeded(rowOutBefore, postProcessedColums)):
            continue
 
         # processing data describing row
         rowOut = process_row_func(rowOutBefore, extraInfo=extraInfo)
         updateTblRowAt(tOut, iOut, rowOut)
-
```
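For readers skimming the diff: the touched functions form a small table-reflow workflow. `loadInOutTables` reads the input and output CSV tables, and `reflowTable` applies `process_row_func` to every input row whose post-processed columns are still missing in the output table. The sketch below shows the intended call pattern; the file names, the `power`/`width` columns, and the `fit_row` callback are illustrative assumptions, not part of the repository.

```python
from qolab.tableflow import loadInOutTables, reflowTable


def fit_row(row, extraInfo=None):
    """Hypothetical per-row analysis: derive a 'width' value from an input 'power' column."""
    row["width"] = 2.0 * row["power"]  # placeholder computation, for illustration only
    return row


# File and column names here are assumptions for illustration only.
tIn, tOut = loadInOutTables("runs_in.csv", "runs_out.csv", comment="#")

# Process only rows whose 'width' is still NA in tOut; pass redo=True to recompute every row.
reflowTable(tIn, tOut, process_row_func=fit_row, postProcessedColums=["width"], redo=False)

tOut.to_csv("runs_out.csv", index=False)
```

Re-running such a script after appending rows to the input table touches only the new rows, which matches the module's stated purpose of bulk processing data files where only a few parameters change.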