about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- qolab/tableflow/__init__.py 36
1 files changed, 24 insertions, 12 deletions
diff --git a/qolab/tableflow/__init__.py b/qolab/tableflow/__init__.py
index d185d6f..23e78a9 100644
--- a/qolab/tableflow/__init__.py
+++ b/qolab/tableflow/__init__.py
@@ -16,6 +16,7 @@ Super handy for bulk processing data files where only a few parameters changed.
import pandas as pd
import warnings
+
def loadInOutTables(inputFileName=None, outputFileName=None, comment=None):
"""Load the input and the output tables from files.
@@ -38,42 +39,46 @@ def loadInOutTables(inputFileName=None, outputFileName=None, comment=None):
return None, None
if not comment:
- comment = '#'
+ comment = "#"
tIn = pd.read_csv(inputFileName, comment=comment)
- tIn.columns = tIn.columns.str.removeprefix(' '); # clean up leading white space in columns names
+ tIn.columns = tIn.columns.str.removeprefix(" ")
+ # clean up leading white space in columns names
try:
- tOut=pd.read_csv(outputFileName, comment=comment)
+ tOut = pd.read_csv(outputFileName, comment=comment)
except Exception:
- tOut=tIn.copy(deep=True)
+ tOut = tIn.copy(deep=True)
return tIn, tOut
+
def ilocRowOrAdd(tbl, row):
"""Find a row in a table (`tbl`) similar to a provided `row`.
NA in both sets treated as a match.
- If similar 'row' not found in the table, insert it.
+ If similar 'row' not found in the table, insert it.
"""
tSub = tbl[row.keys()]
- res = (tSub == row) | (tSub.isna() & row.isna() )
- res = res.all(axis=1) # which rows coincide
+ res = (tSub == row) | (tSub.isna() & row.isna())
+ res = res.all(axis=1) # which rows coincide
if res.any():
# we have a similar row
i = res[res].index[0]
else:
# we need to create new row since tbl does not has it
- i=len(tbl)
+ i = len(tbl)
updateTblRowAt(tbl, i, row)
return i
+
def updateTblRowAt(tbl, i, row):
"""Update row with position 'i' in the table ('tbl') with values from 'row'."""
for k in row.keys():
tbl.at[i, k] = row[k]
return
+
def isRedoNeeded(row, cols2check):
"""Check is Redo required in a given row.
@@ -94,11 +99,19 @@ def isRedoNeeded(row, cols2check):
return True
return False
-def reflowTable(tIn, tOut, process_row_func=None, postProcessedColums=None, extraInfo=None, redo=False):
+
+def reflowTable(
+ tIn,
+ tOut,
+ process_row_func=None,
+ postProcessedColums=None,
+ extraInfo=None,
+ redo=False,
+):
"""Reflow/update table tOut in place based on the inputs specified in table tIn.
Effectively maps unprocessed rows to ``process_row_func``.
-
+
Parameters
==========
postProcessedColums : list of strings
@@ -125,10 +138,9 @@ def reflowTable(tIn, tOut, process_row_func=None, postProcessedColums=None, extr
iOut = ilocRowOrAdd(tOut, rowIn)
rowOutBefore = tOut.iloc[iOut]
- if not (redo or isRedoNeeded(rowOutBefore, postProcessedColums) ):
+ if not (redo or isRedoNeeded(rowOutBefore, postProcessedColums)):
continue
# processing data describing row
rowOut = process_row_func(rowOutBefore, extraInfo=extraInfo)
updateTblRowAt(tOut, iOut, rowOut)
-