aboutsummaryrefslogtreecommitdiff
path: root/qolab/tableflow
diff options
context:
space:
mode:
author: Eugeniy E. Mikhailov <evgmik@gmail.com> 2024-05-28 00:26:23 -0400
committer: Eugeniy E. Mikhailov <evgmik@gmail.com> 2024-05-28 00:26:23 -0400
commit: ec39d0f666a91e73306bba7d809db52747cb6836 (patch)
tree: e751750f711e16a302e9d689aa3281cf5f771b1a /qolab/tableflow
parent: f66c475ed89ab125cff573ffa692e3d29a596fd5 (diff)
download: qolab-ec39d0f666a91e73306bba7d809db52747cb6836.tar.gz
qolab-ec39d0f666a91e73306bba7d809db52747cb6836.zip
added some preliminary files for table flow
Diffstat (limited to 'qolab/tableflow')
-rw-r--r-- qolab/tableflow/__init__.py 63
1 files changed, 63 insertions, 0 deletions
diff --git a/qolab/tableflow/__init__.py b/qolab/tableflow/__init__.py
new file mode 100644
index 0000000..a83120e
--- /dev/null
+++ b/qolab/tableflow/__init__.py
@@ -0,0 +1,63 @@
+"""
+Provide basic methods to process tables with data descriptions
+Created by Eugeniy E. Mikhailov 2024/05/27
+
+The basic idea is that we have an *input* table
+with the data description, and we (re)generate the *output* table
+based on the input table with processed rows.
+
+If the output table already has processed rows with entries different from NA,
+such rows are skipped.
+
+Super handy for bulk processing data files where only a few parameters changed.
+"""
+
+import pandas as pd
+
+def loadInOutTables(inputFileName=None, outputFileName=None, comment=None):
+    """
+    Load the *input* table and the matching *output* table from CSV files.
+
+    Parameters
+    ----------
+    inputFileName : path-like or None
+        CSV file with the input table. If it is not given, nothing is loaded.
+    outputFileName : path-like or None
+        CSV file with a previously generated output table.
+    comment : str or None
+        Character which starts a comment in the CSV files; defaults to '#'.
+
+    Returns
+    -------
+    tuple
+        ``(tIn, tOut)`` data frames, or ``(None, None)`` if no input file
+        name is given. When the output file is not given or cannot be read,
+        ``tOut`` is a deep copy of ``tIn``.
+    """
+    if not inputFileName:
+        return None, None
+
+    if not comment:
+        comment = '#'
+
+    tIn = pd.read_csv(inputFileName, comment=comment)
+    # clean up leading white space in columns names
+    tIn.columns = tIn.columns.str.lstrip()
+
+    tOut = None
+    if outputFileName:
+        try:
+            # Read the file the caller asked for, with the same comment
+            # convention as the input table.
+            tOut = pd.read_csv(outputFileName, comment=comment)
+        except (OSError, ValueError):
+            # Missing, unreadable, empty or malformed output file
+            # (pandas EmptyDataError and ParserError derive from ValueError):
+            # fall back to the input table below.
+            tOut = None
+
+    if tOut is None:
+        tOut = tIn.copy(deep=True)
+    else:
+        # keep column names comparable with the cleaned input table
+        tOut.columns = tOut.columns.str.lstrip()
+
+    return tIn, tOut
+
+def ilocRowOrAdd(tbl, row):
+    """
+    Find the row of 'tbl' similar to 'row'; insert 'row' if there is none.
+
+    Only the columns listed in 'row' are compared. An entry counts as
+    coinciding when the values are equal or when both of them are NA.
+
+    Returns the index label of the first coinciding row, or ``len(tbl)``,
+    the label under which the new row was stored.
+    """
+    candidates = tbl[row.keys()]
+    same_value = candidates == row
+    both_missing = candidates.isna() & row.isna()
+    hits = (same_value | both_missing).all(axis=1)  # which rows coincide
+
+    if not hits.any():
+        # tbl does not have such a row yet, so create it at the end
+        new_label = len(tbl)
+        updateTblRowAt(tbl, new_label, row)
+        return new_label
+
+    # we have a similar row, report the first one
+    return hits[hits].index[0]
+
+def updateTblRowAt(tbl, i, row):
+    """
+    Copy every entry of 'row' into the row labeled 'i' of 'tbl'.
+
+    The table is modified in place, one cell at a time via ``tbl.at``;
+    nothing is returned.
+    """
+    for column in row.keys():
+        value = row[column]
+        tbl.at[i, column] = value
+
+def isRedoNeeded(row, cols2check):
+    """
+    Tell whether 'row' has to be (re)processed.
+
+    Redo is required if any column from 'cols2check' is missing in 'row',
+    or if all entries of 'row' in the 'cols2check' columns are NA.
+    """
+    available = row.keys()
+    if any(c not in available for c in cols2check):
+        # at least one of the required columns is absent
+        return True
+    return bool(row[cols2check].isna().all())
+