added some preliminary files for table flow

author: Eugeniy E. Mikhailov <evgmik@gmail.com> 2024-05-28 00:26:23 -0400
committer: Eugeniy E. Mikhailov <evgmik@gmail.com> 2024-05-28 00:26:23 -0400
commit: ec39d0f666a91e73306bba7d809db52747cb6836 (patch)
tree: e751750f711e16a302e9d689aa3281cf5f771b1a /qolab/tableflow
parent: f66c475ed89ab125cff573ffa692e3d29a596fd5 (diff)
download: qolab-ec39d0f666a91e73306bba7d809db52747cb6836.tar.gz
qolab-ec39d0f666a91e73306bba7d809db52747cb6836.zip
1 files changed, 63 insertions, 0 deletions
diff --git a/qolab/tableflow/__init__.py b/qolab/tableflow/__init__.py
new file mode 100644
index 0000000..a83120e
--- /dev/null
+++ b/qolab/tableflow/__init__.py
@@ -0,0 +1,63 @@
+"""
+Provide basic methods to process data describing tables
+Created by Eugeniy E. Mikhailov 2024/05/27
+
+The basic idea is that we have an *input* table
+with data description, and we (re)generate an *output* table
+based on the input table with processed rows.
+
+If the output table already has processed rows with entries different from NA,
+such rows are skipped.
+
+Super handy for bulk processing data files where only a few parameters changed.
+"""
+
+import pandas as pd
+
+def loadInOutTables(inputFileName=None, outputFileName=None, comment=None):
+    """
+    Load the *input* table and the matching *output* table.
+
+    Parameters
+    ----------
+    inputFileName : path of the CSV file with the input table; when it is
+        empty or None nothing is loaded.
+    outputFileName : path of the CSV file with previously generated output.
+    comment : comment marker passed to ``pd.read_csv`` for the input file;
+        '#' is used when it is empty or None.
+
+    Returns
+    -------
+    (tIn, tOut) : the input and output tables. ``tOut`` is a deep copy of
+        ``tIn`` when the output file cannot be read. ``(None, None)`` is
+        returned when ``inputFileName`` is not given.
+    """
+    if not inputFileName:
+        return None, None
+
+    if not comment:
+        comment = '#'
+
+    tIn = pd.read_csv(inputFileName, comment=comment)
+    # drop one leading space from column names (headers like "a, b, c")
+    tIn.columns = tIn.columns.str.removeprefix(' ')
+
+    try:
+        # This used to read the undefined name `results_file`; the NameError
+        # was swallowed below, so the existing output was never loaded.
+        tOut = pd.read_csv(outputFileName)
+    except Exception:
+        # Best effort: no usable output file yet (missing, empty, None, ...),
+        # so start the output table from the input one.
+        tOut = tIn.copy(deep=True)
+
+    return tIn, tOut
+
+def ilocRowOrAdd(tbl, row):
+    """
+    Return the index label of the first row of `tbl` similar to `row`.
+
+    Only the columns named by ``row.keys()`` are compared; an entry that is
+    NA in both `tbl` and `row` counts as a hit. When no similar row exists,
+    `row` is written into `tbl` under the label ``len(tbl)`` and that label
+    is returned.
+    """
+    candidates = tbl[row.keys()]
+    bothMissing = candidates.isna() & row.isna()
+    # True for every table row whose compared columns all coincide with `row`
+    matches = ((candidates == row) | bothMissing).all(axis=1)
+    if not matches.any():
+        # tbl does not have such a row yet, so create it
+        newLabel = len(tbl)
+        updateTblRowAt(tbl, newLabel, row)
+        return newLabel
+    # we have a similar row: report the first one
+    return matches[matches].index[0]
+
+def updateTblRowAt(tbl, i, row):
+    """
+    Copy every entry of `row` into `tbl` at index label `i`, in place.
+
+    Each key of `row` is used as the column name; values are assigned one
+    by one through ``tbl.at``. Nothing is returned.
+    """
+    for column, value in row.items():
+        tbl.at[i, column] = value
+
+def isRedoNeeded(row, cols2check):
+    """
+    Tell whether `row` has to be processed again.
+
+    Returns True when any column listed in `cols2check` is missing from
+    `row`, or when all entries of `row` in those columns are NA; otherwise
+    returns False.
+    """
+    # a missing column means the result was never produced
+    if any(col not in row.keys() for col in cols2check):
+        return True
+    # redo only if every checked entry is still NA
+    return bool(row[cols2check].isna().all())
+
author	Eugeniy E. Mikhailov <evgmik@gmail.com>	2024-05-28 00:26:23 -0400
committer	Eugeniy E. Mikhailov <evgmik@gmail.com>	2024-05-28 00:26:23 -0400
commit	ec39d0f666a91e73306bba7d809db52747cb6836 (patch)
tree	e751750f711e16a302e9d689aa3281cf5f771b1a /qolab/tableflow
parent	f66c475ed89ab125cff573ffa692e3d29a596fd5 (diff)
download	qolab-ec39d0f666a91e73306bba7d809db52747cb6836.tar.gz qolab-ec39d0f666a91e73306bba7d809db52747cb6836.zip