diff options
author | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2024-05-28 00:26:23 -0400 |
---|---|---|
committer | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2024-05-28 00:26:23 -0400 |
commit | ec39d0f666a91e73306bba7d809db52747cb6836 (patch) | |
tree | e751750f711e16a302e9d689aa3281cf5f771b1a /qolab | |
parent | f66c475ed89ab125cff573ffa692e3d29a596fd5 (diff) | |
download | qolab-ec39d0f666a91e73306bba7d809db52747cb6836.tar.gz qolab-ec39d0f666a91e73306bba7d809db52747cb6836.zip |
added some preliminary files for table flow
Diffstat (limited to 'qolab')
-rw-r--r-- | qolab/tableflow/__init__.py | 63 |
1 files changed, 63 insertions, 0 deletions
# diff --git a/qolab/tableflow/__init__.py b/qolab/tableflow/__init__.py
# new file mode 100644, index 0000000..a83120e
"""
Provide basic methods to process data describing tables.

Created by Eugeniy E. Mikhailov 2024/05/27

The basic idea is that we have an *input* table with data description
and we (re)generate an *output* table based on the input table
with processed rows.

If the output table already has processed rows with entries different
from NA, such rows are skipped.

Super handy for bulk processing of data files where only a few parameters
changed.
"""

import pandas as pd


def loadInOutTables(inputFileName=None, outputFileName=None, comment=None):
    """Load the input table and the (possibly already existing) output table.

    Parameters
    ----------
    inputFileName : path-like or None
        CSV file with the input table. If it is not set,
        ``(None, None)`` is returned.
    outputFileName : path-like or None
        CSV file with previously generated results. If it is not set,
        or the file cannot be read or parsed, the output table starts
        as a deep copy of the input table.
    comment : str or None
        Character which starts a comment in the CSV files, ``'#'`` when
        not set.

    Returns
    -------
    (tIn, tOut) : tuple of two DataFrames, or ``(None, None)``
    """
    if not inputFileName:
        return None, None

    if not comment:
        comment = '#'

    tIn = pd.read_csv(inputFileName, comment=comment)
    # Clean up leading white space in column names, it appears when
    # the header is written as "x, y, z".
    tIn.columns = tIn.columns.str.lstrip()

    tOut = None
    if outputFileName:
        try:
            # Same comment marker as for the input table, so both files
            # are parsed by the same rules.
            tOut = pd.read_csv(outputFileName, comment=comment)
        except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
            # Best effort: missing, empty, or unparsable results file
            # means that we start from scratch.
            tOut = None
    if tOut is None:
        tOut = tIn.copy(deep=True)

    return tIn, tOut


def ilocRowOrAdd(tbl, row):
    """Return the index of the row in 'tbl' similar to 'row', add it if missing.

    Only the columns listed in ``row.keys()`` are compared; NA in both
    the table and 'row' is treated as a hit. If several rows coincide,
    the first one wins. If a similar row is not found, 'row' is inserted
    into 'tbl' in place under the index ``len(tbl)``.

    'row' is expected to be a pandas Series (``row.isna()`` is used),
    and all its keys must be columns of 'tbl'.

    Note: the returned value is an index label of 'tbl'; it coincides
    with the positional index only for the default 0..N-1 index.
    """
    tSub = tbl[row.keys()]
    sameValue = tSub == row
    bothNA = tSub.isna() & row.isna()
    hits = (sameValue | bothNA).all(axis=1)  # which rows coincide
    if hits.any():
        # we have a similar row
        return hits[hits].index[0]

    # we need to create a new row since tbl does not have it
    i = len(tbl)
    updateTblRowAt(tbl, i, row)
    return i


def updateTblRowAt(tbl, i, row):
    """Copy every entry of 'row' into 'tbl' at index 'i', in place.

    If 'tbl' has no row with index 'i', the row is created;
    columns not mentioned in 'row' are left untouched (NA for a new row).
    """
    for key, value in row.items():
        tbl.at[i, key] = value


def isRedoNeeded(row, cols2check):
    """Tell whether 'row' must be (re)processed.

    Redo is required if any column from 'cols2check' is missing in 'row',
    or if all entries of 'row' listed in 'cols2check' are NA.
    """
    available = row.keys()
    if any(c not in available for c in cols2check):
        return True
    return bool(row[cols2check].isna().all())