qolab/tableflow/__init__.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63

"""
Provide basic methods to process tables which describe data
Created by Eugeniy E. Mikhailov 2024/05/27

The basic idea is that we have an *input* table
with the data description, and we (re)generate the *output* table
based on the input table with processed rows.

If the output table already has processed rows with entries different from NA,
such rows are skipped.

Super handy for bulk processing data files where only a few parameters changed.
"""

import pandas as pd

def loadInOutTables(inputFileName=None, outputFileName=None, comment=None):
    """
    Load the input table and the corresponding output table from CSV files.

    Parameters
    ----------
    inputFileName : str or path-like, optional
        CSV file with the input table. When it is empty or None nothing
        is loaded and ``(None, None)`` is returned.
    outputFileName : str or path-like, optional
        CSV file with a previously saved output table. When it is not
        given, or the file cannot be read, the output table starts as a
        deep copy of the input table.
    comment : str, optional
        Character which starts a comment in the input file, '#' if not set.

    Returns
    -------
    tuple
        ``(tIn, tOut)`` pair of pandas DataFrames, or ``(None, None)`` when
        no input file name is provided.
    """
    if not inputFileName:
        return None, None

    if not comment:
        comment = '#'

    tIn = pd.read_csv(inputFileName, comment=comment)
    # clean up leading white space in columns names
    tIn.columns = tIn.columns.str.removeprefix(' ')

    tOut = None
    if outputFileName:
        try:
            tOut = pd.read_csv(outputFileName)
        except (OSError, ValueError):
            # Missing/unreadable file (OSError) or empty/unparsable content
            # (pandas EmptyDataError and ParserError derive from ValueError):
            # fall back to a fresh output table below.
            tOut = None

    if tOut is None:
        # no usable previous results, start from the input table
        tOut = tIn.copy(deep=True)

    return tIn, tOut

def ilocRowOrAdd(tbl, row):
    """
    Return the index label of the row in 'tbl' similar to 'row'.

    Only the columns listed in 'row' are compared. Entries are considered
    coinciding when they are equal or when both of them are NA.
    If no similar row exists, 'row' is stored in 'tbl' under the label
    len(tbl) and that label is returned.
    """
    columns = row.keys()
    candidates = tbl[columns]
    sameValue = candidates == row
    bothMissing = candidates.isna() & row.isna()
    # True for every table row where all compared columns coincide
    rowMatches = (sameValue | bothMissing).all(axis=1)

    if not rowMatches.any():
        # tbl does not have such a row yet, so create it
        newLabel = len(tbl)
        updateTblRowAt(tbl, newLabel, row)
        return newLabel

    # label of the first similar row
    return rowMatches[rowMatches].index[0]

def updateTblRowAt(tbl, i, row):
    """
    Copy every entry of 'row' into 'tbl' at the row labeled 'i'.

    Each key of 'row' is used as the column name; 'tbl' is modified
    in place and nothing is returned.
    """
    for column in row.keys():
        value = row[column]
        tbl.at[i, column] = value

def isRedoNeeded(row, cols2check):
    """
    Tell whether 'row' has to be (re)processed.

    Returns True when at least one column from 'cols2check' is absent
    in 'row', or when all entries of 'row' in 'cols2check' are NA.
    Returns False otherwise.
    """
    known = row.keys()
    if any(col not in known for col in cols2check):
        # a column to check is missing entirely
        return True
    # redo only if none of the checked entries is filled
    return bool(row[cols2check].isna().all())