diff options
Diffstat (limited to 'qolab/tableflow/__init__.py')
-rw-r--r-- | qolab/tableflow/__init__.py | 53 |
1 files changed, 43 insertions, 10 deletions
diff --git a/qolab/tableflow/__init__.py b/qolab/tableflow/__init__.py index ab86dbc..4c81a34 100644 --- a/qolab/tableflow/__init__.py +++ b/qolab/tableflow/__init__.py @@ -17,7 +17,23 @@ import pandas as pd import warnings def loadInOutTables(inputFileName=None, outputFileName=None, comment=None): - """Load input table from a file and if exist output table too, otherwise clone input table to the output one.""" + """Load the input and the output tables from files. + + The output table is loaded only if the corresponding file exists. + Otherwise it is a clone of the input table. + + Parameters + ========== + inputFileName : path or string + Path to the input table filename. If this file does not exist, + return None for both tables. + outputFileName : path or string or None + Path to the output table filename. If such a file does not exist, + clone the input table to the output one. + comment : string or None (default) + String which indicates a comment in the input `csv` file. + Usually it is either '#' or '%'. If set to None, internally changed to '#'. + """ if not inputFileName: return None, None @@ -35,7 +51,7 @@ def loadInOutTables(inputFileName=None, outputFileName=None, comment=None): return tIn, tOut def ilocRowOrAdd(tbl, row): - """Find a row in a table ('tbl') similar to a provided 'row' in 'tbl'. + """Find a row in a table (`tbl`) similar to a provided `row`. NA in both sets treated as a match. If similar 'row' not found in the table, insert it. @@ -63,6 +79,13 @@ def isRedoNeeded(row, cols2check): Redo is required if *all* required entries in 'cols2check' are NA or we are missing columns in cols2check list + + Parameters + ========== + row: pandas row + row to perform check on + cols2check: list of strings + List of strings with column names which are considered generated outputs. 
""" for c in cols2check: if c not in row.keys(): @@ -72,14 +95,24 @@ def isRedoNeeded(row, cols2check): return False def reflowTable(tIn, tOut, process_row_func=None, postProcessedColums=None, extraInfo=None, redo=False): - """Reflow/update tOut in place based on the inputs specified in tIn. - - Effectively maps unprocess rows to 'process_row_func' - - postProcessedColums is a list of column names which need to be generated - - extraInfo is dictionary of additional parameter supplied to process_row_func - - process_row_func expected to behave like: - rowOut = process_row_func(rowIn, extraInfo=userInfo) - - redo controls if reflow is needed unconditionally (i.e. force reflow) + """Reflow/update table tOut in place based on the inputs specified in table tIn. + + Effectively maps unprocessed rows to 'process_row_func'. + + Parameters + ========== + postProcessedColums : list of strings + List of column names which need to be generated + extraInfo : dictionary (optional) + Dictionary of additional parameters supplied to `process_row_func` + process_row_func : function + Function which will take a row from the input table and generate + a row with post-processed entries (columns). + Expected to have signature like: + rowOut = process_row_func(rowIn, extraInfo=userInfo) + redo : True or False (default) + Flag indicating if reflow is needed unconditionally + (i.e. True forces reflow of all entries). """ if not process_row_func: warnings.warn("process_row_func is not provided, exiting reflowTable") |