"""Tests for ``qolab.tableflow``: table loading, row lookup and reflow."""

import pandas as pd
import pytest

import qolab.tableflow as tblfl

# Fixture files; paths are relative to the repository root.
TABLE_IN = 'tests/tableflow_test_data/tableIn1.csv'
TABLE_OUT_NON_PROCESSED = 'tests/tableflow_test_data/tableOut1nonProcessed.csv'
TABLE_OUT_PARTIALLY_PROCESSED = (
    'tests/tableflow_test_data/tableOut1pariallyProcessed.csv'
)


def test_table_load_noinputs():
    """Without an input file name both returned tables are ``None``."""
    assert tblfl.loadInOutTables() == (None, None)
    assert tblfl.loadInOutTables(
        inputFileName=None, outputFileName="non_existing_file"
    ) == (None, None)


def test_wrong_comment_in_table_file_to_load():
    """Loading with the wrong comment character must raise."""
    # Should raise ParserError; the broad ``Exception`` is kept because the
    # exact exception type is not pinned down by this test.
    with pytest.raises(Exception):
        tblfl.loadInOutTables(
            inputFileName=TABLE_IN, outputFileName=None, comment='%'
        )


def test_right_comment_in_table_file_to_load():
    """Loading with the right comment character yields a DataFrame."""
    t_in, _ = tblfl.loadInOutTables(
        inputFileName=TABLE_IN, outputFileName=None, comment='#'
    )
    assert isinstance(t_in, pd.DataFrame)


def test_table_equality_with_no_output_file_name():
    """With no output file the output table equals the input but is a copy."""
    t_in, t_out = tblfl.loadInOutTables(
        inputFileName=TABLE_IN, outputFileName=None, comment='#'
    )
    assert isinstance(t_in, pd.DataFrame)
    assert isinstance(t_out, pd.DataFrame)
    assert t_in.equals(t_out)

    # Modifying the input table must not leak into the output table.
    col0 = t_in.keys()[0]
    value_before = t_in.at[0, col0]
    t_in.at[0, col0] = value_before + 1
    assert not t_in.equals(t_out)


def test_table_load_with_in_out_file_names():
    """Input and output tables are loaded from their own files."""
    # different filenames, same content for ease of testing
    t_in, t_out = tblfl.loadInOutTables(
        inputFileName=TABLE_IN,
        outputFileName=TABLE_OUT_NON_PROCESSED,
        comment='#',
    )
    assert isinstance(t_in, pd.DataFrame)
    assert isinstance(t_out, pd.DataFrame)
    assert t_in.equals(t_out)

    # different filenames, different content
    t_in, t_out = tblfl.loadInOutTables(
        inputFileName=TABLE_IN,
        outputFileName=TABLE_OUT_PARTIALLY_PROCESSED,
        comment='#',
    )
    assert isinstance(t_in, pd.DataFrame)
    assert isinstance(t_out, pd.DataFrame)
    assert not t_in.equals(t_out)
    assert 'out1' in t_out.columns
    assert 'out1' not in t_in.columns


def test_for_existing_row():
    """A row already present in the table is reported at its position."""
    tbl1 = pd.DataFrame({'a': [1, 2, 3], 'b': [1, 4, 6]})
    r = pd.Series({'a': 2, 'b': 4})
    assert tblfl.ilocRowOrAdd(tbl1, r) == 1


def test_for_existing_row_with_NA():
    """NA entries take part in row matching."""
    # NA in both table and row should return a hit
    tbl1 = pd.DataFrame({'a': [1, 2, 3], 'b': [1, pd.NA, 6]})
    r = pd.Series({'a': 2, 'b': pd.NA})
    assert tblfl.ilocRowOrAdd(tbl1, r) == 1

    # NA only in the row: no match, so a new row is inserted at the end
    tbl1 = pd.DataFrame({'a': [1, 2, 3], 'b': [1, 4, 6]})
    r = pd.Series({'a': 2, 'b': pd.NA})
    assert tblfl.ilocRowOrAdd(tbl1, r) == 3


def test_for_nonexisting_row_and_its_insertion():
    """A row missing from the table is appended and its position returned."""
    tbl1 = pd.DataFrame({'a': [1, 2, 3], 'b': [1, 4, 6]})
    r = pd.Series({'a': 2, 'b': 10})
    assert len(tbl1) == 3
    assert tblfl.ilocRowOrAdd(tbl1, r) == 3
    assert len(tbl1) == 4


def test_isRedoNeeded():
    """Check ``isRedoNeeded`` for filled, NA, missing and mixed columns."""
    r = pd.Series({'a': 2, 'b': 4, 'c': pd.NA})
    assert not tblfl.isRedoNeeded(r, ['a', 'b'])      # all checked filled
    assert tblfl.isRedoNeeded(r, ['c'])               # only checked one is NA
    assert tblfl.isRedoNeeded(r, ['non_existing'])    # column is absent
    assert not tblfl.isRedoNeeded(r, ['b', 'c'])      # one of them is filled


def test_reflowTable():
    """Exercise ``reflowTable``: warnings, processing, skipping, forced redo."""
    t_in, t_out = tblfl.loadInOutTables(
        inputFileName=TABLE_IN,
        outputFileName=TABLE_OUT_PARTIALLY_PROCESSED,
        comment='#',
    )

    def return_row_unchanged(row):
        return row

    def square_x_into_out1(row, extraInfo=None):
        row['out1'] = row['x'] * row['x']
        return row

    def out1_is_x_squared():
        return (t_out['out1'] == t_out['x'] * t_out['x']).all()

    post_processed = ['out1', 'out2']

    # check for warnings: a UserWarning is expected whenever the row
    # function and the post-processed column list are not both supplied
    with pytest.warns(UserWarning):
        tblfl.reflowTable(t_in, t_out)
    with pytest.warns(UserWarning):
        tblfl.reflowTable(t_in, t_out, postProcessedColums=['dummyName'])
    with pytest.warns(UserWarning):
        tblfl.reflowTable(t_in, t_out, process_row_func=return_row_unchanged)

    # now run reflow; the partially processed output starts with a
    # different number of rows than the input
    assert len(t_in) != len(t_out)
    tblfl.reflowTable(
        t_in,
        t_out,
        process_row_func=square_x_into_out1,
        postProcessedColums=post_processed,
    )
    assert len(t_in) == len(t_out)
    assert out1_is_x_squared()

    # check that reflow is done for rows whose output was reset to NA
    t_out.loc[t_out['x'] == 1, 'out1'] = pd.NA
    tblfl.reflowTable(
        t_in,
        t_out,
        process_row_func=square_x_into_out1,
        postProcessedColums=post_processed,
    )
    assert out1_is_x_squared()

    # check that reflow is not reprocessed when the output is already filled
    t_out.loc[t_out['x'] == 1, 'out1'] = 12121  # crazy number
    tblfl.reflowTable(
        t_in,
        t_out,
        process_row_func=square_x_into_out1,
        postProcessedColums=post_processed,
    )
    assert (t_out.loc[t_out['x'] == 1, 'out1'] == 12121).all()  # should not change

    # now we are forcing redo
    t_out.loc[t_out['x'] == 1, 'out1'] = 12121  # crazy number
    tblfl.reflowTable(
        t_in,
        t_out,
        process_row_func=square_x_into_out1,
        postProcessedColums=post_processed,
        redo=True,
    )
    assert not (t_out.loc[t_out['x'] == 1, 'out1'] == 12121).all()  # must not be the same
    assert out1_is_x_squared()