import pytest
import qolab.tableflow as tblfl
import pandas as pd


def test_table_load_noinputs():
    """Without an input file name, ``loadInOutTables`` must give ``(None, None)``."""
    nothing_loaded = (None, None)

    # no arguments at all
    result = tblfl.loadInOutTables()
    assert result == nothing_loaded

    # an output file name alone, with no input file name, changes nothing
    result = tblfl.loadInOutTables(
        inputFileName=None, outputFileName="non_existing_file"
    )
    assert result == nothing_loaded


def test_wrong_comment_in_table_file_to_load():
    """Loading the input table with ``comment="%"`` is expected to raise."""
    # A pandas ParserError is the anticipated failure; the broad ``Exception``
    # is kept so the test does not depend on the exact exception class.
    with pytest.raises(Exception):
        tblfl.loadInOutTables(
            inputFileName="tests/tableflow_test_data/tableIn1.csv",
            outputFileName=None,
            comment="%",
        )


def test_right_comment_in_table_file_to_load():
    """With ``comment="#"`` the input table is loaded as a pandas DataFrame."""
    # only the input table is inspected here, the output table is discarded
    tIn, _ = tblfl.loadInOutTables(
        inputFileName="tests/tableflow_test_data/tableIn1.csv",
        outputFileName=None,
        comment="#",
    )
    # isinstance with the public class instead of type() == on a private path
    assert isinstance(tIn, pd.DataFrame)


def test_table_equality_with_no_output_file_name():
    """With no output file name, both tables start equal but are independent."""
    tIn, tOut = tblfl.loadInOutTables(
        inputFileName="tests/tableflow_test_data/tableIn1.csv",
        outputFileName=None,
        comment="#",
    )
    assert isinstance(tIn, pd.DataFrame)
    assert isinstance(tOut, pd.DataFrame)
    assert tIn.equals(tOut)

    # modifying the input table must not propagate to the output table
    col0 = tIn.columns[0]
    vBefore = tIn.at[0, col0]
    tIn.at[0, col0] = vBefore + 1
    assert not tIn.equals(tOut)


def test_table_load_with_in_out_file_names():
    """Input and output tables are loaded from their own, separate files."""
    # different filenames, same content for ease of testing
    tIn, tOut = tblfl.loadInOutTables(
        inputFileName="tests/tableflow_test_data/tableIn1.csv",
        outputFileName="tests/tableflow_test_data/tableOut1nonProcessed.csv",
        comment="#",
    )
    assert isinstance(tIn, pd.DataFrame)
    assert isinstance(tOut, pd.DataFrame)
    assert tIn.equals(tOut)

    # different filenames, different content
    tIn, tOut = tblfl.loadInOutTables(
        inputFileName="tests/tableflow_test_data/tableIn1.csv",
        outputFileName="tests/tableflow_test_data/tableOut1pariallyProcessed.csv",
        comment="#",
    )
    assert isinstance(tIn, pd.DataFrame)
    assert isinstance(tOut, pd.DataFrame)
    assert not tIn.equals(tOut)
    # the "out1" column is present only in the output table
    assert "out1" in tOut.columns
    assert "out1" not in tIn.columns


def test_for_existing_row():
    """A row already present in the table is located, here at index 1."""
    table = pd.DataFrame({"a": [1, 2, 3], "b": [1, 4, 6]})
    wanted = pd.Series({"a": 2, "b": 4})

    found_at = tblfl.ilocRowOrAdd(table, wanted)

    assert found_at == 1


def test_for_existing_row_with_NA():
    """Check how ``ilocRowOrAdd`` matches rows that contain NA values."""
    # NA in both table and row should return a hit
    tbl1 = pd.DataFrame({"a": [1, 2, 3], "b": [1, pd.NA, 6]})
    r = pd.Series({"a": 2, "b": pd.NA})
    assert tblfl.ilocRowOrAdd(tbl1, r) == 1

    # NA only in the row: no match, so a new row should be inserted
    tbl1 = pd.DataFrame({"a": [1, 2, 3], "b": [1, 4, 6]})
    r = pd.Series({"a": 2, "b": pd.NA})
    assert tblfl.ilocRowOrAdd(tbl1, r) == 3

    # NOTE(review): this case used to be repeated verbatim; the converse case
    # (NA in the table, a value in the row) may have been intended - confirm.


def test_for_nonexisting_row_and_its_insertion():
    """A row absent from the table gets appended and its new index returned."""
    table = pd.DataFrame({"a": [1, 2, 3], "b": [1, 4, 6]})
    missing = pd.Series({"a": 2, "b": 10})

    rows_before = len(table)
    assert rows_before == 3

    # the lookup fails, so the table is expected to grow by one row
    new_index = tblfl.ilocRowOrAdd(table, missing)
    assert new_index == 3

    rows_after = len(table)
    assert rows_after == 4


def test_isRedoNeeded():
    """Check ``isRedoNeeded`` on a row with filled, NA and absent columns."""
    row = pd.Series({"a": 2, "b": 4, "c": pd.NA})

    # (columns to check, whether a redo is expected)
    cases = [
        (["a", "b"], False),  # both columns are filled
        (["c"], True),  # the only column is NA
        (["non_existing"], True),  # the column is not in the row
        (["b", "c"], False),  # one column filled, one NA
    ]
    for columns, redo_expected in cases:
        assert bool(tblfl.isRedoNeeded(row, columns)) is redo_expected


def test_reflowTable():
    """Exercise ``reflowTable``: warnings, filling, skipping and forced redo.

    Calls that omit ``process_row_func`` and/or ``postProcessedColums`` are
    expected to emit a ``UserWarning``. With both supplied, the output table is
    expected to be completed, already filled rows left untouched, and
    everything recomputed when ``redo=True``.
    """
    tIn, tOut = tblfl.loadInOutTables(
        inputFileName="tests/tableflow_test_data/tableIn1.csv",
        outputFileName="tests/tableflow_test_data/tableOut1pariallyProcessed.csv",
        comment="#",
    )

    # check for warnings
    with pytest.warns(UserWarning):
        tblfl.reflowTable(tIn, tOut)

    with pytest.warns(UserWarning):
        tblfl.reflowTable(tIn, tOut, postProcessedColums=["dummyName"])

    def identity_row(row):
        return row

    with pytest.warns(UserWarning):
        tblfl.reflowTable(tIn, tOut, process_row_func=identity_row)

    # now run reflow
    def square_x(row, extraInfo=None):
        # the processed value is x squared, so results can be checked exactly
        row["out1"] = row["x"] * row["x"]
        return row

    assert len(tIn) != len(tOut)
    tblfl.reflowTable(
        tIn, tOut, process_row_func=square_x, postProcessedColums=["out1", "out2"]
    )
    assert len(tIn) == len(tOut)
    assert (tOut["out1"] == tOut["x"] * tOut["x"]).all()

    # check that reflow is done
    tOut.loc[tOut["x"] == 1, "out1"] = pd.NA
    tblfl.reflowTable(
        tIn, tOut, process_row_func=square_x, postProcessedColums=["out1", "out2"]
    )
    assert (tOut["out1"] == tOut["x"] * tOut["x"]).all()

    # check that reflow is not reprocessed
    tOut.loc[tOut["x"] == 1, "out1"] = 12121  # crazy number
    tblfl.reflowTable(
        tIn, tOut, process_row_func=square_x, postProcessedColums=["out1", "out2"]
    )
    assert (tOut.loc[tOut["x"] == 1, "out1"] == 12121).all()  # should not change

    # now we are forcing redo
    tOut.loc[tOut["x"] == 1, "out1"] = 12121  # crazy number
    tblfl.reflowTable(
        tIn,
        tOut,
        process_row_func=square_x,
        postProcessedColums=["out1", "out2"],
        redo=True,
    )
    assert not (tOut.loc[tOut["x"] == 1, "out1"] == 12121).all()  # must not be the same
    assert (tOut["out1"] == tOut["x"] * tOut["x"]).all()