diff options
-rw-r--r-- | qolab/data/trace.py | 27 | ||||
-rw-r--r-- | tests/test_trace.py | 8 | ||||
-rw-r--r-- | tests/trace_test_data/xtrace1.dat.gz | bin | 0 -> 146 bytes |
3 files changed, 25 insertions, 10 deletions
diff --git a/qolab/data/trace.py b/qolab/data/trace.py index c941fd8..7a139b8 100644 --- a/qolab/data/trace.py +++ b/qolab/data/trace.py @@ -19,22 +19,29 @@ def from_timestamps_to_dates(timestamps): def loadTraceRawHeaderAndData(fname): headerstr=[] data = None - with open(fname) as tracefile: + # we will try to guess if the file compressed + _open = open + if fname[-3:] == '.gz': + # TODO improve detection: gzip files have first 2 bytes set to b'\x1f\x8b' + import gzip + _open = gzip.open + with _open(fname, mode='rb') as tracefile: # Reading yaml header prefixed by '% ' # It sits at the top and below is just data in TSV format while True: ln = tracefile.readline() - if ln[0:2]=='% ': - headerstr.append(ln[2:]) + if ln[0:2]==b'% ': + headerstr.append(ln[2:].decode('utf-8')) else: break - header=yaml.load(str.join('\n', headerstr), Loader=yaml.BaseLoader) - # now we load the data itself - # data=np.genfromtxt(fname, comments='%', delimiter='\t') - # Note: pandas reads csv faster by factor of 8 then numpy.genfromtxt - # data=pandas.read_csv('/home/evmik/hopping_trace_20220706_02141.dat', comment='%', delimiter='\t', header=None) - df = pandas.read_csv(fname, comment='%', delimiter='\t', header=None) - data = df.to_numpy() + header=yaml.load(str.join('\n', headerstr), Loader=yaml.BaseLoader) + # now we load the data itself + # data=np.genfromtxt(fname, comments='%', delimiter='\t') + # Note: pandas reads csv faster by factor of 8 then numpy.genfromtxt + # data=pandas.read_csv('/home/evmik/hopping_trace_20220706_02141.dat', comment='%', delimiter='\t', header=None) + tracefile.seek(0) # rewind file to the beginning + df = pandas.read_csv(tracefile, comment='%', delimiter='\t', header=None) + data = df.to_numpy() return(header, data) def loadTrace(fname): diff --git a/tests/test_trace.py b/tests/test_trace.py index 06f43c5..981d1b7 100644 --- a/tests/test_trace.py +++ b/tests/test_trace.py @@ -9,3 +9,11 @@ def test_load_uncompressed_v0dot1_trace(): data = tr.getData() 
assert np.all( (data - np.array([[1], [3], [2], [5]])) == 0 ) +def test_load_gzip_compressed_v0dot1_trace(): + tr = loadTrace('tests/trace_test_data/xtrace1.dat.gz') + cfg = tr.getConfig() + assert cfg['config']['version'] == '0.1' + assert cfg['config']['model'] == 'Trace' + data = tr.getData() + assert np.all( (data - np.array([[1], [3], [2], [5]])) == 0 ) + diff --git a/tests/trace_test_data/xtrace1.dat.gz b/tests/trace_test_data/xtrace1.dat.gz Binary files differ new file mode 100644 index 0000000..4b8fa2c --- /dev/null +++ b/tests/trace_test_data/xtrace1.dat.gz |