about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eugeniy E. Mikhailov <evgmik@gmail.com> 2024-06-18 23:53:17 -0400
committer: Eugeniy E. Mikhailov <evgmik@gmail.com> 2024-06-18 23:53:17 -0400
commit: c61321af05390ceca865fb816246b7c1bca57a32 (patch)
tree: cd5b32253bdebc0a956cfd2a855beccca80951a5
parent: df78f1f1e8fc390a039dd76ab892919a15da640c (diff)
download: qolab-c61321af05390ceca865fb816246b7c1bca57a32.tar.gz
          qolab-c61321af05390ceca865fb816246b7c1bca57a32.zip
added ability to read gzip compressed traces
-rw-r--r--  qolab/data/trace.py  27
-rw-r--r--  tests/test_trace.py  8
-rw-r--r--  tests/trace_test_data/xtrace1.dat.gz  bin 0 -> 146 bytes
3 files changed, 25 insertions, 10 deletions
diff --git a/qolab/data/trace.py b/qolab/data/trace.py
index c941fd8..7a139b8 100644
--- a/qolab/data/trace.py
+++ b/qolab/data/trace.py
@@ -19,22 +19,29 @@ def from_timestamps_to_dates(timestamps):
def loadTraceRawHeaderAndData(fname):
headerstr=[]
data = None
- with open(fname) as tracefile:
+ # we will try to guess if the file compressed
+ _open = open
+ if fname[-3:] == '.gz':
+ # TODO improve detection: gzip files have first 2 bytes set to b'\x1f\x8b'
+ import gzip
+ _open = gzip.open
+ with _open(fname, mode='rb') as tracefile:
# Reading yaml header prefixed by '% '
# It sits at the top and below is just data in TSV format
while True:
ln = tracefile.readline()
- if ln[0:2]=='% ':
- headerstr.append(ln[2:])
+ if ln[0:2]==b'% ':
+ headerstr.append(ln[2:].decode('utf-8'))
else:
break
- header=yaml.load(str.join('\n', headerstr), Loader=yaml.BaseLoader)
- # now we load the data itself
- # data=np.genfromtxt(fname, comments='%', delimiter='\t')
- # Note: pandas reads csv faster by factor of 8 then numpy.genfromtxt
- # data=pandas.read_csv('/home/evmik/hopping_trace_20220706_02141.dat', comment='%', delimiter='\t', header=None)
- df = pandas.read_csv(fname, comment='%', delimiter='\t', header=None)
- data = df.to_numpy()
+ header=yaml.load(str.join('\n', headerstr), Loader=yaml.BaseLoader)
+ # now we load the data itself
+ # data=np.genfromtxt(fname, comments='%', delimiter='\t')
+ # Note: pandas reads csv faster by factor of 8 then numpy.genfromtxt
+ # data=pandas.read_csv('/home/evmik/hopping_trace_20220706_02141.dat', comment='%', delimiter='\t', header=None)
+ tracefile.seek(0) # rewind file to the beginning
+ df = pandas.read_csv(tracefile, comment='%', delimiter='\t', header=None)
+ data = df.to_numpy()
return(header, data)
def loadTrace(fname):
diff --git a/tests/test_trace.py b/tests/test_trace.py
index 06f43c5..981d1b7 100644
--- a/tests/test_trace.py
+++ b/tests/test_trace.py
@@ -9,3 +9,11 @@ def test_load_uncompressed_v0dot1_trace():
data = tr.getData()
assert np.all( (data - np.array([[1], [3], [2], [5]])) == 0 )
+def test_load_gzip_compressed_v0dot1_trace():
+ tr = loadTrace('tests/trace_test_data/xtrace1.dat.gz')
+ cfg = tr.getConfig()
+ assert cfg['config']['version'] == '0.1'
+ assert cfg['config']['model'] == 'Trace'
+ data = tr.getData()
+ assert np.all( (data - np.array([[1], [3], [2], [5]])) == 0 )
+
diff --git a/tests/trace_test_data/xtrace1.dat.gz b/tests/trace_test_data/xtrace1.dat.gz
new file mode 100644
index 0000000..4b8fa2c
--- /dev/null
+++ b/tests/trace_test_data/xtrace1.dat.gz
Binary files differ