diff options
author | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2022-07-06 23:47:06 -0400 |
---|---|---|
committer | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2022-07-06 23:47:06 -0400 |
commit | 70cc272cfc9a08ea45a0bf8f6efdf69e4f606b7f (patch) | |
tree | 87c4b3d534b6ec1b8d6281eb3207a6c67352455c /qolab/data | |
parent | b7c4c06d86b07e24f1fbdca01234cd37ce6a0ec0 (diff) | |
download | qolab-70cc272cfc9a08ea45a0bf8f6efdf69e4f606b7f.tar.gz qolab-70cc272cfc9a08ea45a0bf8f6efdf69e4f606b7f.zip |
drastic speedup of loadTrace
The old way of appending line by line to a numpy array was super inefficient;
now we read the data with numpy's 'genfromtxt', which is drastically faster.
Diffstat (limited to 'qolab/data')
-rw-r--r-- | qolab/data/trace.py | 24 |
1 files changed, 13 insertions, 11 deletions
diff --git a/qolab/data/trace.py b/qolab/data/trace.py index 67d9301..700105a 100644 --- a/qolab/data/trace.py +++ b/qolab/data/trace.py @@ -1,6 +1,5 @@ from qolab.file_utils import save_table_with_header import datetime -import csv import numpy as np import yaml @@ -19,18 +18,21 @@ def from_timestamps_to_dates(timestamps): def loadTraceRawHeaderAndData(fname): headerstr=[] data = None - with open(fname) as csvfile: - rowreader = csv.reader(csvfile, delimiter='\t') - for row in rowreader: - if row[0][0:2]=='% ': - headerstr.append(row[0][2:]) + with open(fname) as tracefile: + # Reading yaml header prefixed by '% ' + # It sits at the top and below is just data in TSV format + while True: + ln = tracefile.readline() + if ln[0:2]=='% ': + headerstr.append(ln[2:]) else: - rdata = np.array(row, dtype=float) - if data is None: - data=rdata - else: - data=np.vstack((data,rdata)) + break header=yaml.load(str.join('\n', headerstr), Loader=yaml.BaseLoader) + # now we load the data itself + data=np.genfromtxt(fname, comments='%', delimiter='\t') + # Note: panda reads csv faster by factor of 8, but it needs column names provided + # otherwise it eats the first data line and makes meaningless column names from it + # data=panda.read_csv('/home/evmik/hopping_trace_20220706_02141.dat', comment='%', delimiter='\t') return(header, data) def loadTrace(fname): |