diff options
author | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2022-07-06 23:47:06 -0400 |
---|---|---|
committer | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2022-07-06 23:47:06 -0400 |
commit | 70cc272cfc9a08ea45a0bf8f6efdf69e4f606b7f (patch) | |
tree | 87c4b3d534b6ec1b8d6281eb3207a6c67352455c | |
parent | b7c4c06d86b07e24f1fbdca01234cd37ce6a0ec0 (diff) | |
download | qolab-70cc272cfc9a08ea45a0bf8f6efdf69e4f606b7f.tar.gz qolab-70cc272cfc9a08ea45a0bf8f6efdf69e4f606b7f.zip |
drastic speedup of loadTrace
The old way of appending line by line to a numpy array was super inefficient;
now we read the data with numpy's 'genfromtxt', which is drastically faster.
-rw-r--r-- | qolab/data/trace.py | 24 |
1 files changed, 13 insertions, 11 deletions
diff --git a/qolab/data/trace.py b/qolab/data/trace.py
index 67d9301..700105a 100644
--- a/qolab/data/trace.py
+++ b/qolab/data/trace.py
@@ -1,6 +1,5 @@
 from qolab.file_utils import save_table_with_header
 import datetime
-import csv
 import numpy as np
 import yaml
 
@@ -19,18 +18,21 @@ def from_timestamps_to_dates(timestamps):
 def loadTraceRawHeaderAndData(fname):
     headerstr=[]
     data = None
-    with open(fname) as csvfile:
-        rowreader = csv.reader(csvfile, delimiter='\t')
-        for row in rowreader:
-            if row[0][0:2]=='% ':
-                headerstr.append(row[0][2:])
+    with open(fname) as tracefile:
+        # Reading yaml header prefixed by '% '
+        # It sits at the top and below is just data in TSV format
+        while True:
+            ln = tracefile.readline()
+            if ln[0:2]=='% ':
+                headerstr.append(ln[2:])
             else:
-                rdata = np.array(row, dtype=float)
-                if data is None:
-                    data=rdata
-                else:
-                    data=np.vstack((data,rdata))
+                break
     header=yaml.load(str.join('\n', headerstr), Loader=yaml.BaseLoader)
+    # now we load the data itself
+    data=np.genfromtxt(fname, comments='%', delimiter='\t')
+    # Note: panda reads csv faster by factor of 8, but it needs column names provided
+    # otherwise it eats the first data line and makes meaningless column names from it
+    # data=panda.read_csv('/home/evmik/hopping_trace_20220706_02141.dat', comment='%', delimiter='\t')
     return(header, data)
 
 def loadTrace(fname):