From 70cc272cfc9a08ea45a0bf8f6efdf69e4f606b7f Mon Sep 17 00:00:00 2001
From: "Eugeniy E. Mikhailov"
Date: Wed, 6 Jul 2022 23:47:06 -0400
Subject: drastic speedup of loadTrace

old way of appending line by line to numpy array was super inefficient,
now we read it with numpy 'genfromtxt' which is drastically faster
---
 qolab/data/trace.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'qolab/data/trace.py')

diff --git a/qolab/data/trace.py b/qolab/data/trace.py
index 67d9301..700105a 100644
--- a/qolab/data/trace.py
+++ b/qolab/data/trace.py
@@ -1,6 +1,5 @@
 from qolab.file_utils import save_table_with_header
 import datetime
-import csv
 import numpy as np
 import yaml
 
@@ -19,18 +18,21 @@ def from_timestamps_to_dates(timestamps):
 def loadTraceRawHeaderAndData(fname):
     headerstr=[]
     data = None
-    with open(fname) as csvfile:
-        rowreader = csv.reader(csvfile, delimiter='\t')
-        for row in rowreader:
-            if row[0][0:2]=='% ':
-                headerstr.append(row[0][2:])
+    with open(fname) as tracefile:
+        # Reading yaml header prefixed by '% '
+        # It sits at the top and below is just data in TSV format
+        while True:
+            ln = tracefile.readline()
+            if ln[0:2]=='% ':
+                headerstr.append(ln[2:])
             else:
-                rdata = np.array(row, dtype=float)
-                if data is None:
-                    data=rdata
-                else:
-                    data=np.vstack((data,rdata))
+                break
     header=yaml.load(str.join('\n', headerstr), Loader=yaml.BaseLoader)
+    # now we load the data itself
+    data=np.genfromtxt(fname, comments='%', delimiter='\t')
+    # Note: panda reads csv faster by factor of 8, but it needs column names provided
+    # otherwise it eats the first data line and makes meaningless column names from it
+    # data=panda.read_csv('/home/evmik/hopping_trace_20220706_02141.dat', comment='%', delimiter='\t')
     return(header, data)
 
 def loadTrace(fname):
--
cgit v1.2.3