From 70cc272cfc9a08ea45a0bf8f6efdf69e4f606b7f Mon Sep 17 00:00:00 2001
From: "Eugeniy E. Mikhailov" <evgmik@gmail.com>
Date: Wed, 6 Jul 2022 23:47:06 -0400
Subject: drastic speedup of loadTrace

The old way of appending line by line to a numpy array was super inefficient;
now we read the data with numpy's 'genfromtxt', which is drastically faster.
---
 qolab/data/trace.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'qolab/data/trace.py')

diff --git a/qolab/data/trace.py b/qolab/data/trace.py
index 67d9301..700105a 100644
--- a/qolab/data/trace.py
+++ b/qolab/data/trace.py
@@ -1,6 +1,5 @@
 from qolab.file_utils import save_table_with_header
 import datetime
-import csv
 import numpy as np
 import yaml
 
@@ -19,18 +18,21 @@ def from_timestamps_to_dates(timestamps):
 def loadTraceRawHeaderAndData(fname):
     headerstr=[]
     data = None
-    with open(fname) as csvfile:
-        rowreader = csv.reader(csvfile, delimiter='\t')
-        for row in rowreader:
-            if row[0][0:2]=='% ':
-                headerstr.append(row[0][2:])
+    with open(fname) as tracefile:
+        # Reading yaml header prefixed by '% '
+        # It sits at the top and below is just data in TSV format
+        while True:
+            ln = tracefile.readline()
+            if ln[0:2]=='% ':
+                headerstr.append(ln[2:])
             else:
-                rdata = np.array(row, dtype=float)
-                if data is None:
-                    data=rdata
-                else:
-                    data=np.vstack((data,rdata))
+                break
     header=yaml.load(str.join('\n', headerstr), Loader=yaml.BaseLoader)
+    # now we load the data itself
+    data=np.genfromtxt(fname, comments='%', delimiter='\t')
+    # Note: pandas reads csv faster by a factor of 8, but it needs column names provided,
+    # otherwise it eats the first data line and makes meaningless column names from it
+    # data=panda.read_csv('/home/evmik/hopping_trace_20220706_02141.dat', comment='%', delimiter='\t')
     return(header, data)
 
 def loadTrace(fname):
-- 
cgit v1.2.3