From bbe79343213083a1cc5904731cfd0081a718fad1 Mon Sep 17 00:00:00 2001
From: Harry Bevins <40355093+htjb@users.noreply.github.com>
Date: Tue, 16 Jan 2024 16:23:32 +0000
Subject: [PATCH] Test preprocessing (#26)

* fixing #24

* fixing AFB subtraction for test data (now applied whenever AFB is enabled, not only when the full training set is used)

* bumping bug fix version number

* dividing test labels by the training-label standard deviation (labels_stds) instead of the test set's own standard deviation
---
 README.rst              |  2 +-
 globalemu/preprocess.py | 18 ++++++++----------
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/README.rst b/README.rst
index d352f96..9eb7a44 100644
--- a/README.rst
+++ b/README.rst
@@ -7,7 +7,7 @@ Introduction
 
 :globalemu: Robust Global 21-cm Signal Emulation
 :Author: Harry Thomas Jones Bevins
-:Version: 1.8.0
+:Version: 1.8.1
 :Homepage: https://github.com/htjb/globalemu
 :Documentation: https://globalemu.readthedocs.io/
 
diff --git a/globalemu/preprocess.py b/globalemu/preprocess.py
index 31d3168..eb5b046 100644
--- a/globalemu/preprocess.py
+++ b/globalemu/preprocess.py
@@ -73,13 +73,9 @@ class process():
                 data set or not. Set to True by default as this is advised for
                 training both neutral fraction and global signal emulators.
 
-        logs: **list / default: [0, 1, 2]**
+        logs: **list / default: []**
             | The indices corresponding to the astrophysical parameters in
-                "train_data.txt" that need to be logged. The default assumes
-                that the first three columns in "train_data.txt" are
-                :math:`{f_*}` (star formation efficiency),
-                :math:`{V_c}` (minimum virial circular velocity) and
-                :math:`{f_x}` (X-ray efficieny).
+                "train_data.txt" that need to be logged.
     """
 
     def __init__(self, num, z, **kwargs):
@@ -137,7 +133,7 @@ def __init__(self, num, z, **kwargs):
             if type(bool_kwargs[i]) is not bool:
                 raise TypeError(bool_strings[i] + " must be a bool.")
 
-        self.logs = kwargs.pop('logs', [0, 1, 2])
+        self.logs = kwargs.pop('logs', [])
         if type(self.logs) is not list:
             raise TypeError("'logs' must be a list.")
 
@@ -170,7 +166,6 @@ def load_data(file):
             train_data = full_train_data.copy()
             if self.preprocess_settings['AFB'] is True:
                 train_labels = full_train_labels.copy() - res.deltaT
-                test_labels -= res.deltaT
             else:
                 train_labels = full_train_labels.copy()
         else:
@@ -189,10 +184,14 @@ def load_data(file):
                     train_data.append(full_train_data[i, :])
                     if self.preprocess_settings['AFB'] is True:
                         train_labels.append(full_train_labels[i] - res.deltaT)
+
                     else:
                         train_labels.append(full_train_labels[i])
             train_data, train_labels = np.array(train_data), \
                 np.array(train_labels)
+
+        if self.preprocess_settings['AFB'] is True:
+            test_labels = test_labels.copy() - res.deltaT
 
         log_train_data = []
         for i in range(train_data.shape[1]):
@@ -268,9 +267,8 @@ def load_data(file):
             norm_train_labels = norm_train_labels.flatten()
             np.save(self.base_dir + 'labels_stds.npy', labels_stds)
 
-            test_labels_stds = test_labels.std()
             norm_test_labels = [
-                test_labels[i, :]/test_labels_stds
+                test_labels[i, :]/labels_stds
                 for i in range(test_labels.shape[0])]
             norm_test_labels = np.array(norm_test_labels)