Skip to content

Commit

Permalink
Keeminlee (#11)
Browse files Browse the repository at this point in the history
* added verbose flag to reduce print statements

* merging

* more verbose print removal

* more, more verbose print removal

* outputs nll values

* zscore threshold set

* eks scalar covariance inflation, initial pytest setup

* removed SLEAP fish workaround

* merge

* added posterior var to eks output csvs

* ens var dynamic update fix

* merge

* removed debug prints

* fixed zscore indexing

* removed debug print for covariance scaling

* flake8

* pytests for core functions WIP

* pytests and refactoring for cleaner file i/o

* Delete scripts/plotting_aeks.py

* Delete tests/run_tests.py

* added comment for E_blocks
  • Loading branch information
keeminlee authored Nov 20, 2024
1 parent d601c1a commit 33ebba0
Show file tree
Hide file tree
Showing 12 changed files with 1,239 additions and 147 deletions.
6 changes: 5 additions & 1 deletion eks/command_line_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,14 @@ def handle_parse_args(script_type):
parser = argparse.ArgumentParser()
parser.add_argument(
'--input-dir',
required=True,
help='directory of model prediction csv files',
type=str,
)
parser.add_argument(
'--input-files',
help='list model prediction csv files in various directories',
nargs='+'
)
parser.add_argument(
'--save-dir',
help='save directory for outputs (default is input-dir)',
Expand Down
3 changes: 2 additions & 1 deletion eks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,8 @@ def compute_covariance_matrix(ensemble_preds):
# Index covariance matrix into blocks for each keypoint
cov_mats = []
for i in range(n_keypoints):
E_block = extract_submatrix(E, i)
# E_block = extract_submatrix(E, i) -- using E_block instead of the identity matrix
# leads to a correlated dynamics model, but further debugging required due to negative vars
cov_mats.append([[1, 0], [0, 1]])
cov_mats = jnp.array(cov_mats)
return cov_mats
Expand Down
37 changes: 36 additions & 1 deletion eks/ibl_pupil_smoother.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from scipy.optimize import minimize

from eks.core import backward_pass, compute_nll, eks_zscore, ensemble, forward_pass
from eks.utils import crop_frames, make_dlc_pandas_index
from eks.utils import crop_frames, make_dlc_pandas_index, format_data


# -----------------------
Expand Down Expand Up @@ -79,6 +79,41 @@ def add_mean_to_array(pred_arr, keys, mean_x, mean_y):
return processed_arr_dict


def fit_eks_pupil(input_source, data_type, save_dir, smooth_params, s_frames):
    """Fit the Ensemble Kalman Smoother to an ibl-pupil ensemble.

    Args:
        input_source (str or list): Directory path or list of input CSV files.
        data_type (str): Type of data (e.g., 'csv', 'slp').
        save_dir (str): Directory to save outputs.
            NOTE(review): accepted for API symmetry but not used inside this
            wrapper — callers handle saving; confirm before relying on it.
        smooth_params (list): [diameter_s, com_s] smoothing parameters.
        s_frames (list or None): Frames for automatic optimization if needed.

    Returns:
        tuple: (df_dicts, smooth_params, input_dfs_list, keypoint_names,
        nll_values) — smoothed DataFrames, final smoothing parameters, the
        formatted input DataFrames, keypoint names, and NLL values.
    """
    # Read every ensemble member and discover the keypoint names.
    markers_dfs, _template_df, keypoint_names = format_data(input_source, data_type)
    print(f"Input data loaded for keypoints: {keypoint_names}")

    # Smooth the ensemble; the smoother hands back the (possibly optimized)
    # smoothing parameters alongside the per-frame NLL trace.
    df_dicts, final_params, nll_values = ensemble_kalman_smoother_ibl_pupil(
        markers_list=markers_dfs,
        keypoint_names=keypoint_names,
        tracker_name='ensemble-kalman_tracker',
        smooth_params=smooth_params,
        s_frames=s_frames,
    )

    return df_dicts, final_params, markers_dfs, keypoint_names, nll_values


def ensemble_kalman_smoother_ibl_pupil(
markers_list,
keypoint_names,
Expand Down
6 changes: 4 additions & 2 deletions eks/multicam_smoother.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def ensemble_kalman_smoother_multicam(
# final cleanup
# --------------------------------------
pdindex = make_dlc_pandas_index([keypoint_ensemble],
labels=["x", "y", "likelihood", "x_var", "y_var", "zscore"])
labels=["x", "y", "likelihood", "x_var", "y_var", "zscore", "nll", "ensemble_std"])
camera_indices = []
for camera in range(num_cameras):
camera_indices.append([camera * 2, camera * 2 + 1])
Expand All @@ -180,7 +180,7 @@ def ensemble_kalman_smoother_multicam(
y_m_smooth.T[camera_indices[camera][1]] + means_camera[camera_indices[camera][1]]
# compute zscore for EKS to see how it deviates from the ensemble
eks_predictions = np.asarray([eks_pred_x, eks_pred_y]).T
zscore, _ = eks_zscore(
zscore, ensemble_std = eks_zscore(
eks_predictions, cam_ensemble_preds[camera], cam_ensemble_vars[camera],
min_ensemble_std=zscore_threshold)
pred_arr = np.vstack([
Expand All @@ -190,6 +190,8 @@ def ensemble_kalman_smoother_multicam(
y_v_smooth[:, camera_indices[camera][0], camera_indices[camera][0]],
y_v_smooth[:, camera_indices[camera][1], camera_indices[camera][1]],
zscore,
nll_values,
ensemble_std
]).T
camera_dfs[camera_name + '_df'] = pd.DataFrame(pred_arr, columns=pdindex)
return camera_dfs, smooth_param_final, nll_values
Expand Down
73 changes: 71 additions & 2 deletions eks/singlecam_smoother.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from functools import partial

import os
import jax
import jax.numpy as jnp
import numpy as np
Expand All @@ -17,7 +17,76 @@
jax_forward_pass_nlls,
pkf_and_loss,
)
from eks.utils import crop_frames, make_dlc_pandas_index
from eks.utils import crop_frames, make_dlc_pandas_index, format_data, populate_output_dataframe


def fit_eks_singlecam(input_source, data_type, save_dir, save_filename, bodypart_list, s, s_frames,
                      blocks, verbose):
    """Run the single-camera Ensemble Kalman Smoother end to end.

    Args:
        input_source (str or list): Directory path or list of CSV file paths.
        data_type (str): Type of data (e.g., 'csv', 'slp').
        save_dir (str): Directory the output CSV is written to.
        save_filename (str or None): Output file name; auto-generated when falsy.
        bodypart_list (list or None): Body parts to analyze; defaults to every
            keypoint found in the input files.
        s (float or None): Smoothing factor; optimized when None.
        s_frames (list or None): Frames used to optimize s when it is not given.
        blocks (int): Number of blocks for processing.
        verbose (bool): If True, enables verbose output.

    Returns:
        tuple: (output_df, s_finals, input_dfs, bodypart_list) — the smoothed
        results DataFrame, per-keypoint smoothing factors, the input
        DataFrames (for plotting), and the body parts actually used.
    """
    # Load every ensemble member plus an empty output template.
    input_dfs, output_df, keypoint_names = format_data(input_source, data_type)

    if bodypart_list is None:
        bodypart_list = keypoint_names
    print(f'Input data has been read in for the following keypoints:\n{bodypart_list}')

    # Stack the per-model DataFrames into one (model, frame, column) array.
    markers_3d_array = np.stack([df.to_numpy() for df in input_dfs], axis=0)

    # Keep only the x / y / likelihood columns of the requested keypoints,
    # ordered by bodypart_list.
    col_index = {col: i for i, col in enumerate(input_dfs[0].columns)}
    selected_cols = [
        col_index[part + suffix]
        for part in bodypart_list
        for suffix in ('_x', '_y', '_likelihood')
    ]
    markers_3d_array = markers_3d_array[:, :, np.array(selected_cols)]

    # Smooth each keypoint.
    df_dicts, s_finals = ensemble_kalman_smoother_singlecam(
        markers_3d_array,
        bodypart_list,
        s,
        s_frames,
        blocks,
        verbose=verbose,
    )

    # Merge the per-keypoint results into the output template.
    for idx, part in enumerate(bodypart_list):
        output_df = populate_output_dataframe(df_dicts[idx][part + '_df'], part, output_df)

    # Write the combined results to disk; the default file name embeds the
    # smoothing factor of the last keypoint.
    save_filename = save_filename or f'singlecam_{s_finals[-1]}.csv'
    output_df.to_csv(os.path.join(save_dir, save_filename))
    print("DataFrames successfully converted to CSV")

    return output_df, s_finals, input_dfs, bodypart_list


def ensemble_kalman_smoother_singlecam(
Expand Down
63 changes: 43 additions & 20 deletions eks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,40 +81,63 @@ def convert_slp_dlc(base_dir, slp_file):
return df


def format_data(input_source, data_type):
    """
    Load and format input files from a directory or a list of file paths.
    Args:
        input_source (str or list): Directory path or list of file paths.
        data_type (str): Type of data (e.g., 'csv', 'slp').
    Returns:
        input_dfs_list (list): List of formatted DataFrames.
        output_df (DataFrame): Empty DataFrame for storing results.
        keypoint_names (list): List of keypoint names.
    Raises:
        ValueError: If input_source is neither a directory path nor a list.
        FileNotFoundError: If no file matches the expected extension for
            data_type.
    """
    input_dfs_list = []
    keypoint_names = None

    # Determine if input_source is a directory or a list of file paths
    if isinstance(input_source, str) and os.path.isdir(input_source):
        # If it's a directory, list all files in the directory
        file_paths = [os.path.join(input_source, f) for f in os.listdir(input_source)]
    elif isinstance(input_source, list):
        # If it's a list of file paths, use it directly
        file_paths = input_source
    else:
        raise ValueError("input_source must be a directory path or a list of file paths")

    # Process each file based on the data type; files whose extension does not
    # match data_type are skipped silently.
    for file_path in file_paths:
        if data_type == 'slp' and file_path.endswith('.slp'):
            markers_curr = convert_slp_dlc(os.path.dirname(file_path),
                                           os.path.basename(file_path))
            # Column layout is (scorer, bodypart, coord); every third column
            # starts a new keypoint.
            keypoint_names = [c[1] for c in markers_curr.columns[::3]]
            markers_curr_fmt = markers_curr
        elif data_type in ['lp', 'dlc'] and file_path.endswith('.csv'):
            markers_curr = pd.read_csv(file_path, header=[0, 1, 2], index_col=0)
            keypoint_names = [c[1] for c in markers_curr.columns[::3]]
            model_name = markers_curr.columns[0][0]
            if data_type == 'lp':
                # LP output needs conversion to the DLC column layout
                markers_curr_fmt = convert_lp_dlc(markers_curr, keypoint_names,
                                                  model_name=model_name)
            else:
                markers_curr_fmt = markers_curr
        else:
            continue

        input_dfs_list.append(markers_curr_fmt)

    # Check if we found any valid input files
    if len(input_dfs_list) == 0:
        raise FileNotFoundError(f'No valid marker input files found in {input_source}')

    # Create an empty output DataFrame using the first processed DataFrame as a template
    output_df = make_output_dataframe(input_dfs_list[0])

    # returns both the formatted marker data and the empty dataframe for EKS output
    return input_dfs_list, output_df, keypoint_names


Expand Down
84 changes: 44 additions & 40 deletions scripts/ibl_pupil_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,56 @@
import os

from eks.command_line_args import handle_io, handle_parse_args
from eks.ibl_pupil_smoother import ensemble_kalman_smoother_ibl_pupil
from eks.ibl_pupil_smoother import fit_eks_pupil
from eks.utils import format_data, plot_results

# Collect User-Provided Arguments
smoother_type = 'pupil'
args = handle_parse_args(smoother_type)

# Determine input source (directory or list of files)
input_source = args.input_dir if isinstance(args.input_dir, str) else args.input_files
data_type = args.data_type  # LP and DLC are .csv, SLP is .slp

# Set up the save directory (defaults to an outputs/ dir next to the inputs)
if isinstance(input_source, str):
    input_dir = os.path.abspath(input_source)
else:
    # A list of files: anchor the save directory at the first file's directory
    input_dir = os.path.abspath(os.path.dirname(input_source[0]))
save_dir = handle_io(input_dir, args.save_dir)
save_filename = args.save_filename

# Parameters for smoothing (each defaults to automatic optimization)
diameter_s = args.diameter_s
com_s = args.com_s
s_frames = args.s_frames  # frames used for automatic optimization (only if no --s flag)

# Run the smoothing function
df_dicts, smooth_params, input_dfs_list, keypoint_names, nll_values = fit_eks_pupil(
    input_source=input_source,
    data_type=data_type,
    save_dir=save_dir,
    smooth_params=[diameter_s, com_s],
    s_frames=s_frames
)

# Save the results
print("Saving smoothed predictions and latents...")
markers_save_file = os.path.join(save_dir, 'kalman_smoothed_pupil_traces.csv')
latents_save_file = os.path.join(save_dir, 'kalman_smoothed_latents.csv')
df_dicts['markers_df'].to_csv(markers_save_file)
print(f'Smoothed predictions saved to {markers_save_file}')
df_dicts['latents_df'].to_csv(latents_save_file)
print(f'Latents saved to {latents_save_file}')

# Plot results
plot_results(
    output_df=df_dicts['markers_df'],
    input_dfs_list=input_dfs_list,
    key=f'{keypoint_names[-1]}',
    idxs=(0, 500),
    s_final=(smooth_params[0], smooth_params[1]),
    nll_values=nll_values,
    save_dir=save_dir,
    smoother_type=smoother_type
)
3 changes: 2 additions & 1 deletion scripts/multicam_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

# loop over keypoints; apply eks to each individually
# Note: all camera views must be stored in the same csv file
# TODO: dictionary where keys are view names, values are lists of csv paths
for keypoint_ensemble in bodypart_list:
# Separate body part predictions by camera view
marker_list_by_cam = [[] for _ in range(len(camera_names))]
Expand All @@ -51,7 +52,7 @@
# put results into new dataframe
for camera in camera_names:
cameras_df = cameras_df_dict[f'{camera}_df']
populate_output_dataframe(cameras_df, keypoint_ensemble, output_df,
output_df = populate_output_dataframe(cameras_df, keypoint_ensemble, output_df,
key_suffix=f'_{camera}')

# save eks results
Expand Down
Loading

0 comments on commit 33ebba0

Please sign in to comment.