Source code for data.behavior.gaze
from data.abstract import Dataset
import os
import glob
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm
[docs]
class Gaze(Dataset):
    """Gaze recordings from all sessions, resampled onto one shared time grid.

    The loaded table has one row per grid time and two columns per session,
    named ``x_<sess>`` and ``y_<sess>``.
    """

    # Gaze data is sampled at 500 Hz, which is 0.002 seconds per sample.
    _unit_second = 0.002

    def __init__(self):
        """Initialise the dataset through the ``Dataset`` base constructor."""
        super().__init__()

    def _load(self):
        """Read every gaze parquet file and return the time-aligned table.

        Files are taken from ``../bin/gaze`` relative to this module, in
        sorted filename order, concatenated row-wise and then handed to
        :meth:`_preprocess_gaze` with the class sampling period.

        Returns:
            pd.DataFrame: The aligned gaze table built by
            :meth:`_preprocess_gaze`.

        Raises:
            FileNotFoundError: If the directory contains no ``*.parquet`` file.
        """
        path = os.path.join(os.path.dirname(__file__), "../bin/gaze")
        files = sorted(glob.glob(f"{path}/*.parquet"))
        if not files:
            raise FileNotFoundError(f"No gaze files found in this path {path}")
        data = [pd.read_parquet(fname) for fname in tqdm(files)]
        tdf = pd.concat(data, ignore_index=True)
        return self._preprocess_gaze(tdf, self._unit_second)

    def rescale(self, us: float) -> pd.DataFrame:
        """Return the rows of ``self.raw`` selected for the unit ``us``.

        Args:
            us: Target unit, forwarded unchanged to ``self._rescale_indices``.
                NOTE(review): presumably a duration in seconds, like
                ``_unit_second`` -- confirm against ``Dataset``.

        Returns:
            pd.DataFrame: ``self.raw.iloc[indices]``. ``self.raw`` is set
            outside this class; presumably it holds the frame produced by
            :meth:`_load` -- TODO confirm.
        """
        indices = self._rescale_indices(us)
        return self.raw.iloc[indices]

    def _preprocess_gaze(self, df, unit_scale=0.002):
        """Resample every session onto a common, evenly spaced time grid.

        Args:
            df: Frame with one row per session and the columns ``sess``
                (session name, e.g. ``"P41CSR1"``), ``RecTime``, ``GazeX``
                and ``GazeY``; the last three hold equally long array-likes.
                ``RecTime`` is assumed to be expressed in the same unit as
                ``unit_scale`` -- TODO confirm.
            unit_scale: Spacing of the common grid.

        Returns:
            pd.DataFrame: Indexed by the common grid running from the
            smallest to the largest ``RecTime`` over all sessions, with the
            columns ``x_<sess>`` and ``y_<sess>`` for every input row. Grid
            points before a session's first sample stay NaN; with pandas'
            default forward fill direction, points after its last sample
            repeat the last value.

        Raises:
            ValueError: If ``df`` has no rows.
        """
        if df.empty:
            raise ValueError("Cannot align gaze data: no sessions were provided")

        all_times = np.concatenate(df['RecTime'].values)
        t_min, t_max = all_times.min(), all_times.max()
        # The small epsilon keeps t_max on the grid despite float rounding.
        common_index = pd.Index(np.arange(t_min, t_max + 1e-9, unit_scale))

        aligned_dfs = []
        for _, row in df.iterrows():
            sess_name = row['sess']  # e.g. "P41CSR1"
            times = np.asarray(row['RecTime'], dtype=float)

            # Build a DataFrame indexed by the session's own rec times, with
            # the columns already named x_<sess> / y_<sess>.
            samples = pd.DataFrame(
                {
                    f"x_{sess_name}": np.asarray(row['GazeX']),
                    f"y_{sess_name}": np.asarray(row['GazeY']),
                },
                index=times,
            )
            # reindex() refuses duplicate labels, so keep the first sample of
            # any repeated timestamp; sorting keeps the index monotonic for
            # the index-based interpolation below.
            samples = samples[~samples.index.duplicated(keep='first')].sort_index()

            # Reindexing straight onto the grid would keep only samples whose
            # timestamp equals a grid value exactly and throw the rest away
            # before interpolating. Interpolate on the union of both time
            # axes instead, then pick out the grid points.
            dense_index = samples.index.union(common_index)
            aligned = (
                samples
                .reindex(dense_index)         # NaN at grid times without a sample
                .interpolate(method='index')  # linear in time between samples
                .reindex(common_index)        # keep only the common grid
            )
            aligned_dfs.append(aligned)

        return pd.concat(aligned_dfs, axis=1)