Exporting basic features

As each file takes some time to process, it is useful to process them once and save the extracted features for later use.

To begin with the basic feature extraction, we are going to:

Resample the data.
Bandpass (0.3, 49) Hz.
Extract bandpowers.

Then we will be saving them in a .csv file to ease the experimentation.

import os
from glob import glob

import numpy as np
import pandas as pd
from rich.progress import track

import yasa
import mne

from fastcore.foundation import L

from sleepstagingidal.data import *
from sleepstagingidal.dataa import *
from sleepstagingidal.dataa import swap_dict
from sleepstagingidal.feature_extraction import *
from sleepstagingidal.feature_extraction import get_trainable_from_patients

# Gather every .edf recording located directly inside the data directory.
edf_pattern = os.path.join(path_data, "*.edf")
path_files = glob(edf_pattern)

# Channel names handed to the feature extraction step further below.
channels = [
    "C3", "C4",
    "A1", "A2",
    "O1", "O2",
    "LOC", "ROC",
    "LAT1", "LAT2",
    "ECGL", "ECGR",
    "CHIN1", "CHIN2",
]

The first step is going to be loading the different .edf files and processing them:

# Read and clean each recording in turn (resample=100, band-pass 0.3-49 Hz),
# showing a progress bar while the files are processed.
recordings = []
for edf_path in track(path_files, description="Pre-processing recordings"):
    recordings.append(read_clean_edf(edf_path, resample=100, bandpass=(0.3, 49)))
patients = L(recordings)

As we want the extracted features to be as flexible as possible, we are going to store each entry together with the patient it belongs to, so that we can perform different forms of cross-validation with this data without having to recalculate it:

# Build one feature table per recording and tag every row with the file it came
# from, so rows can later be grouped or split by patient without recomputing.
frames = []
for patient in patients:
    # The recording's file name (without directories) identifies the patient.
    name = os.path.basename(patient.filenames[0])
    try:
        features, labels = get_trainable_from_patients([patient], channels=channels, feature_extraction_fn=calculate_bandpower)
    except Exception as err:
        # Best-effort: a recording whose features cannot be extracted is skipped,
        # but it is reported instead of being dropped silently. Catching
        # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
        print(f"Skipping {name}: {err!r}")
        continue
    df_patient = pd.DataFrame(features)
    df_patient["Label"] = labels
    df_patient["Patient"] = name
    frames.append(df_patient)

# Concatenate once at the end: this avoids re-copying the growing table on every
# iteration and no longer depends on the *first* recording being processed
# successfully. Per-recording row indices are kept, as before.
df_total = pd.concat(frames) if frames else pd.DataFrame()

# Display the (rows, columns) dimensions of the combined feature table.
df_total.shape

(27680, 86)

# Preview the first rows of the combined feature table.
df_total.head()

	0	1	2	3	4	5	6	7	8	9	...	76	77	78	79	80	81	82	83	Label	Patient
0	0.495066	0.112756	0.112814	0.064382	0.165632	0.049350	0.362000	0.116360	0.171950	0.159524	...	0.064126	0.025867	0.687305	0.084597	0.083275	0.047782	0.070681	0.026361	Sleep stage W	PSG29.edf
1	0.465074	0.117853	0.177654	0.101286	0.104765	0.033368	0.347451	0.205411	0.208832	0.093656	...	0.050156	0.026275	0.811640	0.054457	0.050954	0.026253	0.037776	0.018920	Sleep stage W	PSG29.edf
2	0.493321	0.083727	0.160615	0.093394	0.127338	0.041605	0.509627	0.099740	0.174744	0.106081	...	0.057003	0.022188	0.761949	0.065250	0.071764	0.036249	0.047086	0.017702	Sleep stage W	PSG29.edf
3	0.496456	0.078696	0.145985	0.073315	0.168728	0.036820	0.415612	0.150171	0.162990	0.084801	...	0.102050	0.051728	0.657698	0.107005	0.083062	0.039670	0.079945	0.032621	Sleep stage W	PSG29.edf
4	0.499096	0.090408	0.128249	0.119394	0.126275	0.036577	0.355009	0.114010	0.227973	0.142860	...	0.064617	0.030325	0.728514	0.062010	0.082649	0.035992	0.059911	0.030924	Sleep stage W	PSG29.edf

5 rows × 86 columns

# Save the combined table to disk. `index` is left at its default, so the
# DataFrame index is written as the first CSV column.
df_total.to_csv("basic_features.csv")