import os
from glob import glob
import numpy as np
import pandas as pd
from rich.progress import track
import yasa
import mne
from fastcore.foundation import L
from sleepstagingidal.data import *
from sleepstagingidal.dataa import *
from sleepstagingidal.dataa import swap_dict
from sleepstagingidal.feature_extraction import *
from sleepstagingidal.feature_extraction import get_trainable_from_patients
# Exporting basic features
As each file takes some time to process, it can prove useful to process them once and then save the extracted features for later use.
To begin with the basic feature extraction, we are going to:
- Resample the data.
- Bandpass (0.3, 49) Hz.
- Extract bandpowers.
Then we will be saving them in a .csv file to ease the experimentation.
# Collect every .edf recording found directly inside the data directory.
path_files = glob(os.path.join(path_data, "*.edf"))

# Channels handed to the feature extraction further down.
channels = [
    "C3", "C4", "A1", "A2", "O1", "O2", "LOC", "ROC",
    "LAT1", "LAT2", "ECGL", "ECGR", "CHIN1", "CHIN2",
]

# The first step is going to be loading the different .edf files and processing them:
# each recording is read with resampling (100) and a (0.3, 49) band-pass applied,
# while `track` shows a progress bar over the files.
patients = L([
    read_clean_edf(path, resample=100, bandpass=(0.3, 49))
    for path in track(path_files, description="Pre-processing recordings")
])

# As we want the extracted features to be as flexible as possible, we are going to
# store each entry indicating the corresponding patient, so that we can perform
# different forms of cross-validation with this data without having to recalculate it:
# Build one DataFrame per recording and concatenate them once at the end.
# Collecting the frames in a list (instead of special-casing the first
# iteration) keeps the loop working when the first recording fails, and avoids
# re-copying the accumulated table on every iteration.
frames = []
for patient in patients:
    # Use only the file name (e.g. "PSG29.edf") as the patient identifier.
    # `os.path.basename` also copes with path-like objects and OS-specific separators.
    name = os.path.basename(patient.filenames[0])
    try:
        features, labels = get_trainable_from_patients(
            [patient],
            channels=channels,
            feature_extraction_fn=calculate_bandpower,
        )
    except Exception as err:
        # Best effort: a recording that cannot be processed is skipped, but the
        # failure is reported instead of being silently swallowed.
        # `Exception` (not a bare `except`) lets Ctrl-C still interrupt the run.
        print(f"Skipping {name}: {err!r}")
        continue

    df_temp = pd.DataFrame(features)
    df_temp["Label"] = labels
    df_temp["Patient"] = name
    frames.append(df_temp)

# `pd.concat` raises on an empty list, so fall back to an empty frame when no
# recording could be processed. Per-recording row indices are kept as they are.
df_total = pd.concat(frames) if frames else pd.DataFrame()

df_total.shape  # notebook output: (27680, 86)
df_total.head()
| | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | Label | Patient |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.495066 | 0.112756 | 0.112814 | 0.064382 | 0.165632 | 0.049350 | 0.362000 | 0.116360 | 0.171950 | 0.159524 | ... | 0.064126 | 0.025867 | 0.687305 | 0.084597 | 0.083275 | 0.047782 | 0.070681 | 0.026361 | Sleep stage W | PSG29.edf |
| 1 | 0.465074 | 0.117853 | 0.177654 | 0.101286 | 0.104765 | 0.033368 | 0.347451 | 0.205411 | 0.208832 | 0.093656 | ... | 0.050156 | 0.026275 | 0.811640 | 0.054457 | 0.050954 | 0.026253 | 0.037776 | 0.018920 | Sleep stage W | PSG29.edf |
| 2 | 0.493321 | 0.083727 | 0.160615 | 0.093394 | 0.127338 | 0.041605 | 0.509627 | 0.099740 | 0.174744 | 0.106081 | ... | 0.057003 | 0.022188 | 0.761949 | 0.065250 | 0.071764 | 0.036249 | 0.047086 | 0.017702 | Sleep stage W | PSG29.edf |
| 3 | 0.496456 | 0.078696 | 0.145985 | 0.073315 | 0.168728 | 0.036820 | 0.415612 | 0.150171 | 0.162990 | 0.084801 | ... | 0.102050 | 0.051728 | 0.657698 | 0.107005 | 0.083062 | 0.039670 | 0.079945 | 0.032621 | Sleep stage W | PSG29.edf |
| 4 | 0.499096 | 0.090408 | 0.128249 | 0.119394 | 0.126275 | 0.036577 | 0.355009 | 0.114010 | 0.227973 | 0.142860 | ... | 0.064617 | 0.030325 | 0.728514 | 0.062010 | 0.082649 | 0.035992 | 0.059911 | 0.030924 | Sleep stage W | PSG29.edf |
5 rows × 86 columns
# Save the extracted features together with their "Label" and "Patient" columns so
# later experiments can load them from disk instead of re-processing the recordings.
# The file is written relative to the current working directory; the DataFrame
# index is written too (pandas' default), as the first, unnamed CSV column.
df_total.to_csv("basic_features.csv")