import os
from glob import glob
import numpy as np
import pandas as pd
from rich.progress import track
import yasa
import mne
from fastcore.foundation import L
from sleepstagingidal.data import *
from sleepstagingidal.dataa import *
from sleepstagingidal.dataa import swap_dict
from sleepstagingidal.feature_extraction import *
from sleepstagingidal.feature_extraction import get_trainable_from_patients
Exporting basic features
As each file takes some time to process, it is useful to process them once and then save the extracted features for later use.
To begin with the basic feature extraction, we are going to:
- Resample the data.
- Bandpass (0.3, 49) Hz.
- Extract bandpowers.
Then we will save them in a `.csv` file to ease experimentation.
# Every EDF recording found directly inside `path_data`.
path_files = glob(os.path.join(path_data, "*.edf"))
# Channels handed to the feature extraction step for each recording.
channels = [
    "C3", "C4", "A1", "A2", "O1", "O2", "LOC", "ROC",
    "LAT1", "LAT2", "ECGL", "ECGR", "CHIN1", "CHIN2",
]
The first step is going to be loading the different `.edf` files and processing them:
# Load each recording, resampled to 100 and band-passed to (0.3, 49) Hz,
# showing a progress bar while the files are read.
patients = L(
    [
        read_clean_edf(path, resample=100, bandpass=(0.3, 49))
        for path in track(path_files, description="Pre-processing recordings")
    ]
)
As we want the extracted features to be as flexible as possible, we are going to store each entry together with its corresponding patient, so that we can perform different forms of cross-validation with this data without having to recalculate it:
# Build one feature table per recording and concatenate them once at the end.
# Collecting the frames first means a failure on the *first* recording no
# longer leaves `df_total` undefined for the following ones, and avoids
# re-copying the growing table on every iteration.
frames = []
for patient in patients:
    # basename works for both str and path-like filenames and on any OS.
    name = os.path.basename(patient.filenames[0])
    try:
        features, labels = get_trainable_from_patients(
            [patient],
            channels=channels,
            feature_extraction_fn=calculate_bandpower,
        )
    except Exception as err:
        # Best-effort: skip recordings that cannot be processed, but say which
        # one and why. KeyboardInterrupt/SystemExit are deliberately not caught.
        print(f"Skipping {name}: {err!r}")
        continue
    df_patient = pd.DataFrame(features)
    df_patient["Label"] = labels
    df_patient["Patient"] = name  # lets us split by patient later on
    frames.append(df_patient)

# Per-recording row indices are preserved (no ignore_index), as before.
# pd.concat raises ValueError if no recording could be processed.
df_total = pd.concat(frames)
# Dimensions of the combined feature table as (rows, columns).
df_total.shape
(27680, 86)
# Preview the first rows of the combined feature table.
df_total.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | Label | Patient | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.495066 | 0.112756 | 0.112814 | 0.064382 | 0.165632 | 0.049350 | 0.362000 | 0.116360 | 0.171950 | 0.159524 | ... | 0.064126 | 0.025867 | 0.687305 | 0.084597 | 0.083275 | 0.047782 | 0.070681 | 0.026361 | Sleep stage W | PSG29.edf |
1 | 0.465074 | 0.117853 | 0.177654 | 0.101286 | 0.104765 | 0.033368 | 0.347451 | 0.205411 | 0.208832 | 0.093656 | ... | 0.050156 | 0.026275 | 0.811640 | 0.054457 | 0.050954 | 0.026253 | 0.037776 | 0.018920 | Sleep stage W | PSG29.edf |
2 | 0.493321 | 0.083727 | 0.160615 | 0.093394 | 0.127338 | 0.041605 | 0.509627 | 0.099740 | 0.174744 | 0.106081 | ... | 0.057003 | 0.022188 | 0.761949 | 0.065250 | 0.071764 | 0.036249 | 0.047086 | 0.017702 | Sleep stage W | PSG29.edf |
3 | 0.496456 | 0.078696 | 0.145985 | 0.073315 | 0.168728 | 0.036820 | 0.415612 | 0.150171 | 0.162990 | 0.084801 | ... | 0.102050 | 0.051728 | 0.657698 | 0.107005 | 0.083062 | 0.039670 | 0.079945 | 0.032621 | Sleep stage W | PSG29.edf |
4 | 0.499096 | 0.090408 | 0.128249 | 0.119394 | 0.126275 | 0.036577 | 0.355009 | 0.114010 | 0.227973 | 0.142860 | ... | 0.064617 | 0.030325 | 0.728514 | 0.062010 | 0.082649 | 0.035992 | 0.059911 | 0.030924 | Sleep stage W | PSG29.edf |
5 rows × 86 columns
# Persist the extracted features so later experiments can load them
# without re-processing the recordings.
df_total.to_csv("basic_features.csv")