nina-m-m committed on
Commit 4d9ffd8
1 Parent(s): f501147

Upload 2 files

Files changed (2)
  1. main.py +207 -0
  2. requirements.txt +116 -0
main.py ADDED
@@ -0,0 +1,207 @@
+ """Final REST API file."""
+ from io import BytesIO
+ from datetime import datetime
+ import pandas as pd
+ from typing import List
+
+ import fastapi
+ from fastapi import FastAPI, File, UploadFile, Body, Form
+ from fastapi.responses import StreamingResponse, Response
+
+ import app.src
+ from app.src.conversion import h5_to_pandas, csv_to_pandas
+ from app.src.ecg_processing import process_batch
+ from app.src.pydantic_models import ECGBatch, ECGSample, ECGConfig
+ from app.src.configs import OutputFormats
+
+ from app.src.logger import setup_logger
+ logger = setup_logger(__name__)
+
+ # Set metadata
+ with open("app/docs/description.md", "r", encoding="utf-8") as f:
+     description = f.read()
+
+ tags_metadata = [
+     {
+         "name": "💾conversion",
+         "description": "Convert and create data files without HRV feature processing.",
+     },
+     {
+         "name": "🚀feature processing",
+         "description": "Run HRV feature processing.",
+         "externalDocs": {
+             "description": "Input Data Form external docs",
+             "url": "https://github.com/hubii-world/pipeline_hrv-02#input-data-form",
+         },
+     },
+ ]
+
+ # Initialize an instance of FastAPI
+ app = FastAPI(
+     default_response_class=fastapi.responses.ORJSONResponse,
+     openapi_tags=tags_metadata,
+     title="hrv-pipeline-02 💓",
+     description=description,
+     version="0.0.1",
+     contact={
+         "name": "The Open HUman BIosignal Intelligence Platform (HUBII)",
+         "url": "https://hubii.world/hrv-pipeline-02/",
+     })
+
+
+ @app.post("/raw_json_input/", tags=["🚀feature processing"], summary="📥 Run feature processing given a raw JSON input.")
+ def process_features_by_raw_json_input(data: ECGBatch = Body(...)):
+     try:
+         samples = data.samples
+         configs = data.configs
+
+         features_df = process_batch(samples, configs)
+         features_dict = features_df.to_dict(orient='records')
+
+         return {
+             "supervisor": data.supervisor,
+             "record_date": data.record_date,
+             "configs": configs,
+             "features": features_dict}
+
+     except Exception as e:
+         error_message = str(e)
+         return {"error": error_message}
+
+
+ @app.post("/h5_input/", tags=["🚀feature processing"], summary="📂 Run feature processing given multiple h5 files.")
+ def process_features_by_h5_file_input(
+         output_format: OutputFormats = Form(..., alias="Output Format",
+                                             description="Output file format ('csv' or 'json' or 'excel_spreadsheet')."),
+         supervisor: str = Form(..., alias="Supervisor", description="Name of the supervisor doing the analysis."),
+         configs: ECGConfig = Form(None, alias="Additional Configurations",
+                                   description="Additional configurations that should be included."),
+         subject_ids: List[str] = Form(..., alias="Subject ID", description="ID of the subject of the sample data."),
+         ecg_files: List[UploadFile] = File(..., alias="ECG Data", description="HDF5 files with the ECG data."),
+         labels: List[str] = Form(None, alias="Labels", description="List with the label data."),
+ ):
+     try:
+         logger.info(f"Received {len(ecg_files)} ECG file(s)...")
+         logger.info("Validating inputs...")
+         # Labels are optional; guard against None before taking len()
+         assert labels is None or len(labels) in [0, len(ecg_files)], "Provide either no labels or exactly one label per sample."
+         assert len(subject_ids) <= len(ecg_files), "Too many subject IDs: provide at most one per sample."
+         if len(subject_ids) == 1:
+             subject_ids = [subject_ids[0]] * len(ecg_files)
+         if len(subject_ids) != len(ecg_files):
+             subject_ids += ["unknown"] * (len(ecg_files) - len(subject_ids))
+
+         logger.info("Extracting samples from files...")
+         samples = []
+         for i, file in enumerate(ecg_files):
+             sample_df = h5_to_pandas(file.file)
+             freq = int(sample_df["frequency"].iloc[0])
+             device_name = str(sample_df["device_name"].iloc[0])
+
+             samples.append(
+                 ECGSample(
+                     subject_id=subject_ids[i],
+                     frequency=freq,
+                     device_name=device_name,
+                     timestamp_idx=sample_df["timestamp_idx"].tolist(),
+                     ecg=sample_df["ecg"].tolist(),
+                     label=labels[i] if labels else None
+                 )
+             )
+
+         logger.info("Processing batch of samples...")
+         features_df = process_batch(samples, configs)
+
+         if output_format == "json":
+             features_dict = features_df.to_dict(orient='records')
+             # Return JSON response
+             return {
+                 "supervisor": supervisor,
+                 "record_date": datetime.now(),
+                 "configs": configs,
+                 "features": features_dict
+             }
+         elif output_format == "csv":
+             # Return CSV file
+             csv_data = features_df.to_csv(index=False)
+             filename = "features_output.csv"
+             return StreamingResponse(iter([csv_data]), media_type='text/csv',
+                                      headers={'Content-Disposition': f'attachment; filename="{filename}"'})
+         elif output_format == "excel_spreadsheet":
+             # Return Excel file
+             output_buffer = BytesIO()
+             with pd.ExcelWriter(output_buffer, engine='xlsxwriter') as writer:
+                 features_df.to_excel(writer, index=False, sheet_name='Sheet1')
+             output_buffer.seek(0)
+             response = Response(content=output_buffer.getvalue(),
+                                 media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
+             response.headers['Content-Disposition'] = 'attachment; filename="features_output.xlsx"'
+             return response
+         else:
+             raise ValueError(f"Output format '{output_format}' not supported.")
+
+     except Exception as e:
+         error_message = str(e)
+         return {"error": error_message}
+
+
+ @app.post("/csv_input/", tags=["🚀feature processing"], summary="📂 Run feature processing given multiple csv files.")
+ def process_features_by_csv_file_input(
+         output_format: OutputFormats = Form(..., alias="Output Format",
+                                             description="Output file format ('csv' or 'json' or 'excel_spreadsheet')."),
+         csv_file: UploadFile = File(..., alias="CSV Data", description="CSV file with the ECG data."),
+ ):
+     try:
+         # Read csv file
+         df = csv_to_pandas(csv_file.file)
+         # Implode: collapse the long-format rows back into one list-valued row per sample
+         cols_to_implode = ['timestamp_idx', 'ecg', 'label']
+         df_imploded = df.groupby(list(set(df.columns) - set(cols_to_implode))) \
+             .agg({'timestamp_idx': list,
+                   'ecg': list,
+                   'label': list}) \
+             .reset_index()
+         # Get metadata
+         config_cols = [col for col in df.columns if col.startswith('configs.')]
+         configs = df_imploded[config_cols].iloc[0].to_dict()
+         configs = {key.removeprefix('configs.'): value for key, value in configs.items()}
+         configs = ECGConfig(**configs)
+         batch_cols = [col for col in df.columns if col.startswith('batch.')]
+         batch = df_imploded[batch_cols].iloc[0].to_dict()
+         batch = {key.removeprefix('batch.'): value for key, value in batch.items()}
+         # Get samples
+         samples = df_imploded.to_dict(orient='records')
+         samples = [ECGSample(**sample) for sample in samples]
+
+         logger.info("Processing batch of samples...")
+         features_df = process_batch(samples, configs)
+
+         if output_format == "json":
+             features_dict = features_df.to_dict(orient='records')
+             # Return JSON response
+             return {
+                 "supervisor": batch['supervisor'],
+                 "record_date": batch['record_date'],
+                 "configs": configs,
+                 "features": features_dict
+             }
+         elif output_format == "csv":
+             # Return CSV file
+             csv_data = features_df.to_csv(index=False)
+             filename = "features_output.csv"
+             return StreamingResponse(iter([csv_data]), media_type='text/csv',
+                                      headers={'Content-Disposition': f'attachment; filename="{filename}"'})
+         elif output_format == "excel_spreadsheet":
+             # Return Excel file
+             output_buffer = BytesIO()
+             with pd.ExcelWriter(output_buffer, engine='xlsxwriter') as writer:
+                 features_df.to_excel(writer, index=False, sheet_name='Sheet1')
+             output_buffer.seek(0)
+             response = Response(content=output_buffer.getvalue(),
+                                 media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
+             response.headers['Content-Disposition'] = 'attachment; filename="features_output.xlsx"'
+             return response
+         else:
+             raise ValueError(f"Output format '{output_format}' not supported.")
+
+     except Exception as e:
+         error_message = str(e)
+         return {"error": error_message}
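For reference, a minimal client sketch for the two main feature-processing endpoints above (not part of this commit). The host, port, and all payload values are assumptions; the field names follow the ECGBatch/ECGSample models and the Form/File aliases exactly as they appear in main.py.

# client_example.py: hypothetical client sketch; all values below are illustrative only.
import requests

BASE_URL = "http://localhost:8000"  # assumption: API served locally on port 8000

# 1) JSON input: field names match the ECGBatch/ECGSample models used in main.py.
payload = {
    "supervisor": "Jane Doe",              # hypothetical
    "record_date": "2023-12-01T12:00:00",  # hypothetical
    "configs": None,
    "samples": [
        {
            "subject_id": "subject_01",    # hypothetical
            "frequency": 250,              # hypothetical sampling rate
            "device_name": "demo_device",  # hypothetical
            "timestamp_idx": [0, 4, 8],    # hypothetical
            "ecg": [0.01, 0.02, 0.015],    # hypothetical
            "label": None,
        }
    ],
}
response = requests.post(f"{BASE_URL}/raw_json_input/", json=payload)
print(response.json())

# 2) File input: multipart form keys must match the parameter aliases
#    declared in main.py ("Output Format", "Supervisor", "Subject ID", "ECG Data").
with open("sample.h5", "rb") as f:  # hypothetical file
    response = requests.post(
        f"{BASE_URL}/h5_input/",
        data={"Output Format": "json", "Supervisor": "Jane Doe", "Subject ID": "subject_01"},
        files={"ECG Data": ("sample.h5", f, "application/x-hdf5")},
    )
print(response.json())

Because each parameter sets an alias, FastAPI keys the form data by those alias strings rather than by the Python parameter names.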
requirements.txt ADDED
@@ -0,0 +1,116 @@
+ #
+ # This file is autogenerated by pip-compile with Python 3.10
+ # by the following command:
+ #
+ #    pip-compile requirements.in
+ #
+ annotated-types==0.6.0
+     # via pydantic
+ anyio==3.7.1
+     # via
+     #   fastapi
+     #   starlette
+ certifi==2023.11.17
+     # via requests
+ charset-normalizer==3.3.2
+     # via requests
+ click==8.1.7
+     # via uvicorn
+ colorama==0.4.6
+     # via
+     #   click
+     #   colorlog
+ colorlog==6.8.0
+     # via -r requirements.in
+ contourpy==1.2.0
+     # via matplotlib
+ cycler==0.12.1
+     # via matplotlib
+ exceptiongroup==1.2.0
+     # via anyio
+ fastapi==0.104.1
+     # via -r requirements.in
+ fonttools==4.46.0
+     # via matplotlib
+ h11==0.14.0
+     # via uvicorn
+ h5py==3.10.0
+     # via -r requirements.in
+ idna==3.6
+     # via
+     #   anyio
+     #   requests
+ joblib==1.3.2
+     # via scikit-learn
+ kiwisolver==1.4.5
+     # via matplotlib
+ matplotlib==3.8.2
+     # via neurokit2
+ neurokit2==0.2.7
+     # via -r requirements.in
+ numpy==1.26.2
+     # via
+     #   contourpy
+     #   h5py
+     #   matplotlib
+     #   neurokit2
+     #   pandas
+     #   scikit-learn
+     #   scipy
+ packaging==23.2
+     # via matplotlib
+ pandas==2.1.3
+     # via
+     #   -r requirements.in
+     #   neurokit2
+ pillow==10.1.0
+     # via matplotlib
+ pydantic==2.5.2
+     # via
+     #   -r requirements.in
+     #   fastapi
+ pydantic-core==2.14.5
+     # via pydantic
+ pyparsing==3.1.1
+     # via matplotlib
+ python-dateutil==2.8.2
+     # via
+     #   matplotlib
+     #   pandas
+ python-multipart==0.0.6
+     # via -r requirements.in
+ pytz==2023.3.post1
+     # via pandas
+ requests==2.31.0
+     # via -r requirements.in
+ scikit-learn==1.3.2
+     # via
+     #   -r requirements.in
+     #   neurokit2
+ scipy==1.11.4
+     # via
+     #   -r requirements.in
+     #   neurokit2
+     #   scikit-learn
+ six==1.16.0
+     # via python-dateutil
+ sniffio==1.3.0
+     # via anyio
+ starlette==0.27.0
+     # via fastapi
+ threadpoolctl==3.2.0
+     # via scikit-learn
+ typing-extensions==4.8.0
+     # via
+     #   fastapi
+     #   pydantic
+     #   pydantic-core
+     #   uvicorn
+ tzdata==2023.3
+     # via pandas
+ urllib3==2.1.0
+     # via requests
+ uuid==1.30
+     # via -r requirements.in
+ uvicorn==0.24.0.post1
+     # via -r requirements.in
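To serve the API, a launcher along these lines should work. This is a sketch, assuming main.py sits at the repository root next to the app/ package so that the app.src imports and the app/docs/description.md read resolve. Note also that the excel_spreadsheet branches in main.py rely on pandas' xlsxwriter engine, which is not among the pinned requirements above and would need to be added to requirements.in.

# run.py: hypothetical launcher, not part of this commit.
import uvicorn

if __name__ == "__main__":
    # "main:app" refers to the `app = FastAPI(...)` instance defined in main.py.
    uvicorn.run("main:app", host="0.0.0.0", port=8000)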