# Scraped page header (not part of the script): "Spaces: Runtime error".
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
# Print complete frames: the per-file summary table is meant to be read in full.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Call-history logs (semicolon-separated CSV files) expected under DATA_DIR.
log_files = [
    'call_history_sentiment_1_bash.csv',
    'call_history_text2int_1_bash.csv',
]

DATA_DIR = Path("./data")
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
# Substring of the "outputs" column that marks a call as failed.
TIMEOUT_MARKER = "Time-out"


def add_call_metrics(df):
    """Add the derived "elapsed" and "success" columns to *df* in place.

    "elapsed" is the number of seconds between the "started" and "finished"
    timestamps (both parsed with TIMESTAMP_FORMAT).  "success" is 0 when the
    "outputs" value contains TIMEOUT_MARKER and 1 otherwise.

    Returns the same frame so the call can be chained.
    """
    started = pd.to_datetime(df["started"], format=TIMESTAMP_FORMAT)
    finished = pd.to_datetime(df["finished"], format=TIMESTAMP_FORMAT)
    df["elapsed"] = (finished - started).dt.total_seconds()

    # Cast to str first so missing or non-string outputs count as "no
    # time-out" instead of raising TypeError on the substring test.
    timed_out = df["outputs"].astype(str).str.contains(
        TIMEOUT_MARKER, regex=False, na=False
    )
    df["success"] = (~timed_out).astype(int)
    return df


def summarize_calls(df):
    """Return elapsed-time statistics and call counts per (active_students, success)."""
    return df.groupby(['active_students', 'success']).agg({
        'elapsed': ['mean', 'median', 'min', 'max'],
        'success': ['count'],
    })


def success_percentages(df):
    """Return the percentage of successful calls for each "active_students" value.

    "success" is a 0/1 flag, so its per-group mean is the success ratio.  This
    stays well defined when a group has only failures (0.0) or only successes
    (100.0), which a lookup of the individual success/failure counts is not.
    The result is a Series indexed by "active_students" in ascending order.
    """
    return df.groupby("active_students")["success"].mean() * 100


def endpoint_title(log_file):
    """Return the figure title for *log_file*, or None for an unrecognised name."""
    name = log_file.lower()
    if "sentiment" in name:
        return "API result for 'sentiment-analysis' endpoint"
    if "text2int" in name:
        return "API result for 'text2int' endpoint"
    return None


def plot_elapsed(df, title):
    """Show one figure with a boxplot and a histogram of "elapsed" per student count.

    Each distinct "active_students" value gets its own row: boxplot in the
    left column, 25-bin histogram in the right column.
    """
    student_numbers = sorted(df['active_students'].unique())
    if not student_numbers:
        # plt.subplots rejects a grid with zero rows; nothing to draw anyway.
        return

    # squeeze=False keeps `axs` two-dimensional even when there is one row.
    fig, axs = plt.subplots(len(student_numbers), 2, squeeze=False)
    for row, student_number in enumerate(student_numbers):
        elapsed = df.loc[df["active_students"] == student_number, "elapsed"]
        box_ax, hist_ax = axs[row]

        box_ax.boxplot(x=elapsed)
        box_ax.set_xlabel(f'student number {student_number}')
        box_ax.set_ylabel('Elapsed time (s)')

        hist_ax.hist(x=elapsed, bins=25)
        hist_ax.set_xlabel('seconds')
        hist_ax.set_ylabel('Count of API calls')

    fig.suptitle(title, fontsize=16)
    plt.show()


def main():
    """Process every file in `log_files`: write the enriched CSV, print stats, plot."""
    for log_file in log_files:
        df = pd.read_csv(filepath_or_buffer=DATA_DIR / log_file, sep=";")
        add_call_metrics(df)
        df.to_csv(DATA_DIR / f"processed_{log_file}", index=False, sep=";")

        print(f"Results for {log_file}")
        print(summarize_calls(df), "\n")

        for student_number, percentage in success_percentages(df).items():
            print(f"Percentage of successful API calls for {student_number} students: {round(percentage, 2)}")

        plot_elapsed(df, endpoint_title(log_file))


if __name__ == "__main__":
    main()