File size: 3,032 Bytes
43ac953
1d9b90e
05b4410
 
 
1d9b90e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05b4410
1d9b90e
43ac953
1d9b90e
 
 
 
 
 
 
 
 
43ac953
1d9b90e
43ac953
1d9b90e
 
 
 
 
 
43ac953
1d9b90e
 
 
 
43ac953
1d9b90e
05b4410
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd

# Lift pandas' display limits (None = no limit) so printed summary tables
# are shown with all of their columns and rows.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)

# Names of the API call-history logs to analyse; each one is looked up
# under ./data/ by the processing loop.
log_files = [
    'call_history_sentiment_1_bash.csv',
    'call_history_text2int_1_bash.csv',
]

# For every call-history log: derive the duration and a success flag of each
# API call, save the enriched table, print summary statistics per number of
# active students and draw one figure with a boxplot and a histogram of the
# call durations for each load level.
for log_file in log_files:
    path_ = f"./data/{log_file}"
    df = pd.read_csv(filepath_or_buffer=path_, sep=";")

    # Call duration in seconds, parsed column-wise instead of row by row.
    timestamp_format = "%Y-%m-%d %H:%M:%S.%f"
    finished = pd.to_datetime(df["finished"], format=timestamp_format)
    started = pd.to_datetime(df["started"], format=timestamp_format)
    df["elapsed"] = (finished - started).dt.total_seconds()

    # A call is a failure (0) when its output mentions "Time-out", otherwise
    # a success (1). Casting to str first keeps the check working when the
    # column was parsed as numbers or contains empty cells.
    timed_out = df["outputs"].astype(str).str.contains("Time-out", regex=False)
    df["success"] = (~timed_out).astype(int)
    df.to_csv(f"./data/processed_{log_file}", index=False, sep=";")

    student_numbers = sorted(df['active_students'].unique())

    # Duration statistics and call counts per (load level, success flag).
    result = df.groupby(['active_students', 'success']).agg({
        'elapsed': ['mean', 'median', 'min', 'max'],
        'success': ['count'],
    })

    print(f"Results for {log_file}")
    print(result, "\n")

    title = None
    if "sentiment" in log_file.lower():
        title = "API result for 'sentiment-analysis' endpoint"
    elif "text2int" in log_file.lower():
        title = "API result for 'text2int' endpoint"

    # The mean of the 0/1 success flag is the share of successful calls.
    # Unlike looking up the (students, 0) and (students, 1) groups, this is
    # well defined when a load level has only failures or only successes.
    success_rate = df.groupby('active_students')['success'].mean() * 100
    for student_number in student_numbers:
        percentage = success_rate.loc[student_number]
        print(f"Percentage of successful API calls for {student_number} students: {round(percentage, 2)}")

    rows = len(student_numbers)
    if rows == 0:
        # An empty log has no load levels, so there is nothing to plot.
        print(f"No API calls found in {log_file}; skipping plots")
        continue

    # squeeze=False keeps axs two-dimensional even for a single load level,
    # so axs[row][column] indexing is always valid.
    fig, axs = plt.subplots(rows, 2, squeeze=False)  # (rows, columns)

    for index, student_number in enumerate(student_numbers):
        data = df[df["active_students"] == student_number]
        box_ax, hist_ax = axs[index]

        # Left column: spread of the call durations for this load level.
        box_ax.boxplot(x=data["elapsed"])
        box_ax.set_xlabel(f'student number {student_number}')
        box_ax.set_ylabel('Elapsed time (s)')

        # Right column: distribution of the call durations.
        hist_ax.hist(x=data["elapsed"], bins=25)
        hist_ax.set_xlabel('seconds')
        hist_ax.set_ylabel('Count of API calls')

    fig.suptitle(title, fontsize=16)

# Display all figures created above at once.
plt.show()