cetinca committed on
Commit
1d9b90e
1 Parent(s): 43ac953

Update test script

Browse files
Files changed (3) hide show
  1. .gitignore +5 -5
  2. plot_calls.py +65 -17
  3. test_api.sh +25 -20
.gitignore CHANGED
@@ -98,8 +98,8 @@ docs/**/*.html
98
  .bash_env
99
  **/*secret*
100
  **/*private*
101
- /call_history.csv
102
- /call_history.txt
103
- /output.csv
104
- /call_history_bash.csv
105
- /call_history_sentiment_bash.csv
 
98
  .bash_env
99
  **/*secret*
100
  **/*private*
101
+ /bad_response.txt
102
+ /data/call_history_sentiment_1_bash.csv
103
+ /data/call_history_text2int_1_bash.csv
104
+ /data/processed_call_history_sentiment_1_bash.csv
105
+ /data/processed_call_history_text2int_1_bash.csv
plot_calls.py CHANGED
@@ -1,29 +1,77 @@
1
  from datetime import datetime
 
2
  import matplotlib.pyplot as plt
3
  import pandas as pd
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- # pd.set_option('display.max_columns', None)
7
- # pd.set_option('display.max_rows', None)
8
 
9
- df = pd.read_csv(filepath_or_buffer='call_history_bash.csv', sep=";")
10
- df["elapsed"] = df["finished"].apply(lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f")) - df["started"].apply(lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f"))
11
- df["elapsed"] = df["elapsed"].apply(lambda x: x.total_seconds())
12
- df.to_csv("output.csv", index=False, sep=";")
 
 
 
 
 
13
 
14
- student_numbers = df['active_students'].unique()
15
 
16
- plt.figure(figsize=(16, 10))
17
- rows = len(student_numbers)
 
 
 
 
18
 
19
- for index, student_number in enumerate(student_numbers, 1):
20
- data = df[df["active_students"] == student_number]
21
- plt.subplot(rows, 2, 2 * index - 1)
22
- plt.title("y=seconds, x=active students", x=0.75, y=0.75)
23
- plt.boxplot(x=data["elapsed"], labels=[student_number])
24
- plt.subplot(rows, 2, 2 * index)
25
- plt.title("y=count of seconds, x=seconds", x=0.75, y=0.75)
26
- plt.hist(x=data["elapsed"], bins=25, edgecolor='white')
27
 
 
28
 
29
  plt.show()
 
1
  from datetime import datetime
2
+
3
  import matplotlib.pyplot as plt
4
  import pandas as pd
5
 
6
+ pd.set_option('display.max_columns', None)
7
+ pd.set_option('display.max_rows', None)
8
+
9
+ log_files = [
10
+ 'call_history_sentiment_1_bash.csv',
11
+ 'call_history_text2int_1_bash.csv',
12
+ ]
13
+
14
+ for log_file in log_files:
15
+ path_ = f"./data/{log_file}"
16
+ df = pd.read_csv(filepath_or_buffer=path_, sep=";")
17
+ df["elapsed"] = df["finished"].apply(lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f")) - df["started"].apply(
18
+ lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f"))
19
+ df["elapsed"] = df["elapsed"].apply(lambda x: x.total_seconds())
20
+ df["success"] = df["outputs"].apply(lambda x: 0 if "Time-out" in x else 1)
21
+ df.to_csv(f"./data/processed_{log_file}", index=False, sep=";")
22
+
23
+ student_numbers = sorted(df['active_students'].unique())
24
+
25
+ result = df.groupby(['active_students', 'success']) \
26
+ .agg({
27
+ 'elapsed': ['mean', 'median', 'min', 'max'],
28
+ 'success': ['count'],
29
+ })
30
+
31
+ print(f"Results for {log_file}")
32
+ print(result, "\n")
33
+
34
+ title = None
35
+ if "sentiment" in log_file.lower():
36
+ title = "API result for 'sentiment-analysis' endpoint"
37
+ elif "text2int" in log_file.lower():
38
+ title = "API result for 'text2int' endpoint"
39
+
40
+ for student_number in student_numbers:
41
+ try:
42
+ failed_calls = result.loc[(student_number, 0), 'success'][0]
43
+ except:
44
+ failed_calls = 0
45
+ successful_calls = result.loc[(student_number, 1), 'success'][0]
46
+ percentage = (successful_calls / (failed_calls + successful_calls)) * 100
47
+ print(f"Percentage of successful API calls for {student_number} students: {percentage.__round__(2)}")
48
 
49
+ rows = len(student_numbers)
 
50
 
51
+ # plt.figure(figsize=(16, 10))
52
+ # for index, student_number in enumerate(student_numbers, 1):
53
+ # data = df[df["active_students"] == student_number]
54
+ # fig = plt.subplot(rows, 2, 2 * index - 1)
55
+ # plt.title("y=seconds, x=active students", x=0.75, y=0.75)
56
+ # plt.boxplot(x=data["elapsed"], labels=[student_number])
57
+ # plt.subplot(rows, 2, 2 * index)
58
+ # plt.title("y=count of seconds, x=seconds", x=0.75, y=0.75)
59
+ # plt.hist(x=data["elapsed"], bins=25, edgecolor='white')
60
 
61
+ fig, axs = plt.subplots(rows, 2) # (rows, columns)
62
 
63
+ for index, student_number in enumerate(student_numbers):
64
+ data = df[df["active_students"] == student_number]
65
+ axs[index][0].boxplot(x=data["elapsed"]) # axs[row][column]
66
+ # axs[index][0].set_title(f'Boxplot for {student_number} students')
67
+ axs[index][0].set_xlabel(f'student number {student_number}')
68
+ axs[index][0].set_ylabel('Elapsed time (s)')
69
 
70
+ axs[index][1].hist(x=data["elapsed"], bins=25) # axs[row][column]
71
+ # axs[index][1].set_title(f'Histogram for {student_number} students')
72
+ axs[index][1].set_xlabel('seconds')
73
+ axs[index][1].set_ylabel('Count of API calls')
 
 
 
 
74
 
75
+ fig.suptitle(title, fontsize=16)
76
 
77
  plt.show()
test_api.sh CHANGED
@@ -3,11 +3,10 @@
3
  LOG_FILE_NAME="call_history_bash.csv"
4
 
5
  if [[ ! -f "$LOG_FILE_NAME" ]]; then
6
- # Creation of column names if the file does not exist
7
- echo "student_id;active_students;endpoint;inputs;outputs;started;finished" > $LOG_FILE_NAME
8
  fi
9
 
10
-
11
  data_list_1() {
12
  responses=(
13
  "one hundred forty five"
@@ -30,48 +29,54 @@ data_list_2() {
30
  echo "${responses[$1]}"
31
  }
32
 
33
- # endpoints: "text2int" "text2int-preprocessed" "sentiment-analysis"
34
  # selected endpoint to test
35
- endpoint="text2int"
36
 
37
- create_random_delay () {
38
  # creates a random delay for given arguments
39
  echo "scale=8; $RANDOM/32768*$1" | bc
40
  }
41
 
42
-
43
  simulate_student() {
44
  # Student simulator waits randomly between 0-10s after an interaction.
45
  # Based on 100 interactions per student
46
  for i in {1..100}; do
 
 
 
 
 
47
  start_=$(date +"%F %T.%6N")
48
- url="https://tangibleai-mathtext.hf.space/run/$3"
49
 
50
- response=$(curl --silent -X POST "$url" -H 'Content-Type: application/json' -d "$4")
 
51
 
52
- if [[ "$response" == *"504"* ]]; then
 
 
 
 
53
  response="504 Gateway Time-out"
54
  fi
55
 
56
  end_=$(date +"%F %T.%6N")
57
- printf "%s;%s;%s;%s;%s;%s;%s\n" "$1" "$2" "$3" "$4" "$response" "$start_" "$end_" >>$LOG_FILE_NAME
 
58
  sleep "$(create_random_delay 10)"
 
59
  done
60
  }
61
 
62
  echo "start: $(date)"
63
 
64
- active_students=250 # the number of students using the system at the same time
65
 
66
  i=1
67
- while [[ "$i" -le "$active_students" ]]
68
- do
69
- random_value=$((RANDOM % 5))
70
- text=$(data_list_2 $random_value)
71
- data='{"data": ["'$text'"]}'
72
- simulate_student "student$i" "$active_students" "$endpoint" "$data" &
73
- sleep "$(create_random_delay 1)" # adding a random delay between students
74
- i=$(( "$i" + 1 ))
75
  done
76
 
77
  wait
 
3
  LOG_FILE_NAME="call_history_bash.csv"
4
 
5
  if [[ ! -f "$LOG_FILE_NAME" ]]; then
6
+ # Creation of column names if the file does not exist
7
+ echo "student_id;active_students;endpoint;inputs;outputs;started;finished" >$LOG_FILE_NAME
8
  fi
9
 
 
10
  data_list_1() {
11
  responses=(
12
  "one hundred forty five"
 
29
  echo "${responses[$1]}"
30
  }
31
 
32
+ # endpoints: "text2int" "sentiment-analysis"
33
  # selected endpoint to test
34
+ endpoint="sentiment-analysis"
35
 
36
+ create_random_delay() {
37
  # creates a random delay for given arguments
38
  echo "scale=8; $RANDOM/32768*$1" | bc
39
  }
40
 
 
41
  simulate_student() {
42
  # Student simulator waits randomly between 0-10s after an interaction.
43
  # Based on 100 interactions per student
44
  for i in {1..100}; do
45
+
46
+ random_value=$((RANDOM % 5))
47
+ text=$(data_list_2 $random_value)
48
+ data='{"data": ["'$text'"]}'
49
+
50
  start_=$(date +"%F %T.%6N")
 
51
 
52
+ url="https://tangibleai-mathtext.hf.space/run/$3"
53
+ response=$(curl --silent --connect-timeout 30 --max-time 30 -X POST "$url" -H 'Content-Type: application/json' -d "$data")
54
 
55
+ if [[ "$response" == *"Time-out"* ]]; then
56
+ echo "$response" >>bad_response.txt
57
+ response="504 Gateway Time-out"
58
+ elif [[ -z "$response" ]]; then
59
+ echo "No response" >>bad_response.txt
60
  response="504 Gateway Time-out"
61
  fi
62
 
63
  end_=$(date +"%F %T.%6N")
64
+
65
+ printf "%s;%s;%s;%s;%s;%s;%s\n" "$1" "$2" "$3" "$data" "$response" "$start_" "$end_" >>$LOG_FILE_NAME
66
  sleep "$(create_random_delay 10)"
67
+
68
  done
69
  }
70
 
71
  echo "start: $(date)"
72
 
73
+ active_students=250 # the number of students using the system at the same time
74
 
75
  i=1
76
+ while [[ "$i" -le "$active_students" ]]; do
77
+ simulate_student "student$i" "$active_students" "$endpoint" &
78
+ sleep "$(create_random_delay 1)" # adding a random delay between students
79
+ i=$(("$i" + 1))
 
 
 
 
80
  done
81
 
82
  wait