Mengyuan Liu committed on
Commit
dfe37be
1 Parent(s): 1188a5e

Upload 71 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. dividing_into_different_subsets/3/EI/even.py +32 -0
  2. dividing_into_different_subsets/3/EI/humaneval_new.json +0 -0
  3. dividing_into_different_subsets/3/QS/CC_QS.csv +12 -0
  4. dividing_into_different_subsets/3/QS/QS.json +0 -0
  5. dividing_into_different_subsets/3/QS/calculate_humaneval_result.py +115 -0
  6. dividing_into_different_subsets/3/QS/cata_result.csv +12 -0
  7. dividing_into_different_subsets/3/QS/draw_line.py +38 -0
  8. dividing_into_different_subsets/3/QS/even.py +53 -0
  9. dividing_into_different_subsets/3/QS/flagged/log.csv +2 -0
  10. dividing_into_different_subsets/3/QS/humaneval_new.json +0 -0
  11. dividing_into_different_subsets/3/QS/humaneval_with_cata.json +658 -0
  12. dividing_into_different_subsets/3/QS/line_counts_QS.csv +12 -0
  13. dividing_into_different_subsets/3/QS/test.py +100 -0
  14. dividing_into_different_subsets/3/QS/token_counts_QS.csv +12 -0
  15. dividing_into_different_subsets/4/QS/CC_QS.csv +12 -0
  16. dividing_into_different_subsets/4/QS/QS.json +0 -0
  17. dividing_into_different_subsets/4/QS/calculate_humaneval_result.py +139 -0
  18. dividing_into_different_subsets/4/QS/even.py +65 -0
  19. dividing_into_different_subsets/4/QS/humaneval_new.json +0 -0
  20. dividing_into_different_subsets/4/QS/humaneval_with_cata.json +658 -0
  21. dividing_into_different_subsets/4/QS/line_counts_QS.csv +12 -0
  22. dividing_into_different_subsets/4/QS/token_counts_QS.csv +12 -0
  23. dividing_into_different_subsets/5/QS/CC_QS.csv +12 -0
  24. dividing_into_different_subsets/5/QS/QS.json +0 -0
  25. dividing_into_different_subsets/5/QS/calculate_humaneval_result.py +157 -0
  26. dividing_into_different_subsets/5/QS/draw_line.py +38 -0
  27. dividing_into_different_subsets/5/QS/even.py +71 -0
  28. dividing_into_different_subsets/5/QS/humaneval_new.json +0 -0
  29. dividing_into_different_subsets/5/QS/humaneval_with_cata.json +658 -0
  30. dividing_into_different_subsets/5/QS/line_counts_QS.csv +12 -0
  31. dividing_into_different_subsets/5/QS/token_counts_QS.csv +12 -0
  32. dividing_into_different_subsets/6/QS/CC_QS.csv +13 -0
  33. dividing_into_different_subsets/6/QS/QS.json +0 -0
  34. dividing_into_different_subsets/6/QS/calculate_humaneval_result.py +176 -0
  35. dividing_into_different_subsets/6/QS/even.py +80 -0
  36. dividing_into_different_subsets/6/QS/humaneval_new.json +0 -0
  37. dividing_into_different_subsets/6/QS/humaneval_with_cata.json +658 -0
  38. dividing_into_different_subsets/6/QS/line_counts_QS.csv +13 -0
  39. dividing_into_different_subsets/6/QS/token_counts_QS.csv +13 -0
  40. dividing_into_different_subsets/7/QS/CC_QS.csv +13 -0
  41. dividing_into_different_subsets/7/QS/QS.json +0 -0
  42. dividing_into_different_subsets/7/QS/calculate_humaneval_result.py +195 -0
  43. dividing_into_different_subsets/7/QS/even.py +87 -0
  44. dividing_into_different_subsets/7/QS/humaneval_new.json +0 -0
  45. dividing_into_different_subsets/7/QS/humaneval_with_cata.json +658 -0
  46. dividing_into_different_subsets/7/QS/line_counts_QS.csv +13 -0
  47. dividing_into_different_subsets/7/QS/token_counts_QS.csv +13 -0
  48. dividing_into_different_subsets/8/QS/CC_QS.csv +13 -0
  49. dividing_into_different_subsets/8/QS/QS.json +0 -0
  50. dividing_into_different_subsets/8/QS/TEST.json +0 -0
dividing_into_different_subsets/3/EI/even.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
# Split the problems in humaneval_new.json into equal-width intervals of
# their 'line' value and record the interval index on every problem.

# Number of equal-width intervals to create.
NUM_INTERVALS = 3


def equal_width_bin(value, low, high, num_intervals):
    """Return the 0-based index of the equal-width interval holding ``value``.

    The range ``[low, high]`` is cut into ``num_intervals`` intervals of
    equal width.  Every interval is half-open (``start <= value < end``)
    except the last one, which also includes ``high``; otherwise the
    maximum value would belong to no interval at all.

    Raises:
        ValueError: if ``num_intervals`` is smaller than 1.
    """
    if num_intervals < 1:
        raise ValueError("num_intervals must be at least 1")
    width = (high - low) / num_intervals
    if width == 0:
        # All values are identical, so there is nothing to separate.
        return 0
    for i in range(num_intervals - 1):
        if value < low + (i + 1) * width:
            return i
    # Anything not below an inner boundary (including ``high``) is in the
    # last interval.
    return num_intervals - 1


def split_into_intervals(data, key, num_intervals):
    """Group the dicts in ``data`` by the equal-width interval of ``obj[key]``.

    Returns a list of ``num_intervals`` lists; the original order of the
    objects is kept inside each list.  Empty ``data`` yields empty lists.
    """
    subsets = [[] for _ in range(num_intervals)]
    if not data:
        return subsets
    values = [obj[key] for obj in data]
    low = min(values)
    high = max(values)
    for obj in data:
        subsets[equal_width_bin(obj[key], low, high, num_intervals)].append(obj)
    return subsets


def main():
    """Read humaneval_new.json, label every problem, and write EI.json."""
    # Read with the same encoding that is used for writing below.
    with open('humaneval_new.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    subsets = split_into_intervals(data, 'line', NUM_INTERVALS)

    # NOTE(review): the previous version computed the subsets but wrote the
    # input back unchanged.  The label key mirrors the 'line_diff' key used
    # by the QS scripts -- confirm this is what downstream readers expect.
    for index, subset in enumerate(subsets):
        for obj in subset:
            obj['line_diff'] = index

    with open('EI.json', 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()
dividing_into_different_subsets/3/EI/humaneval_new.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/3/QS/CC_QS.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,CC_subset_1,CC_subset_2,CC_subset_3
2
+ CodeFuse-DeepSeek-33b,80.0,77.78,72.73
3
+ Nxcode-CQ-7B,88.18,85.37,88.27
4
+ codegemma-2b,37.55,25.09,18.73
5
+ codegemma-7b,51.64,37.96,29.73
6
+ codegemma-7b-it,60.55,52.31,46.55
7
+ deepseek-coder-1.3b-base,45.45,31.2,20.27
8
+ deepseek-coder-6.7b-base,57.0,45.09,34.91
9
+ deepseek_coder-6.7b-instruct,75.18,72.78,66.82
10
+ deepseek_coder_33b-base,60.73,51.94,45.55
11
+ deepseek_coder_33b-instruct,70.36,66.3,61.73
12
+ codeqwen1.5-7b,58.73,51.76,43.64
dividing_into_different_subsets/3/QS/QS.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/3/QS/calculate_humaneval_result.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import csv
4
# Average every model's HumanEval pass@1 results over the subsets labelled
# in QS.json and write one CSV row per model.

# Directory holding one evaluation-result JSON file per model.  Raw string so
# that the backslashes are never interpreted as escape sequences.
input_dir = r'E:\python-testn\pythonProject3\hh_1\evaluate_result'

# File with one entry per problem carrying the subset labels.
SUBSET_FILE = "QS.json"

# Number of subsets the problems were divided into (labels 0 .. N-1).
NUM_SUBSETS = 3

# metric name -> (label key in QS.json, CSV column prefix, output CSV file)
METRICS = {
    "token": ("token_diff", "token_subset_", "token_counts_QS.csv"),
    "line": ("line_diff", "line_subset_", "line_counts_QS.csv"),
    "CC": ("CC_diff", "CC_subset_", "CC_QS.csv"),
}

# Metric evaluated when the script is run; one of the keys of METRICS.
METRIC = "CC"


def model_name_from_file(file_name):
    """Return the text between the first '_' and the last '-' of ``file_name``."""
    first_underscore_index = file_name.find('_')
    last_dash_index = file_name.rfind('-')
    return file_name[first_underscore_index + 1:last_dash_index]


def subset_means(pass_at_1, problems, label_key, num_subsets):
    """Average the pass@1 values of each subset.

    ``pass_at_1`` is a sequence of ``(index, value)`` pairs and ``problems``
    a sequence of dicts; the two are paired positionally.  A pair counts
    towards subset ``k`` when ``problem[label_key] == k``; labels outside
    ``0 .. num_subsets - 1`` are ignored.

    Returns ``(counts, means)``.  Each mean is a percentage rounded to two
    decimals, or ``None`` for a subset without any problem (written as an
    empty CSV cell) instead of failing with a division by zero.
    """
    totals = [0] * num_subsets
    counts = [0] * num_subsets
    for (index, value), problem in zip(pass_at_1, problems):
        label = problem[label_key]
        if label in range(num_subsets):
            print(label, index, value)
            totals[label] = totals[label] + value
            counts[label] = counts[label] + 1
    means = [
        round(total / count * 100, 2) if count else None
        for total, count in zip(totals, counts)
    ]
    return counts, means


def main():
    """Write the per-subset means of every model in ``input_dir`` to a CSV."""
    label_key, column_prefix, output_csv = METRICS[METRIC]

    # List the result files first so a missing directory fails before any
    # output file is created.
    files = os.listdir(input_dir)

    # The subset labels are the same for every model: load them once.
    with open(SUBSET_FILE, "r", encoding="utf-8") as file:
        problems = json.load(file)

    header = ["Model"] + [
        column_prefix + str(i) for i in range(1, NUM_SUBSETS + 1)
    ]
    with open(output_csv, "w", newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)

        for file_name in files:
            input_file_path = os.path.join(input_dir, file_name)
            model_name = model_name_from_file(file_name)
            print(model_name)
            with open(input_file_path, "r", encoding="utf-8") as file:
                results = json.load(file)

            counts, means = subset_means(
                results["humaneval"]["pass@1"], problems, label_key, NUM_SUBSETS
            )
            print("count_result!!")
            print(*counts)
            print(*means)
            writer.writerow([model_name] + means)


if __name__ == "__main__":
    main()
114
+
115
+
dividing_into_different_subsets/3/QS/cata_result.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,String,Math,Array,Sorting,Hash Table,Stack,Search,Matrix
2
+ CodeFuse-DeepSeek-33b,78.57,72.84,77.78,72.41,83.33,100.0,78.26,100.0
3
+ Nxcode-CQ-7B,87.29,86.54,87.06,88.28,74.17,85.0,83.91,20.0
4
+ codegemma-2b,21.21,29.2,29.0,18.28,7.92,17.14,25.43,0.0
5
+ codegemma-7b,34.86,41.91,41.39,29.83,27.5,23.57,32.61,0.0
6
+ codegemma-7b-it,53.0,53.46,53.72,43.28,46.25,38.57,50.65,0.0
7
+ deepseek-coder-1.3b-base,28.0,36.91,30.11,25.17,15.83,22.14,25.65,0.0
8
+ deepseek-coder-6.7b-base,39.64,49.01,44.72,40.86,28.75,37.14,39.57,0.0
9
+ deepseek_coder-6.7b-instruct,69.79,71.98,74.11,73.28,38.75,53.57,78.48,0.0
10
+ deepseek_coder_33b-base,47.64,54.26,52.78,49.48,33.75,41.43,49.57,0.0
11
+ deepseek_coder_33b-instruct,63.29,69.07,66.44,60.34,50.0,46.43,61.09,0.0
12
+ codeqwen1.5-7b,47.64,55.0,50.67,45.69,35.0,45.71,43.91,0.0
dividing_into_different_subsets/3/QS/draw_line.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import plotly.express as px
4
+
5
+
6
def plot_csv(df):
    """Return a plotly line figure with one line per model.

    Args:
        df: DataFrame with a 'Model' column; every other column becomes one
            point on the x axis.

    Returns:
        The figure created by ``px.line``.
    """
    # set_index without inplace returns a new frame, so the caller's
    # DataFrame keeps its 'Model' column and the function can be called on
    # the same frame more than once.
    indexed = df.set_index('Model')

    # Transpose so that the models become columns and the metrics rows.
    df_transposed = indexed.T

    # Draw one line per model.
    fig = px.line(df_transposed, x=df_transposed.index, y=df_transposed.columns,
                  title='Model Evaluation Results',
                  labels={'value': 'Evaluation Score', 'index': 'Evaluation Metric'},
                  color_discrete_sequence=px.colors.qualitative.Plotly)

    # Show only the y value when hovering over a point.
    fig.update_traces(hovertemplate='%{y}')

    return fig
23
+
24
+
25
# Load the local CSV file that provides the initial table contents.
file_path = 'line_counts_QS.csv'
df = pd.read_csv(file_path)

# Build the Gradio app: an editable table as input, the line plot as output.
table_input = gr.Dataframe(df)
plot_output = gr.Plot(label="Line Plot")
iface = gr.Interface(
    plot_csv,
    table_input,
    plot_output,
    title="CSV to Line Plot",
    description="Visualize the evaluation results as a line plot.",
)

iface.launch()
dividing_into_different_subsets/3/QS/even.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
# Label every problem with the rank-based subset it falls into for each
# metric and write the result to QS.json.

# (key to sort by, key that receives the label, percentage of problems per
# subset).  The last percentage is implied: the last subset takes whatever
# remains after the earlier ones.
METRICS = (
    ('line', 'line_diff', [33, 33, 34]),
    ('token', 'token_diff', [33, 33, 34]),
    ('cyclomatic_complexity', 'CC_diff', [33, 33, 34]),
)


def label_by_rank(data, sort_key, label_key, percentages):
    """Sort ``data`` in place by ``sort_key`` and label each item by rank.

    The item at rank ``i`` (0-based position after sorting) receives
    ``item[label_key] = k`` where ``k`` is the first subset whose cumulative
    upper bound exceeds ``i``; items beyond the last bound get the last
    label.  Bounds are derived from ``len(data)`` rather than a fixed
    dataset size, so the function works for any number of items.
    """
    data.sort(key=lambda item: item[sort_key])
    total = len(data)

    # Cumulative upper bounds of all subsets except the last one.
    bounds = []
    cumulative = 0
    for percent in percentages[:-1]:
        cumulative = cumulative + percent * 0.01 * total
        bounds.append(cumulative)

    for rank, item in enumerate(data):
        # The bounds are non-decreasing, so counting the ones already
        # reached gives the index of the first bound above ``rank``.
        item[label_key] = sum(1 for bound in bounds if rank >= bound)


def main():
    """Read humaneval_new.json, add all subset labels, and write QS.json."""
    with open("humaneval_new.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    # The sorts are stable and applied one after another, so ties in a later
    # metric keep the order produced by the earlier ones.
    for sort_key, label_key, percentages in METRICS:
        label_by_rank(data, sort_key, label_key, percentages)

    # Restore the original problem order before saving.
    data.sort(key=lambda x: x['id'])
    with open('QS.json', 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()
53
+
dividing_into_different_subsets/3/QS/flagged/log.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ df,Line Plot,flag,username,timestamp
2
+ "{""headers"": [""Model"", ""line_subset_1"", ""line_subset_2"", ""line_subset_3""], ""data"": [[""CodeFuse-DeepSeek-33b"", 81.82, 72.22, 76.36], [""Nxcode-CQ-7B"", 92.09, 88.33, 81.45], [""codegemma-2b"", 44.09, 17.5, 19.64], [""codegemma-7b"", 52.45, 35.19, 31.64], [""codegemma-7b-it"", 66.36, 49.26, 43.73], [""deepseek-coder-1.3b-base"", 47.45, 26.39, 23], [""deepseek-coder-6.7b-base"", 63.36, 39.35, 34.18], [""deepseek_coder-6.7b-instruct"", 85, 66.85, 62.82], [""deepseek_coder_33b-base"", 68, 48.89, 41.27], [""deepseek_coder_33b-instruct"", 82.09, 62.31, 53.91], [""codeqwen1.5-7b"", 59.73, 48.7, 45.64]], ""metadata"": null}",,,,2024-09-22 18:55:59.262701
dividing_into_different_subsets/3/QS/humaneval_new.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/3/QS/humaneval_with_cata.json ADDED
@@ -0,0 +1,658 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "answer": "Array, Sorting",
4
+ "id": 0
5
+ },
6
+ {
7
+ "answer": "String, Stack",
8
+ "id": 1
9
+ },
10
+ {
11
+ "answer": "Math",
12
+ "id": 2
13
+ },
14
+ {
15
+ "answer": "Array, Math",
16
+ "id": 3
17
+ },
18
+ {
19
+ "answer": "Math, Array",
20
+ "id": 4
21
+ },
22
+ {
23
+ "answer": "Array",
24
+ "id": 5
25
+ },
26
+ {
27
+ "answer": "String, Stack",
28
+ "id": 6
29
+ },
30
+ {
31
+ "answer": "String, Array, Search",
32
+ "id": 7
33
+ },
34
+ {
35
+ "answer": "Math, Array",
36
+ "id": 8
37
+ },
38
+ {
39
+ "answer": "Array, Stack",
40
+ "id": 9
41
+ },
42
+ {
43
+ "answer": "String, Search",
44
+ "id": 10
45
+ },
46
+ {
47
+ "answer": "String, Math",
48
+ "id": 11
49
+ },
50
+ {
51
+ "answer": "String, Array",
52
+ "id": 12
53
+ },
54
+ {
55
+ "answer": "Math",
56
+ "id": 13
57
+ },
58
+ {
59
+ "answer": "String, Array",
60
+ "id": 14
61
+ },
62
+ {
63
+ "answer": "String, Math",
64
+ "id": 15
65
+ },
66
+ {
67
+ "answer": "String, Hash table",
68
+ "id": 16
69
+ },
70
+ {
71
+ "answer": "String, Array",
72
+ "id": 17
73
+ },
74
+ {
75
+ "answer": "String, Search",
76
+ "id": 18
77
+ },
78
+ {
79
+ "answer": "String, Sorting",
80
+ "id": 19
81
+ },
82
+ {
83
+ "answer": "Array, Sorting",
84
+ "id": 20
85
+ },
86
+ {
87
+ "answer": "Array, Math",
88
+ "id": 21
89
+ },
90
+ {
91
+ "answer": "Array, Search",
92
+ "id": 22
93
+ },
94
+ {
95
+ "answer": "String",
96
+ "id": 23
97
+ },
98
+ {
99
+ "answer": "Math",
100
+ "id": 24
101
+ },
102
+ {
103
+ "answer": "Math, Array",
104
+ "id": 25
105
+ },
106
+ {
107
+ "answer": "Array, Hash table",
108
+ "id": 26
109
+ },
110
+ {
111
+ "answer": "String",
112
+ "id": 27
113
+ },
114
+ {
115
+ "answer": "String",
116
+ "id": 28
117
+ },
118
+ {
119
+ "answer": "String, Array",
120
+ "id": 29
121
+ },
122
+ {
123
+ "answer": "Array",
124
+ "id": 30
125
+ },
126
+ {
127
+ "answer": "Math",
128
+ "id": 31
129
+ },
130
+ {
131
+ "answer": "Math, Search",
132
+ "id": 32
133
+ },
134
+ {
135
+ "answer": "Array, Sorting",
136
+ "id": 33
137
+ },
138
+ {
139
+ "answer": "Array, Sorting",
140
+ "id": 34
141
+ },
142
+ {
143
+ "answer": "Math, Array",
144
+ "id": 35
145
+ },
146
+ {
147
+ "answer": "Math, Search",
148
+ "id": 36
149
+ },
150
+ {
151
+ "answer": "Array, Sorting",
152
+ "id": 37
153
+ },
154
+ {
155
+ "answer": "String, Array",
156
+ "id": 38
157
+ },
158
+ {
159
+ "answer": "Math, Search",
160
+ "id": 39
161
+ },
162
+ {
163
+ "answer": "Array, Search",
164
+ "id": 40
165
+ },
166
+ {
167
+ "answer": "Math, Array",
168
+ "id": 41
169
+ },
170
+ {
171
+ "answer": "Array, Math",
172
+ "id": 42
173
+ },
174
+ {
175
+ "answer": "Array, Hash table",
176
+ "id": 43
177
+ },
178
+ {
179
+ "answer": "Math, String",
180
+ "id": 44
181
+ },
182
+ {
183
+ "answer": "Math",
184
+ "id": 45
185
+ },
186
+ {
187
+ "answer": "Math, Array",
188
+ "id": 46
189
+ },
190
+ {
191
+ "answer": "Array, Sorting",
192
+ "id": 47
193
+ },
194
+ {
195
+ "answer": "String",
196
+ "id": 48
197
+ },
198
+ {
199
+ "answer": "Math",
200
+ "id": 49
201
+ },
202
+ {
203
+ "answer": "String, Math",
204
+ "id": 50
205
+ },
206
+ {
207
+ "answer": "String",
208
+ "id": 51
209
+ },
210
+ {
211
+ "answer": "Array, Search",
212
+ "id": 52
213
+ },
214
+ {
215
+ "answer": "Math",
216
+ "id": 53
217
+ },
218
+ {
219
+ "answer": "String, Hash table",
220
+ "id": 54
221
+ },
222
+ {
223
+ "answer": "Math",
224
+ "id": 55
225
+ },
226
+ {
227
+ "answer": "String, Stack",
228
+ "id": 56
229
+ },
230
+ {
231
+ "answer": "Array, Sorting",
232
+ "id": 57
233
+ },
234
+ {
235
+ "answer": "Array, Sorting",
236
+ "id": 58
237
+ },
238
+ {
239
+ "answer": "Math, Search",
240
+ "id": 59
241
+ },
242
+ {
243
+ "answer": "Math",
244
+ "id": 60
245
+ },
246
+ {
247
+ "answer": "String, Stack",
248
+ "id": 61
249
+ },
250
+ {
251
+ "answer": "Array, Math",
252
+ "id": 62
253
+ },
254
+ {
255
+ "answer": "Math, Array",
256
+ "id": 63
257
+ },
258
+ {
259
+ "answer": "String",
260
+ "id": 64
261
+ },
262
+ {
263
+ "answer": "String, Math",
264
+ "id": 65
265
+ },
266
+ {
267
+ "answer": "String, Math",
268
+ "id": 66
269
+ },
270
+ {
271
+ "answer": "String, Math",
272
+ "id": 67
273
+ },
274
+ {
275
+ "answer": "Array, Search",
276
+ "id": 68
277
+ },
278
+ {
279
+ "answer": "Array, Hash table",
280
+ "id": 69
281
+ },
282
+ {
283
+ "answer": "Array, Sorting",
284
+ "id": 70
285
+ },
286
+ {
287
+ "answer": "Math, Array",
288
+ "id": 71
289
+ },
290
+ {
291
+ "answer": "Array, Math",
292
+ "id": 72
293
+ },
294
+ {
295
+ "answer": "Array, Sorting",
296
+ "id": 73
297
+ },
298
+ {
299
+ "answer": "String, Array",
300
+ "id": 74
301
+ },
302
+ {
303
+ "answer": "Math, Hash table",
304
+ "id": 75
305
+ },
306
+ {
307
+ "answer": "Math",
308
+ "id": 76
309
+ },
310
+ {
311
+ "answer": "Math",
312
+ "id": 77
313
+ },
314
+ {
315
+ "answer": "String, Hash table",
316
+ "id": 78
317
+ },
318
+ {
319
+ "answer": "String, Math",
320
+ "id": 79
321
+ },
322
+ {
323
+ "answer": "String, Search",
324
+ "id": 80
325
+ },
326
+ {
327
+ "answer": "Array, Sorting",
328
+ "id": 81
329
+ },
330
+ {
331
+ "answer": "String, Math",
332
+ "id": 82
333
+ },
334
+ {
335
+ "answer": "Math, Array",
336
+ "id": 83
337
+ },
338
+ {
339
+ "answer": "Math, String",
340
+ "id": 84
341
+ },
342
+ {
343
+ "answer": "Array, Math",
344
+ "id": 85
345
+ },
346
+ {
347
+ "answer": "String, Sorting",
348
+ "id": 86
349
+ },
350
+ {
351
+ "answer": "Array, Search, Sorting",
352
+ "id": 87
353
+ },
354
+ {
355
+ "answer": "Array, Sorting",
356
+ "id": 88
357
+ },
358
+ {
359
+ "answer": "String, Math",
360
+ "id": 89
361
+ },
362
+ {
363
+ "answer": "Array, Sorting",
364
+ "id": 90
365
+ },
366
+ {
367
+ "answer": "String, Search",
368
+ "id": 91
369
+ },
370
+ {
371
+ "answer": "Math, Array",
372
+ "id": 92
373
+ },
374
+ {
375
+ "answer": "String, Hash table",
376
+ "id": 93
377
+ },
378
+ {
379
+ "answer": "Array, Math",
380
+ "id": 94
381
+ },
382
+ {
383
+ "answer": "String, Hash table",
384
+ "id": 95
385
+ },
386
+ {
387
+ "answer": "Math, Array",
388
+ "id": 96
389
+ },
390
+ {
391
+ "answer": "Math",
392
+ "id": 97
393
+ },
394
+ {
395
+ "answer": "String, Array",
396
+ "id": 98
397
+ },
398
+ {
399
+ "answer": "String, Math",
400
+ "id": 99
401
+ },
402
+ {
403
+ "answer": "Array, Math",
404
+ "id": 100
405
+ },
406
+ {
407
+ "answer": "String, Array",
408
+ "id": 101
409
+ },
410
+ {
411
+ "answer": "Math, Search",
412
+ "id": 102
413
+ },
414
+ {
415
+ "answer": "Math, String",
416
+ "id": 103
417
+ },
418
+ {
419
+ "answer": "Array, Sorting",
420
+ "id": 104
421
+ },
422
+ {
423
+ "answer": "Array, Sorting",
424
+ "id": 105
425
+ },
426
+ {
427
+ "answer": "Array, Math",
428
+ "id": 106
429
+ },
430
+ {
431
+ "answer": "Math, Search",
432
+ "id": 107
433
+ },
434
+ {
435
+ "answer": "Array, Math",
436
+ "id": 108
437
+ },
438
+ {
439
+ "answer": "Array, Sorting",
440
+ "id": 109
441
+ },
442
+ {
443
+ "answer": "Array, String",
444
+ "id": 110
445
+ },
446
+ {
447
+ "answer": "String, Hash table",
448
+ "id": 111
449
+ },
450
+ {
451
+ "answer": "String, Sorting",
452
+ "id": 112
453
+ },
454
+ {
455
+ "answer": "String, Array",
456
+ "id": 113
457
+ },
458
+ {
459
+ "answer": "Array, Math",
460
+ "id": 114
461
+ },
462
+ {
463
+ "answer": "Array, Math",
464
+ "id": 115
465
+ },
466
+ {
467
+ "answer": "Array, Sorting",
468
+ "id": 116
469
+ },
470
+ {
471
+ "answer": "String, Array",
472
+ "id": 117
473
+ },
474
+ {
475
+ "answer": "String, Search",
476
+ "id": 118
477
+ },
478
+ {
479
+ "answer": "String, Stack",
480
+ "id": 119
481
+ },
482
+ {
483
+ "answer": "Array, Sorting",
484
+ "id": 120
485
+ },
486
+ {
487
+ "answer": "Array, Math",
488
+ "id": 121
489
+ },
490
+ {
491
+ "answer": "Array, Math",
492
+ "id": 122
493
+ },
494
+ {
495
+ "answer": "Array, Sorting",
496
+ "id": 123
497
+ },
498
+ {
499
+ "answer": "String, Math",
500
+ "id": 124
501
+ },
502
+ {
503
+ "answer": "String, Math",
504
+ "id": 125
505
+ },
506
+ {
507
+ "answer": "Array, Sorting",
508
+ "id": 126
509
+ },
510
+ {
511
+ "answer": "Math, Array",
512
+ "id": 127
513
+ },
514
+ {
515
+ "answer": "Array, Math",
516
+ "id": 128
517
+ },
518
+ {
519
+ "answer": "Array, Search, Matrix",
520
+ "id": 129
521
+ },
522
+ {
523
+ "answer": "Math, Array",
524
+ "id": 130
525
+ },
526
+ {
527
+ "answer": "Math, String",
528
+ "id": 131
529
+ },
530
+ {
531
+ "answer": "String, Stack",
532
+ "id": 132
533
+ },
534
+ {
535
+ "answer": "Array, Math",
536
+ "id": 133
537
+ },
538
+ {
539
+ "answer": "String, Search",
540
+ "id": 134
541
+ },
542
+ {
543
+ "answer": "Array, Search",
544
+ "id": 135
545
+ },
546
+ {
547
+ "answer": "Array, Search",
548
+ "id": 136
549
+ },
550
+ {
551
+ "answer": "String, Math",
552
+ "id": 137
553
+ },
554
+ {
555
+ "answer": "Math",
556
+ "id": 138
557
+ },
558
+ {
559
+ "answer": "Math, Array",
560
+ "id": 139
561
+ },
562
+ {
563
+ "answer": "String, Array",
564
+ "id": 140
565
+ },
566
+ {
567
+ "answer": "String, Hash table",
568
+ "id": 141
569
+ },
570
+ {
571
+ "answer": "Array, Math",
572
+ "id": 142
573
+ },
574
+ {
575
+ "answer": "String, Math",
576
+ "id": 143
577
+ },
578
+ {
579
+ "answer": "String, Math",
580
+ "id": 144
581
+ },
582
+ {
583
+ "answer": "Array, Sorting",
584
+ "id": 145
585
+ },
586
+ {
587
+ "answer": "Array, Math",
588
+ "id": 146
589
+ },
590
+ {
591
+ "answer": "Array, Math",
592
+ "id": 147
593
+ },
594
+ {
595
+ "answer": "String, Array, Sorting",
596
+ "id": 148
597
+ },
598
+ {
599
+ "answer": "String, Sorting",
600
+ "id": 149
601
+ },
602
+ {
603
+ "answer": "Math, Search",
604
+ "id": 150
605
+ },
606
+ {
607
+ "answer": "Array, Math",
608
+ "id": 151
609
+ },
610
+ {
611
+ "answer": "Array, Math",
612
+ "id": 152
613
+ },
614
+ {
615
+ "answer": "String, Array, Sorting",
616
+ "id": 153
617
+ },
618
+ {
619
+ "answer": "String, Search",
620
+ "id": 154
621
+ },
622
+ {
623
+ "answer": "String, Math",
624
+ "id": 155
625
+ },
626
+ {
627
+ "answer": "String, Math",
628
+ "id": 156
629
+ },
630
+ {
631
+ "answer": "Math, Sorting",
632
+ "id": 157
633
+ },
634
+ {
635
+ "answer": "String, Array",
636
+ "id": 158
637
+ },
638
+ {
639
+ "answer": "Array, Math",
640
+ "id": 159
641
+ },
642
+ {
643
+ "answer": "String, Array, Math",
644
+ "id": 160
645
+ },
646
+ {
647
+ "answer": "String, Array",
648
+ "id": 161
649
+ },
650
+ {
651
+ "answer": "String, Hash table",
652
+ "id": 162
653
+ },
654
+ {
655
+ "answer": "Array, Math",
656
+ "id": 163
657
+ }
658
+ ]
dividing_into_different_subsets/3/QS/line_counts_QS.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,line_subset_1,line_subset_2,line_subset_3
2
+ CodeFuse-DeepSeek-33b,81.82,72.22,76.36
3
+ Nxcode-CQ-7B,92.09,88.33,81.45
4
+ codegemma-2b,44.09,17.5,19.64
5
+ codegemma-7b,52.45,35.19,31.64
6
+ codegemma-7b-it,66.36,49.26,43.73
7
+ deepseek-coder-1.3b-base,47.45,26.39,23.0
8
+ deepseek-coder-6.7b-base,63.36,39.35,34.18
9
+ deepseek_coder-6.7b-instruct,85.0,66.85,62.82
10
+ deepseek_coder_33b-base,68.0,48.89,41.27
11
+ deepseek_coder_33b-instruct,82.09,62.31,53.91
12
+ codeqwen1.5-7b,59.73,48.7,45.64
dividing_into_different_subsets/3/QS/test.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import csv
4
# Compute, for every model, the mean pass@1 over the problems of each
# problem category and write one CSV row per model.

# Directory holding one evaluation-result JSON file per model.  Raw string so
# that the backslashes are never interpreted as escape sequences.
input_dir = r'E:\python-testn\pythonProject3\hh_1\evaluate_result'

# File with one {"answer": "<categories>", "id": ...} entry per problem.
CATEGORY_FILE = "humaneval_with_cata.json"

# CSV that receives the results.
OUTPUT_CSV = "cata_result.csv"

# (CSV column title, text searched for in a problem's "answer" string).
CATEGORIES = (
    ("String", "String"),
    ("Math", "Math"),
    ("Array", "Array"),
    ("Sorting", "Sorting"),
    ("Hash Table", "Hash table"),
    ("Stack", "Stack"),
    ("Search", "Search"),
    ("Matrix", "Matrix"),
)


def model_name_from_file(file_name):
    """Return the text between the first '_' and the last '-' of ``file_name``."""
    first_underscore_index = file_name.find('_')
    last_dash_index = file_name.rfind('-')
    return file_name[first_underscore_index + 1:last_dash_index]


def category_means(pass_at_1, problems, keywords):
    """Average the pass@1 values of the problems matching each keyword.

    ``pass_at_1`` is a sequence of ``(index, value)`` pairs and ``problems``
    a sequence of dicts with an "answer" string; the two are paired
    positionally.  A problem counts for a keyword when the keyword occurs
    in its "answer", so one problem can count for several keywords.

    Returns ``(counts, means)`` in keyword order.  Each mean is a percentage
    rounded to two decimals, or ``None`` when no problem matched (written as
    an empty CSV cell) instead of failing with a division by zero.
    """
    totals = [0] * len(keywords)
    counts = [0] * len(keywords)
    for (_index, value), problem in zip(pass_at_1, problems):
        answer = problem["answer"]
        for slot, keyword in enumerate(keywords):
            if keyword in answer:
                totals[slot] = totals[slot] + value
                counts[slot] = counts[slot] + 1
    means = [
        round(total / count * 100, 2) if count else None
        for total, count in zip(totals, counts)
    ]
    return counts, means


def main():
    """Write the per-category means of every model in ``input_dir`` to a CSV."""
    # List the result files first so a missing directory fails before any
    # output file is created.
    files = os.listdir(input_dir)

    # The categories are the same for every model: load them once.
    with open(CATEGORY_FILE, "r", encoding="utf-8") as file:
        problems = json.load(file)

    titles = [title for title, _keyword in CATEGORIES]
    keywords = [keyword for _title, keyword in CATEGORIES]

    with open(OUTPUT_CSV, "w", newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Model"] + titles)

        for file_name in files:
            input_file_path = os.path.join(input_dir, file_name)
            model_name = model_name_from_file(file_name)
            print(model_name)
            with open(input_file_path, "r", encoding="utf-8") as file:
                results = json.load(file)

            counts, means = category_means(
                results["humaneval"]["pass@1"], problems, keywords
            )
            print(*counts)
            print(*means)
            writer.writerow([model_name] + means)


if __name__ == "__main__":
    main()
99
+
100
+
dividing_into_different_subsets/3/QS/token_counts_QS.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,token_subset_1,token_subset_2,token_subset_3
2
+ CodeFuse-DeepSeek-33b,72.73,88.89,69.09
3
+ Nxcode-CQ-7B,90.73,87.04,84.09
4
+ codegemma-2b,42.45,27.13,11.82
5
+ codegemma-7b,54.18,39.81,25.36
6
+ codegemma-7b-it,69.45,51.2,38.73
7
+ deepseek-coder-1.3b-base,50.18,33.8,13.0
8
+ deepseek-coder-6.7b-base,63.91,48.06,25.09
9
+ deepseek_coder-6.7b-instruct,83.64,69.72,61.36
10
+ deepseek_coder_33b-base,66.73,56.85,34.73
11
+ deepseek_coder_33b-instruct,80.36,67.69,50.36
12
+ codeqwen1.5-7b,65.0,51.76,37.36
dividing_into_different_subsets/4/QS/CC_QS.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,CC_subset_1,CC_subset_2,CC_subset_3,CC_subset_4
2
+ CodeFuse-DeepSeek-33b,85.37,78.05,75.61,68.29
3
+ Nxcode-CQ-7B,89.51,80.49,90.24,88.9
4
+ codegemma-2b,47.32,23.17,25.73,12.32
5
+ codegemma-7b,59.02,37.2,42.32,20.61
6
+ codegemma-7b-it,63.05,54.76,52.56,42.2
7
+ deepseek-coder-1.3b-base,55.0,29.76,30.0,14.51
8
+ deepseek-coder-6.7b-base,66.22,40.98,50.37,25.12
9
+ deepseek_coder-6.7b-instruct,78.29,70.0,73.54,64.51
10
+ deepseek_coder_33b-base,65.85,53.05,53.17,38.9
11
+ deepseek_coder_33b-instruct,74.88,65.98,68.05,55.61
12
+ codeqwen1.5-7b,65.24,47.8,54.51,37.93
dividing_into_different_subsets/4/QS/QS.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/4/QS/calculate_humaneval_result.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import csv
4
# Directory that holds one evaluation-result JSON file per model.
# Written as a raw string: the original non-raw literal only worked because
# "\p", "\h" and "\e" are not recognised escape sequences, which newer Python
# versions warn about.
input_dir = r'E:\python-testn\pythonProject3\hh_1\evaluate_result'

# Every file found in the directory is treated as one model's result file.
files = os.listdir(input_dir)

# Which split of the benchmark to report.  Change these three values together
# to produce one of the other reports:
#   "token_diff" / "token_counts_QS.csv" / "token_subset"   (token count)
#   "line_diff"  / "line_counts_QS.csv"  / "line_subset"    (line count)
#   "CC_diff"    / "CC_QS.csv"           / "CC_subset"      (cyclomatic complexity)
diff_key = "line_diff"
output_csv = "line_counts_QS.csv"
column_prefix = "line_subset"
# Number of subsets the problems were divided into (labels 0 .. num_subsets-1).
num_subsets = 4

# Start the output CSV afresh with its header row.
with open(output_csv, "w", newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(
        ["Model"]
        + ["{}_{}".format(column_prefix, k + 1) for k in range(num_subsets)]
    )

# The per-problem subset labels do not depend on the model, so read them once
# instead of once per result file.
with open("QS.json", "r", encoding="utf-8") as file:
    data2 = json.load(file)

for file_name in files:
    # Build the full path of this model's result file.
    input_file_path = os.path.join(input_dir, file_name)

    # The model name is the part of the file name between the first "_" and
    # the last "-".
    model_name = file_name[file_name.find('_') + 1:file_name.rfind('-')]
    print(model_name)

    with open(input_file_path, "r", encoding="utf-8") as file:
        data1 = json.load(file)

    # Per-subset running total of pass@1 values and number of problems.
    sums = [0] * num_subsets
    counts = [0] * num_subsets

    # Results and subset labels are paired positionally; each result entry is
    # unpacked as an (index, value) pair.
    for item1, item2 in zip(data1["humaneval"]["pass@1"], data2):
        subset = item2[diff_key]
        if subset not in range(num_subsets):
            # Labels outside the expected range are ignored, as before.
            continue
        index, value = item1
        print(subset, index, value)
        sums[subset] += value
        counts[subset] += 1

    # Mean pass@1 per subset as a percentage.  An empty subset yields None
    # (written as an empty CSV cell) instead of raising ZeroDivisionError.
    means = [
        round(total / count * 100, 2) if count else None
        for total, count in zip(sums, counts)
    ]
    print("count_result!!")
    print(*counts)
    print(*means)

    # Append this model's row to the report.
    with open(output_csv, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([model_name] + means)
138
+
139
+
dividing_into_different_subsets/4/QS/even.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
with open("humaneval_new.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Percentage of the problems assigned to each subset, per metric
# (four equal quarters).
line_counts = [25, 25, 25, 25]
token_counts = [25, 25, 25, 25]
cyclomatic_complexity = [25, 25, 25, 25]


def _assign_subsets(items, sort_key, label_key, percentages):
    """Sort *items* in place by ``sort_key`` and label each with its subset.

    The item at sorted position ``i`` gets ``item[label_key] = k`` where ``k``
    is the first subset whose cumulative upper bound is greater than ``i``;
    everything past the second-to-last bound falls into the last subset.

    Args:
        items: list of dicts, each containing ``sort_key``.
        sort_key: key whose value orders the items (ascending).
        label_key: key under which the 0-based subset number is stored.
        percentages: share of the items (in percent) given to each subset.
    """
    # Use the real number of items instead of a hard-coded dataset size.
    total = len(items)

    # Cumulative upper bounds of every subset except the last one.  The
    # arithmetic (pct * 0.01 * total, summed left to right) is kept as in the
    # original so the subset boundaries do not move.
    bounds = []
    running = 0.0
    for pct in percentages[:-1]:
        running = running + pct * 0.01 * total
        bounds.append(running)

    # list.sort is stable, so ties keep the order left by the previous sort.
    items.sort(key=lambda x: x[sort_key])
    for i, item in enumerate(items):
        label = 0
        while label < len(bounds) and i >= bounds[label]:
            label += 1
        item[label_key] = label


# The order of these calls matters: each sort starts from the order left by
# the previous one, which decides how ties are broken.
_assign_subsets(data, 'line', 'line_diff', line_counts)
_assign_subsets(data, 'token', 'token_diff', token_counts)
_assign_subsets(data, 'cyclomatic_complexity', 'CC_diff', cyclomatic_complexity)

# Restore the original problem order before saving.
data.sort(key=lambda x: x['id'])

# Write the labelled data to the JSON file read by the evaluation script.
with open('QS.json', 'w', encoding='utf-8') as file:
    json.dump(data, file, ensure_ascii=False, indent=4)
65
+
dividing_into_different_subsets/4/QS/humaneval_new.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/4/QS/humaneval_with_cata.json ADDED
@@ -0,0 +1,658 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "answer": "Array, Sorting",
4
+ "id": 0
5
+ },
6
+ {
7
+ "answer": "String, Stack",
8
+ "id": 1
9
+ },
10
+ {
11
+ "answer": "Math",
12
+ "id": 2
13
+ },
14
+ {
15
+ "answer": "Array, Math",
16
+ "id": 3
17
+ },
18
+ {
19
+ "answer": "Math, Array",
20
+ "id": 4
21
+ },
22
+ {
23
+ "answer": "Array",
24
+ "id": 5
25
+ },
26
+ {
27
+ "answer": "String, Stack",
28
+ "id": 6
29
+ },
30
+ {
31
+ "answer": "String, Array, Search",
32
+ "id": 7
33
+ },
34
+ {
35
+ "answer": "Math, Array",
36
+ "id": 8
37
+ },
38
+ {
39
+ "answer": "Array, Stack",
40
+ "id": 9
41
+ },
42
+ {
43
+ "answer": "String, Search",
44
+ "id": 10
45
+ },
46
+ {
47
+ "answer": "String, Math",
48
+ "id": 11
49
+ },
50
+ {
51
+ "answer": "String, Array",
52
+ "id": 12
53
+ },
54
+ {
55
+ "answer": "Math",
56
+ "id": 13
57
+ },
58
+ {
59
+ "answer": "String, Array",
60
+ "id": 14
61
+ },
62
+ {
63
+ "answer": "String, Math",
64
+ "id": 15
65
+ },
66
+ {
67
+ "answer": "String, Hash table",
68
+ "id": 16
69
+ },
70
+ {
71
+ "answer": "String, Array",
72
+ "id": 17
73
+ },
74
+ {
75
+ "answer": "String, Search",
76
+ "id": 18
77
+ },
78
+ {
79
+ "answer": "String, Sorting",
80
+ "id": 19
81
+ },
82
+ {
83
+ "answer": "Array, Sorting",
84
+ "id": 20
85
+ },
86
+ {
87
+ "answer": "Array, Math",
88
+ "id": 21
89
+ },
90
+ {
91
+ "answer": "Array, Search",
92
+ "id": 22
93
+ },
94
+ {
95
+ "answer": "String",
96
+ "id": 23
97
+ },
98
+ {
99
+ "answer": "Math",
100
+ "id": 24
101
+ },
102
+ {
103
+ "answer": "Math, Array",
104
+ "id": 25
105
+ },
106
+ {
107
+ "answer": "Array, Hash table",
108
+ "id": 26
109
+ },
110
+ {
111
+ "answer": "String",
112
+ "id": 27
113
+ },
114
+ {
115
+ "answer": "String",
116
+ "id": 28
117
+ },
118
+ {
119
+ "answer": "String, Array",
120
+ "id": 29
121
+ },
122
+ {
123
+ "answer": "Array",
124
+ "id": 30
125
+ },
126
+ {
127
+ "answer": "Math",
128
+ "id": 31
129
+ },
130
+ {
131
+ "answer": "Math, Search",
132
+ "id": 32
133
+ },
134
+ {
135
+ "answer": "Array, Sorting",
136
+ "id": 33
137
+ },
138
+ {
139
+ "answer": "Array, Sorting",
140
+ "id": 34
141
+ },
142
+ {
143
+ "answer": "Math, Array",
144
+ "id": 35
145
+ },
146
+ {
147
+ "answer": "Math, Search",
148
+ "id": 36
149
+ },
150
+ {
151
+ "answer": "Array, Sorting",
152
+ "id": 37
153
+ },
154
+ {
155
+ "answer": "String, Array",
156
+ "id": 38
157
+ },
158
+ {
159
+ "answer": "Math, Search",
160
+ "id": 39
161
+ },
162
+ {
163
+ "answer": "Array, Search",
164
+ "id": 40
165
+ },
166
+ {
167
+ "answer": "Math, Array",
168
+ "id": 41
169
+ },
170
+ {
171
+ "answer": "Array, Math",
172
+ "id": 42
173
+ },
174
+ {
175
+ "answer": "Array, Hash table",
176
+ "id": 43
177
+ },
178
+ {
179
+ "answer": "Math, String",
180
+ "id": 44
181
+ },
182
+ {
183
+ "answer": "Math",
184
+ "id": 45
185
+ },
186
+ {
187
+ "answer": "Math, Array",
188
+ "id": 46
189
+ },
190
+ {
191
+ "answer": "Array, Sorting",
192
+ "id": 47
193
+ },
194
+ {
195
+ "answer": "String",
196
+ "id": 48
197
+ },
198
+ {
199
+ "answer": "Math",
200
+ "id": 49
201
+ },
202
+ {
203
+ "answer": "String, Math",
204
+ "id": 50
205
+ },
206
+ {
207
+ "answer": "String",
208
+ "id": 51
209
+ },
210
+ {
211
+ "answer": "Array, Search",
212
+ "id": 52
213
+ },
214
+ {
215
+ "answer": "Math",
216
+ "id": 53
217
+ },
218
+ {
219
+ "answer": "String, Hash table",
220
+ "id": 54
221
+ },
222
+ {
223
+ "answer": "Math",
224
+ "id": 55
225
+ },
226
+ {
227
+ "answer": "String, Stack",
228
+ "id": 56
229
+ },
230
+ {
231
+ "answer": "Array, Sorting",
232
+ "id": 57
233
+ },
234
+ {
235
+ "answer": "Array, Sorting",
236
+ "id": 58
237
+ },
238
+ {
239
+ "answer": "Math, Search",
240
+ "id": 59
241
+ },
242
+ {
243
+ "answer": "Math",
244
+ "id": 60
245
+ },
246
+ {
247
+ "answer": "String, Stack",
248
+ "id": 61
249
+ },
250
+ {
251
+ "answer": "Array, Math",
252
+ "id": 62
253
+ },
254
+ {
255
+ "answer": "Math, Array",
256
+ "id": 63
257
+ },
258
+ {
259
+ "answer": "String",
260
+ "id": 64
261
+ },
262
+ {
263
+ "answer": "String, Math",
264
+ "id": 65
265
+ },
266
+ {
267
+ "answer": "String, Math",
268
+ "id": 66
269
+ },
270
+ {
271
+ "answer": "String, Math",
272
+ "id": 67
273
+ },
274
+ {
275
+ "answer": "Array, Search",
276
+ "id": 68
277
+ },
278
+ {
279
+ "answer": "Array, Hash table",
280
+ "id": 69
281
+ },
282
+ {
283
+ "answer": "Array, Sorting",
284
+ "id": 70
285
+ },
286
+ {
287
+ "answer": "Math, Array",
288
+ "id": 71
289
+ },
290
+ {
291
+ "answer": "Array, Math",
292
+ "id": 72
293
+ },
294
+ {
295
+ "answer": "Array, Sorting",
296
+ "id": 73
297
+ },
298
+ {
299
+ "answer": "String, Array",
300
+ "id": 74
301
+ },
302
+ {
303
+ "answer": "Math, Hash table",
304
+ "id": 75
305
+ },
306
+ {
307
+ "answer": "Math",
308
+ "id": 76
309
+ },
310
+ {
311
+ "answer": "Math",
312
+ "id": 77
313
+ },
314
+ {
315
+ "answer": "String, Hash table",
316
+ "id": 78
317
+ },
318
+ {
319
+ "answer": "String, Math",
320
+ "id": 79
321
+ },
322
+ {
323
+ "answer": "String, Search",
324
+ "id": 80
325
+ },
326
+ {
327
+ "answer": "Array, Sorting",
328
+ "id": 81
329
+ },
330
+ {
331
+ "answer": "String, Math",
332
+ "id": 82
333
+ },
334
+ {
335
+ "answer": "Math, Array",
336
+ "id": 83
337
+ },
338
+ {
339
+ "answer": "Math, String",
340
+ "id": 84
341
+ },
342
+ {
343
+ "answer": "Array, Math",
344
+ "id": 85
345
+ },
346
+ {
347
+ "answer": "String, Sorting",
348
+ "id": 86
349
+ },
350
+ {
351
+ "answer": "Array, Search, Sorting",
352
+ "id": 87
353
+ },
354
+ {
355
+ "answer": "Array, Sorting",
356
+ "id": 88
357
+ },
358
+ {
359
+ "answer": "String, Math",
360
+ "id": 89
361
+ },
362
+ {
363
+ "answer": "Array, Sorting",
364
+ "id": 90
365
+ },
366
+ {
367
+ "answer": "String, Search",
368
+ "id": 91
369
+ },
370
+ {
371
+ "answer": "Math, Array",
372
+ "id": 92
373
+ },
374
+ {
375
+ "answer": "String, Hash table",
376
+ "id": 93
377
+ },
378
+ {
379
+ "answer": "Array, Math",
380
+ "id": 94
381
+ },
382
+ {
383
+ "answer": "String, Hash table",
384
+ "id": 95
385
+ },
386
+ {
387
+ "answer": "Math, Array",
388
+ "id": 96
389
+ },
390
+ {
391
+ "answer": "Math",
392
+ "id": 97
393
+ },
394
+ {
395
+ "answer": "String, Array",
396
+ "id": 98
397
+ },
398
+ {
399
+ "answer": "String, Math",
400
+ "id": 99
401
+ },
402
+ {
403
+ "answer": "Array, Math",
404
+ "id": 100
405
+ },
406
+ {
407
+ "answer": "String, Array",
408
+ "id": 101
409
+ },
410
+ {
411
+ "answer": "Math, Search",
412
+ "id": 102
413
+ },
414
+ {
415
+ "answer": "Math, String",
416
+ "id": 103
417
+ },
418
+ {
419
+ "answer": "Array, Sorting",
420
+ "id": 104
421
+ },
422
+ {
423
+ "answer": "Array, Sorting",
424
+ "id": 105
425
+ },
426
+ {
427
+ "answer": "Array, Math",
428
+ "id": 106
429
+ },
430
+ {
431
+ "answer": "Math, Search",
432
+ "id": 107
433
+ },
434
+ {
435
+ "answer": "Array, Math",
436
+ "id": 108
437
+ },
438
+ {
439
+ "answer": "Array, Sorting",
440
+ "id": 109
441
+ },
442
+ {
443
+ "answer": "Array, String",
444
+ "id": 110
445
+ },
446
+ {
447
+ "answer": "String, Hash table",
448
+ "id": 111
449
+ },
450
+ {
451
+ "answer": "String, Sorting",
452
+ "id": 112
453
+ },
454
+ {
455
+ "answer": "String, Array",
456
+ "id": 113
457
+ },
458
+ {
459
+ "answer": "Array, Math",
460
+ "id": 114
461
+ },
462
+ {
463
+ "answer": "Array, Math",
464
+ "id": 115
465
+ },
466
+ {
467
+ "answer": "Array, Sorting",
468
+ "id": 116
469
+ },
470
+ {
471
+ "answer": "String, Array",
472
+ "id": 117
473
+ },
474
+ {
475
+ "answer": "String, Search",
476
+ "id": 118
477
+ },
478
+ {
479
+ "answer": "String, Stack",
480
+ "id": 119
481
+ },
482
+ {
483
+ "answer": "Array, Sorting",
484
+ "id": 120
485
+ },
486
+ {
487
+ "answer": "Array, Math",
488
+ "id": 121
489
+ },
490
+ {
491
+ "answer": "Array, Math",
492
+ "id": 122
493
+ },
494
+ {
495
+ "answer": "Array, Sorting",
496
+ "id": 123
497
+ },
498
+ {
499
+ "answer": "String, Math",
500
+ "id": 124
501
+ },
502
+ {
503
+ "answer": "String, Math",
504
+ "id": 125
505
+ },
506
+ {
507
+ "answer": "Array, Sorting",
508
+ "id": 126
509
+ },
510
+ {
511
+ "answer": "Math, Array",
512
+ "id": 127
513
+ },
514
+ {
515
+ "answer": "Array, Math",
516
+ "id": 128
517
+ },
518
+ {
519
+ "answer": "Array, Search, Matrix",
520
+ "id": 129
521
+ },
522
+ {
523
+ "answer": "Math, Array",
524
+ "id": 130
525
+ },
526
+ {
527
+ "answer": "Math, String",
528
+ "id": 131
529
+ },
530
+ {
531
+ "answer": "String, Stack",
532
+ "id": 132
533
+ },
534
+ {
535
+ "answer": "Array, Math",
536
+ "id": 133
537
+ },
538
+ {
539
+ "answer": "String, Search",
540
+ "id": 134
541
+ },
542
+ {
543
+ "answer": "Array, Search",
544
+ "id": 135
545
+ },
546
+ {
547
+ "answer": "Array, Search",
548
+ "id": 136
549
+ },
550
+ {
551
+ "answer": "String, Math",
552
+ "id": 137
553
+ },
554
+ {
555
+ "answer": "Math",
556
+ "id": 138
557
+ },
558
+ {
559
+ "answer": "Math, Array",
560
+ "id": 139
561
+ },
562
+ {
563
+ "answer": "String, Array",
564
+ "id": 140
565
+ },
566
+ {
567
+ "answer": "String, Hash table",
568
+ "id": 141
569
+ },
570
+ {
571
+ "answer": "Array, Math",
572
+ "id": 142
573
+ },
574
+ {
575
+ "answer": "String, Math",
576
+ "id": 143
577
+ },
578
+ {
579
+ "answer": "String, Math",
580
+ "id": 144
581
+ },
582
+ {
583
+ "answer": "Array, Sorting",
584
+ "id": 145
585
+ },
586
+ {
587
+ "answer": "Array, Math",
588
+ "id": 146
589
+ },
590
+ {
591
+ "answer": "Array, Math",
592
+ "id": 147
593
+ },
594
+ {
595
+ "answer": "String, Array, Sorting",
596
+ "id": 148
597
+ },
598
+ {
599
+ "answer": "String, Sorting",
600
+ "id": 149
601
+ },
602
+ {
603
+ "answer": "Math, Search",
604
+ "id": 150
605
+ },
606
+ {
607
+ "answer": "Array, Math",
608
+ "id": 151
609
+ },
610
+ {
611
+ "answer": "Array, Math",
612
+ "id": 152
613
+ },
614
+ {
615
+ "answer": "String, Array, Sorting",
616
+ "id": 153
617
+ },
618
+ {
619
+ "answer": "String, Search",
620
+ "id": 154
621
+ },
622
+ {
623
+ "answer": "String, Math",
624
+ "id": 155
625
+ },
626
+ {
627
+ "answer": "String, Math",
628
+ "id": 156
629
+ },
630
+ {
631
+ "answer": "Math, Sorting",
632
+ "id": 157
633
+ },
634
+ {
635
+ "answer": "String, Array",
636
+ "id": 158
637
+ },
638
+ {
639
+ "answer": "Array, Math",
640
+ "id": 159
641
+ },
642
+ {
643
+ "answer": "String, Array, Math",
644
+ "id": 160
645
+ },
646
+ {
647
+ "answer": "String, Array",
648
+ "id": 161
649
+ },
650
+ {
651
+ "answer": "String, Hash table",
652
+ "id": 162
653
+ },
654
+ {
655
+ "answer": "Array, Math",
656
+ "id": 163
657
+ }
658
+ ]
dividing_into_different_subsets/4/QS/line_counts_QS.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,line_subset_1,line_subset_2,line_subset_3,line_subset_4
2
+ CodeFuse-DeepSeek-33b,80.49,80.49,70.73,75.61
3
+ Nxcode-CQ-7B,91.71,89.39,85.37,82.68
4
+ codegemma-2b,49.39,23.78,16.83,18.54
5
+ codegemma-7b,59.15,35.37,33.9,30.73
6
+ codegemma-7b-it,72.32,49.76,49.88,40.61
7
+ deepseek-coder-1.3b-base,50.98,29.76,26.34,22.2
8
+ deepseek-coder-6.7b-base,68.29,46.71,33.41,34.27
9
+ deepseek_coder-6.7b-instruct,81.1,81.34,64.51,59.39
10
+ deepseek_coder_33b-base,75.0,50.0,47.2,38.78
11
+ deepseek_coder_33b-instruct,84.15,69.27,59.15,51.95
12
+ codeqwen1.5-7b,62.93,54.63,43.54,44.39
dividing_into_different_subsets/4/QS/token_counts_QS.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,token_subset_1,token_subset_2,token_subset_3,token_subset_4
2
+ CodeFuse-DeepSeek-33b,75.61,85.37,82.93,63.41
3
+ Nxcode-CQ-7B,93.9,86.34,85.37,83.54
4
+ codegemma-2b,43.05,37.68,19.76,8.05
5
+ codegemma-7b,55.61,49.63,27.56,26.34
6
+ codegemma-7b-it,69.76,63.9,37.07,41.83
7
+ deepseek-coder-1.3b-base,49.39,46.59,20.98,12.32
8
+ deepseek-coder-6.7b-base,68.29,54.15,36.34,23.9
9
+ deepseek_coder-6.7b-instruct,85.98,71.22,72.8,56.34
10
+ deepseek_coder_33b-base,69.02,66.83,42.32,32.8
11
+ deepseek_coder_33b-instruct,82.93,73.05,62.44,46.1
12
+ codeqwen1.5-7b,65.49,59.76,39.88,40.37
dividing_into_different_subsets/5/QS/CC_QS.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,CC_subset_1,CC_subset_2,CC_subset_3,CC_subset_4,CC_subset_5
2
+ CodeFuse-DeepSeek-33b,81.82,78.79,81.82,81.82,59.38
3
+ Nxcode-CQ-7B,91.36,82.42,88.64,88.03,85.94
4
+ codegemma-2b,49.85,21.67,24.24,29.24,10.16
5
+ codegemma-7b,63.33,35.76,40.45,41.36,17.34
6
+ codegemma-7b-it,66.67,51.82,53.18,57.88,35.62
7
+ deepseek-coder-1.3b-base,59.24,28.94,28.33,32.58,11.88
8
+ deepseek-coder-6.7b-base,70.91,38.79,46.36,51.82,19.69
9
+ deepseek_coder-6.7b-instruct,84.7,66.06,69.39,79.24,58.13
10
+ deepseek_coder_33b-base,72.12,44.24,54.09,60.0,32.66
11
+ deepseek_coder_33b-instruct,80.15,59.55,64.85,76.52,49.06
12
+ codeqwen1.5-7b,66.52,51.67,49.55,55.61,32.97
dividing_into_different_subsets/5/QS/QS.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/5/QS/calculate_humaneval_result.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import csv
4
# Directory that holds one evaluation-result JSON file per model.
input_dir = '/evaluate_result'

# Every file found in the directory is treated as one model's result file.
files = os.listdir(input_dir)

# Which split of the benchmark to report.  Change these three values together
# to produce one of the other reports:
#   "token_diff" / "token_counts_QS.csv" / "token_subset"   (token count)
#   "line_diff"  / "line_counts_QS.csv"  / "line_subset"    (line count)
#   "CC_diff"    / "CC_QS.csv"           / "CC_subset"      (cyclomatic complexity)
diff_key = "CC_diff"
output_csv = "CC_QS.csv"
column_prefix = "CC_subset"
# Number of subsets the problems were divided into (labels 0 .. num_subsets-1).
num_subsets = 5

# Start the output CSV afresh with its header row.
with open(output_csv, "w", newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(
        ["Model"]
        + ["{}_{}".format(column_prefix, k + 1) for k in range(num_subsets)]
    )

# The per-problem subset labels do not depend on the model, so read them once
# instead of once per result file.
with open("QS.json", "r", encoding="utf-8") as file:
    data2 = json.load(file)

for file_name in files:
    # Build the full path of this model's result file.
    input_file_path = os.path.join(input_dir, file_name)

    # The model name is the part of the file name between the first "_" and
    # the last "-".
    model_name = file_name[file_name.find('_') + 1:file_name.rfind('-')]
    print(model_name)

    with open(input_file_path, "r", encoding="utf-8") as file:
        data1 = json.load(file)

    # Per-subset running total of pass@1 values and number of problems.
    sums = [0] * num_subsets
    counts = [0] * num_subsets

    # Results and subset labels are paired positionally; each result entry is
    # unpacked as an (index, value) pair.
    for item1, item2 in zip(data1["humaneval"]["pass@1"], data2):
        subset = item2[diff_key]
        if subset not in range(num_subsets):
            # Labels outside the expected range are ignored, as before.
            continue
        index, value = item1
        print(subset, index, value)
        sums[subset] += value
        counts[subset] += 1

    # Mean pass@1 per subset as a percentage.  An empty subset yields None
    # (written as an empty CSV cell) instead of raising ZeroDivisionError.
    means = [
        round(total / count * 100, 2) if count else None
        for total, count in zip(sums, counts)
    ]
    print("count_result!!")
    print(*counts)
    # The original printed count4 in the last position here; the summary line
    # is meant to show all five means.
    print(*means)

    # Append this model's row to the report.
    with open(output_csv, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([model_name] + means)
156
+
157
+
dividing_into_different_subsets/5/QS/draw_line.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import plotly.express as px
4
+
5
+
6
def plot_csv(df):
    """Build a Plotly line chart with one line per model.

    Args:
        df: DataFrame containing a 'Model' column; every other column is
            plotted as one point on the x-axis.

    Returns:
        The Plotly figure.
    """
    # Index by model name without mutating the caller's frame.  The original
    # used inplace=True, which removed the 'Model' column from the argument
    # and made a second call on the same frame fail.
    indexed = df.set_index('Model')

    # Transpose so that each model becomes a column (one line each) and the
    # original column names become the rows (the x-axis).
    df_transposed = indexed.T

    # Draw the line chart with Plotly.
    fig = px.line(
        df_transposed,
        x=df_transposed.index,
        y=df_transposed.columns,
        title='Model Evaluation Results',
        labels={'value': 'Evaluation Score', 'index': 'Evaluation Metric'},
        color_discrete_sequence=px.colors.qualitative.Plotly,
    )

    # Show only the y value when hovering over a point.
    fig.update_traces(hovertemplate='%{y}')

    return fig
23
+
24
+
25
# Load the local CSV report that pre-fills the input table.
file_path = 'line_counts_QS.csv'
df = pd.read_csv(file_path)

# Gradio app: an editable table as input, the line plot as output.
input_table = gr.Dataframe(df)
output_plot = gr.Plot(label="Line Plot")
iface = gr.Interface(
    fn=plot_csv,
    inputs=input_table,
    outputs=output_plot,
    title="CSV to Line Plot",
    description="Visualize the evaluation results as a line plot.",
)

# Start the web interface.
iface.launch()
dividing_into_different_subsets/5/QS/even.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
with open("humaneval_new.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Percentage of the problems assigned to each subset, per metric
# (five equal fifths).
line_counts = [20, 20, 20, 20, 20]
token_counts = [20, 20, 20, 20, 20]
cyclomatic_complexity = [20, 20, 20, 20, 20]


def _assign_subsets(items, sort_key, label_key, percentages):
    """Sort *items* in place by ``sort_key`` and label each with its subset.

    The item at sorted position ``i`` gets ``item[label_key] = k`` where ``k``
    is the first subset whose cumulative upper bound is greater than ``i``;
    everything past the second-to-last bound falls into the last subset.

    Args:
        items: list of dicts, each containing ``sort_key``.
        sort_key: key whose value orders the items (ascending).
        label_key: key under which the 0-based subset number is stored.
        percentages: share of the items (in percent) given to each subset.
    """
    # Use the real number of items instead of a hard-coded dataset size.
    total = len(items)

    # Cumulative upper bounds of every subset except the last one.  The
    # arithmetic (pct * 0.01 * total, summed left to right) is kept as in the
    # original so the subset boundaries do not move.
    bounds = []
    running = 0.0
    for pct in percentages[:-1]:
        running = running + pct * 0.01 * total
        bounds.append(running)

    # list.sort is stable, so ties keep the order left by the previous sort.
    items.sort(key=lambda x: x[sort_key])
    for i, item in enumerate(items):
        label = 0
        while label < len(bounds) and i >= bounds[label]:
            label += 1
        item[label_key] = label


# The order of these calls matters: each sort starts from the order left by
# the previous one, which decides how ties are broken.
_assign_subsets(data, 'line', 'line_diff', line_counts)
_assign_subsets(data, 'token', 'token_diff', token_counts)
_assign_subsets(data, 'cyclomatic_complexity', 'CC_diff', cyclomatic_complexity)

# Restore the original problem order before saving.
data.sort(key=lambda x: x['id'])

# Write the labelled data to the JSON file read by the evaluation script.
with open('QS.json', 'w', encoding='utf-8') as file:
    json.dump(data, file, ensure_ascii=False, indent=4)
71
+
dividing_into_different_subsets/5/QS/humaneval_new.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/5/QS/humaneval_with_cata.json ADDED
@@ -0,0 +1,658 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "answer": "Array, Sorting",
4
+ "id": 0
5
+ },
6
+ {
7
+ "answer": "String, Stack",
8
+ "id": 1
9
+ },
10
+ {
11
+ "answer": "Math",
12
+ "id": 2
13
+ },
14
+ {
15
+ "answer": "Array, Math",
16
+ "id": 3
17
+ },
18
+ {
19
+ "answer": "Math, Array",
20
+ "id": 4
21
+ },
22
+ {
23
+ "answer": "Array",
24
+ "id": 5
25
+ },
26
+ {
27
+ "answer": "String, Stack",
28
+ "id": 6
29
+ },
30
+ {
31
+ "answer": "String, Array, Search",
32
+ "id": 7
33
+ },
34
+ {
35
+ "answer": "Math, Array",
36
+ "id": 8
37
+ },
38
+ {
39
+ "answer": "Array, Stack",
40
+ "id": 9
41
+ },
42
+ {
43
+ "answer": "String, Search",
44
+ "id": 10
45
+ },
46
+ {
47
+ "answer": "String, Math",
48
+ "id": 11
49
+ },
50
+ {
51
+ "answer": "String, Array",
52
+ "id": 12
53
+ },
54
+ {
55
+ "answer": "Math",
56
+ "id": 13
57
+ },
58
+ {
59
+ "answer": "String, Array",
60
+ "id": 14
61
+ },
62
+ {
63
+ "answer": "String, Math",
64
+ "id": 15
65
+ },
66
+ {
67
+ "answer": "String, Hash table",
68
+ "id": 16
69
+ },
70
+ {
71
+ "answer": "String, Array",
72
+ "id": 17
73
+ },
74
+ {
75
+ "answer": "String, Search",
76
+ "id": 18
77
+ },
78
+ {
79
+ "answer": "String, Sorting",
80
+ "id": 19
81
+ },
82
+ {
83
+ "answer": "Array, Sorting",
84
+ "id": 20
85
+ },
86
+ {
87
+ "answer": "Array, Math",
88
+ "id": 21
89
+ },
90
+ {
91
+ "answer": "Array, Search",
92
+ "id": 22
93
+ },
94
+ {
95
+ "answer": "String",
96
+ "id": 23
97
+ },
98
+ {
99
+ "answer": "Math",
100
+ "id": 24
101
+ },
102
+ {
103
+ "answer": "Math, Array",
104
+ "id": 25
105
+ },
106
+ {
107
+ "answer": "Array, Hash table",
108
+ "id": 26
109
+ },
110
+ {
111
+ "answer": "String",
112
+ "id": 27
113
+ },
114
+ {
115
+ "answer": "String",
116
+ "id": 28
117
+ },
118
+ {
119
+ "answer": "String, Array",
120
+ "id": 29
121
+ },
122
+ {
123
+ "answer": "Array",
124
+ "id": 30
125
+ },
126
+ {
127
+ "answer": "Math",
128
+ "id": 31
129
+ },
130
+ {
131
+ "answer": "Math, Search",
132
+ "id": 32
133
+ },
134
+ {
135
+ "answer": "Array, Sorting",
136
+ "id": 33
137
+ },
138
+ {
139
+ "answer": "Array, Sorting",
140
+ "id": 34
141
+ },
142
+ {
143
+ "answer": "Math, Array",
144
+ "id": 35
145
+ },
146
+ {
147
+ "answer": "Math, Search",
148
+ "id": 36
149
+ },
150
+ {
151
+ "answer": "Array, Sorting",
152
+ "id": 37
153
+ },
154
+ {
155
+ "answer": "String, Array",
156
+ "id": 38
157
+ },
158
+ {
159
+ "answer": "Math, Search",
160
+ "id": 39
161
+ },
162
+ {
163
+ "answer": "Array, Search",
164
+ "id": 40
165
+ },
166
+ {
167
+ "answer": "Math, Array",
168
+ "id": 41
169
+ },
170
+ {
171
+ "answer": "Array, Math",
172
+ "id": 42
173
+ },
174
+ {
175
+ "answer": "Array, Hash table",
176
+ "id": 43
177
+ },
178
+ {
179
+ "answer": "Math, String",
180
+ "id": 44
181
+ },
182
+ {
183
+ "answer": "Math",
184
+ "id": 45
185
+ },
186
+ {
187
+ "answer": "Math, Array",
188
+ "id": 46
189
+ },
190
+ {
191
+ "answer": "Array, Sorting",
192
+ "id": 47
193
+ },
194
+ {
195
+ "answer": "String",
196
+ "id": 48
197
+ },
198
+ {
199
+ "answer": "Math",
200
+ "id": 49
201
+ },
202
+ {
203
+ "answer": "String, Math",
204
+ "id": 50
205
+ },
206
+ {
207
+ "answer": "String",
208
+ "id": 51
209
+ },
210
+ {
211
+ "answer": "Array, Search",
212
+ "id": 52
213
+ },
214
+ {
215
+ "answer": "Math",
216
+ "id": 53
217
+ },
218
+ {
219
+ "answer": "String, Hash table",
220
+ "id": 54
221
+ },
222
+ {
223
+ "answer": "Math",
224
+ "id": 55
225
+ },
226
+ {
227
+ "answer": "String, Stack",
228
+ "id": 56
229
+ },
230
+ {
231
+ "answer": "Array, Sorting",
232
+ "id": 57
233
+ },
234
+ {
235
+ "answer": "Array, Sorting",
236
+ "id": 58
237
+ },
238
+ {
239
+ "answer": "Math, Search",
240
+ "id": 59
241
+ },
242
+ {
243
+ "answer": "Math",
244
+ "id": 60
245
+ },
246
+ {
247
+ "answer": "String, Stack",
248
+ "id": 61
249
+ },
250
+ {
251
+ "answer": "Array, Math",
252
+ "id": 62
253
+ },
254
+ {
255
+ "answer": "Math, Array",
256
+ "id": 63
257
+ },
258
+ {
259
+ "answer": "String",
260
+ "id": 64
261
+ },
262
+ {
263
+ "answer": "String, Math",
264
+ "id": 65
265
+ },
266
+ {
267
+ "answer": "String, Math",
268
+ "id": 66
269
+ },
270
+ {
271
+ "answer": "String, Math",
272
+ "id": 67
273
+ },
274
+ {
275
+ "answer": "Array, Search",
276
+ "id": 68
277
+ },
278
+ {
279
+ "answer": "Array, Hash table",
280
+ "id": 69
281
+ },
282
+ {
283
+ "answer": "Array, Sorting",
284
+ "id": 70
285
+ },
286
+ {
287
+ "answer": "Math, Array",
288
+ "id": 71
289
+ },
290
+ {
291
+ "answer": "Array, Math",
292
+ "id": 72
293
+ },
294
+ {
295
+ "answer": "Array, Sorting",
296
+ "id": 73
297
+ },
298
+ {
299
+ "answer": "String, Array",
300
+ "id": 74
301
+ },
302
+ {
303
+ "answer": "Math, Hash table",
304
+ "id": 75
305
+ },
306
+ {
307
+ "answer": "Math",
308
+ "id": 76
309
+ },
310
+ {
311
+ "answer": "Math",
312
+ "id": 77
313
+ },
314
+ {
315
+ "answer": "String, Hash table",
316
+ "id": 78
317
+ },
318
+ {
319
+ "answer": "String, Math",
320
+ "id": 79
321
+ },
322
+ {
323
+ "answer": "String, Search",
324
+ "id": 80
325
+ },
326
+ {
327
+ "answer": "Array, Sorting",
328
+ "id": 81
329
+ },
330
+ {
331
+ "answer": "String, Math",
332
+ "id": 82
333
+ },
334
+ {
335
+ "answer": "Math, Array",
336
+ "id": 83
337
+ },
338
+ {
339
+ "answer": "Math, String",
340
+ "id": 84
341
+ },
342
+ {
343
+ "answer": "Array, Math",
344
+ "id": 85
345
+ },
346
+ {
347
+ "answer": "String, Sorting",
348
+ "id": 86
349
+ },
350
+ {
351
+ "answer": "Array, Search, Sorting",
352
+ "id": 87
353
+ },
354
+ {
355
+ "answer": "Array, Sorting",
356
+ "id": 88
357
+ },
358
+ {
359
+ "answer": "String, Math",
360
+ "id": 89
361
+ },
362
+ {
363
+ "answer": "Array, Sorting",
364
+ "id": 90
365
+ },
366
+ {
367
+ "answer": "String, Search",
368
+ "id": 91
369
+ },
370
+ {
371
+ "answer": "Math, Array",
372
+ "id": 92
373
+ },
374
+ {
375
+ "answer": "String, Hash table",
376
+ "id": 93
377
+ },
378
+ {
379
+ "answer": "Array, Math",
380
+ "id": 94
381
+ },
382
+ {
383
+ "answer": "String, Hash table",
384
+ "id": 95
385
+ },
386
+ {
387
+ "answer": "Math, Array",
388
+ "id": 96
389
+ },
390
+ {
391
+ "answer": "Math",
392
+ "id": 97
393
+ },
394
+ {
395
+ "answer": "String, Array",
396
+ "id": 98
397
+ },
398
+ {
399
+ "answer": "String, Math",
400
+ "id": 99
401
+ },
402
+ {
403
+ "answer": "Array, Math",
404
+ "id": 100
405
+ },
406
+ {
407
+ "answer": "String, Array",
408
+ "id": 101
409
+ },
410
+ {
411
+ "answer": "Math, Search",
412
+ "id": 102
413
+ },
414
+ {
415
+ "answer": "Math, String",
416
+ "id": 103
417
+ },
418
+ {
419
+ "answer": "Array, Sorting",
420
+ "id": 104
421
+ },
422
+ {
423
+ "answer": "Array, Sorting",
424
+ "id": 105
425
+ },
426
+ {
427
+ "answer": "Array, Math",
428
+ "id": 106
429
+ },
430
+ {
431
+ "answer": "Math, Search",
432
+ "id": 107
433
+ },
434
+ {
435
+ "answer": "Array, Math",
436
+ "id": 108
437
+ },
438
+ {
439
+ "answer": "Array, Sorting",
440
+ "id": 109
441
+ },
442
+ {
443
+ "answer": "Array, String",
444
+ "id": 110
445
+ },
446
+ {
447
+ "answer": "String, Hash table",
448
+ "id": 111
449
+ },
450
+ {
451
+ "answer": "String, Sorting",
452
+ "id": 112
453
+ },
454
+ {
455
+ "answer": "String, Array",
456
+ "id": 113
457
+ },
458
+ {
459
+ "answer": "Array, Math",
460
+ "id": 114
461
+ },
462
+ {
463
+ "answer": "Array, Math",
464
+ "id": 115
465
+ },
466
+ {
467
+ "answer": "Array, Sorting",
468
+ "id": 116
469
+ },
470
+ {
471
+ "answer": "String, Array",
472
+ "id": 117
473
+ },
474
+ {
475
+ "answer": "String, Search",
476
+ "id": 118
477
+ },
478
+ {
479
+ "answer": "String, Stack",
480
+ "id": 119
481
+ },
482
+ {
483
+ "answer": "Array, Sorting",
484
+ "id": 120
485
+ },
486
+ {
487
+ "answer": "Array, Math",
488
+ "id": 121
489
+ },
490
+ {
491
+ "answer": "Array, Math",
492
+ "id": 122
493
+ },
494
+ {
495
+ "answer": "Array, Sorting",
496
+ "id": 123
497
+ },
498
+ {
499
+ "answer": "String, Math",
500
+ "id": 124
501
+ },
502
+ {
503
+ "answer": "String, Math",
504
+ "id": 125
505
+ },
506
+ {
507
+ "answer": "Array, Sorting",
508
+ "id": 126
509
+ },
510
+ {
511
+ "answer": "Math, Array",
512
+ "id": 127
513
+ },
514
+ {
515
+ "answer": "Array, Math",
516
+ "id": 128
517
+ },
518
+ {
519
+ "answer": "Array, Search, Matrix",
520
+ "id": 129
521
+ },
522
+ {
523
+ "answer": "Math, Array",
524
+ "id": 130
525
+ },
526
+ {
527
+ "answer": "Math, String",
528
+ "id": 131
529
+ },
530
+ {
531
+ "answer": "String, Stack",
532
+ "id": 132
533
+ },
534
+ {
535
+ "answer": "Array, Math",
536
+ "id": 133
537
+ },
538
+ {
539
+ "answer": "String, Search",
540
+ "id": 134
541
+ },
542
+ {
543
+ "answer": "Array, Search",
544
+ "id": 135
545
+ },
546
+ {
547
+ "answer": "Array, Search",
548
+ "id": 136
549
+ },
550
+ {
551
+ "answer": "String, Math",
552
+ "id": 137
553
+ },
554
+ {
555
+ "answer": "Math",
556
+ "id": 138
557
+ },
558
+ {
559
+ "answer": "Math, Array",
560
+ "id": 139
561
+ },
562
+ {
563
+ "answer": "String, Array",
564
+ "id": 140
565
+ },
566
+ {
567
+ "answer": "String, Hash table",
568
+ "id": 141
569
+ },
570
+ {
571
+ "answer": "Array, Math",
572
+ "id": 142
573
+ },
574
+ {
575
+ "answer": "String, Math",
576
+ "id": 143
577
+ },
578
+ {
579
+ "answer": "String, Math",
580
+ "id": 144
581
+ },
582
+ {
583
+ "answer": "Array, Sorting",
584
+ "id": 145
585
+ },
586
+ {
587
+ "answer": "Array, Math",
588
+ "id": 146
589
+ },
590
+ {
591
+ "answer": "Array, Math",
592
+ "id": 147
593
+ },
594
+ {
595
+ "answer": "String, Array, Sorting",
596
+ "id": 148
597
+ },
598
+ {
599
+ "answer": "String, Sorting",
600
+ "id": 149
601
+ },
602
+ {
603
+ "answer": "Math, Search",
604
+ "id": 150
605
+ },
606
+ {
607
+ "answer": "Array, Math",
608
+ "id": 151
609
+ },
610
+ {
611
+ "answer": "Array, Math",
612
+ "id": 152
613
+ },
614
+ {
615
+ "answer": "String, Array, Sorting",
616
+ "id": 153
617
+ },
618
+ {
619
+ "answer": "String, Search",
620
+ "id": 154
621
+ },
622
+ {
623
+ "answer": "String, Math",
624
+ "id": 155
625
+ },
626
+ {
627
+ "answer": "String, Math",
628
+ "id": 156
629
+ },
630
+ {
631
+ "answer": "Math, Sorting",
632
+ "id": 157
633
+ },
634
+ {
635
+ "answer": "String, Array",
636
+ "id": 158
637
+ },
638
+ {
639
+ "answer": "Array, Math",
640
+ "id": 159
641
+ },
642
+ {
643
+ "answer": "String, Array, Math",
644
+ "id": 160
645
+ },
646
+ {
647
+ "answer": "String, Array",
648
+ "id": 161
649
+ },
650
+ {
651
+ "answer": "String, Hash table",
652
+ "id": 162
653
+ },
654
+ {
655
+ "answer": "Array, Math",
656
+ "id": 163
657
+ }
658
+ ]
dividing_into_different_subsets/5/QS/line_counts_QS.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,line_subset_1,line_subset_2,line_subset_3,line_subset_4,line_subset_5
2
+ CodeFuse-DeepSeek-33b,75.76,84.85,78.79,66.67,78.12
3
+ Nxcode-CQ-7B,92.27,87.27,91.21,87.12,78.28
4
+ codegemma-2b,51.67,29.09,17.27,20.0,17.34
5
+ codegemma-7b,60.45,38.64,38.18,32.88,28.44
6
+ codegemma-7b-it,73.33,54.24,49.24,48.79,39.69
7
+ deepseek-coder-1.3b-base,52.73,33.94,28.03,24.85,21.72
8
+ deepseek-coder-6.7b-base,74.09,46.82,40.61,36.82,29.53
9
+ deepseek_coder-6.7b-instruct,79.55,84.39,67.88,73.48,52.03
10
+ deepseek_coder_33b-base,75.0,55.3,51.21,43.18,38.59
11
+ deepseek_coder_33b-instruct,82.27,74.24,65.15,58.33,50.16
12
+ codeqwen1.5-7b,63.18,55.76,49.55,46.36,41.72
dividing_into_different_subsets/5/QS/token_counts_QS.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,token_subset_1,token_subset_2,token_subset_3,token_subset_4,token_subset_5
2
+ CodeFuse-DeepSeek-33b,72.73,78.79,90.91,72.73,68.75
3
+ Nxcode-CQ-7B,95.45,86.21,87.42,87.42,79.69
4
+ codegemma-2b,46.97,38.48,28.03,16.21,5.31
5
+ codegemma-7b,60.76,47.73,39.7,26.36,23.91
6
+ codegemma-7b-it,74.39,64.09,50.3,37.27,39.22
7
+ deepseek-coder-1.3b-base,54.85,45.61,34.09,15.45,10.94
8
+ deepseek-coder-6.7b-base,74.09,54.24,44.24,33.79,21.25
9
+ deepseek_coder-6.7b-instruct,87.88,76.36,65.0,76.36,51.72
10
+ deepseek_coder_33b-base,73.18,63.79,54.09,42.27,29.69
11
+ deepseek_coder_33b-instruct,87.12,73.48,65.45,64.7,39.06
12
+ codeqwen1.5-7b,68.18,63.03,47.12,40.76,37.34
dividing_into_different_subsets/6/QS/CC_QS.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,CC_subset_1,CC_subset_2,CC_subset_3,CC_subset_4,CC_subset_5,CC_subset_6
2
+ CodeFuse-DeepSeek-33b,78.57,82.14,85.71,71.43,89.29,50.0
3
+ Nxcode-CQ-7B,89.82,85.71,80.18,92.86,93.04,81.25
4
+ codegemma-2b,48.93,24.82,33.04,22.86,27.32,2.29
5
+ codegemma-7b,61.61,39.82,43.04,40.18,36.07,14.37
6
+ codegemma-7b-it,65.36,54.46,56.25,53.21,53.39,33.33
7
+ deepseek-coder-1.3b-base,55.54,33.75,37.86,30.18,26.25,6.67
8
+ deepseek-coder-6.7b-base,69.64,42.32,48.57,49.11,46.07,13.75
9
+ deepseek_coder-6.7b-instruct,81.96,68.75,70.0,78.04,73.75,54.58
10
+ deepseek_coder_33b-base,70.71,50.18,56.79,52.32,54.11,28.96
11
+ deepseek_coder_33b-instruct,80.18,61.61,68.04,66.79,71.79,45.42
12
+ codeqwen1.5-7b,64.64,51.79,52.68,56.79,49.64,29.58
13
+ new,65.36,54.46,56.25,53.21,53.39,33.33
dividing_into_different_subsets/6/QS/QS.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/6/QS/calculate_humaneval_result.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Aggregate per-problem HumanEval pass@1 scores into per-subset means.

Every file found in ``input_dir`` is read as one model's evaluation result.
Its ``data["humaneval"]["pass@1"]`` entries are paired positionally with the
problem records in ``QS.json``; each entry's score is accumulated under the
subset label stored in the problem record, and one CSV row of per-subset mean
percentages is appended per model.
"""
import csv
import json
import os

# Directory holding the evaluation-result files (one per model).
input_dir = 'E:/python-testn/pythonProject3/hh_1/evaluate_result'

# Number of subsets the problems were divided into (labels 0 .. NUM_SUBSETS-1).
NUM_SUBSETS = 6

# metric name -> (label field in QS.json, output CSV file, column-name prefix)
METRICS = {
    "line": ("line_diff", "line_counts_QS.csv", "line_subset"),
    "token": ("token_diff", "token_counts_QS.csv", "token_subset"),
    "CC": ("CC_diff", "CC_QS.csv", "CC_subset"),
}

# Metric evaluated when the script is run; switch to "token" or "CC" to
# produce the other two tables.
METRIC = "line"


def extract_model_name(file_name):
    """Return the part of *file_name* between its first '_' and its last '-'.

    If there is no underscore the name starts at the first character; if
    there is no dash the slice ends one character before the end (the
    behaviour of the original slicing expression is kept unchanged).
    """
    start = file_name.find('_') + 1
    end = file_name.rfind('-')
    return file_name[start:end]


def summarize_subsets(pass_at_1, problems, label_key,
                      num_subsets=NUM_SUBSETS, verbose=True):
    """Average the scores of *pass_at_1* per subset label.

    Args:
        pass_at_1: iterable of ``(index, value)`` pairs, one per problem.
        problems: iterable of dicts carrying the subset label under
            *label_key*; paired with *pass_at_1* by position.
            NOTE(review): this relies on both sequences being in the same
            problem order -- confirm against the evaluation output.
        label_key: name of the label field, e.g. ``"line_diff"``.
        num_subsets: number of subsets; labels outside ``0..num_subsets-1``
            are ignored.
        verbose: when true, print ``label index value`` for every counted
            entry, as the original script did.

    Returns:
        ``(counts, means)``: two lists of length *num_subsets*. ``means[k]``
        is the mean score of subset ``k`` as a percentage rounded to two
        decimals, or ``None`` when the subset is empty (previously this case
        raised ``ZeroDivisionError``).
    """
    sums = dict.fromkeys(range(num_subsets), 0)
    counts = dict.fromkeys(range(num_subsets), 0)

    for result, problem in zip(pass_at_1, problems):
        label = problem[label_key]
        if label not in sums:
            continue
        index, value = result
        if verbose:
            print(label, index, value)
        sums[label] += value
        counts[label] += 1

    means = [
        round(sums[k] / counts[k] * 100, 2) if counts[k] else None
        for k in range(num_subsets)
    ]
    return [counts[k] for k in range(num_subsets)], means


def main(metric=METRIC):
    """Write the per-subset result table for *metric* (a key of ``METRICS``)."""
    label_key, csv_path, column_prefix = METRICS[metric]

    # List the result files and load the problem records first, so a missing
    # input fails before the output CSV is truncated.
    files = os.listdir(input_dir)
    with open("QS.json", "r", encoding="utf-8") as file:
        problems = json.load(file)

    header = ["Model"] + [
        "{}_{}".format(column_prefix, k) for k in range(1, NUM_SUBSETS + 1)
    ]
    with open(csv_path, "w", newline='') as csvfile:
        csv.writer(csvfile).writerow(header)

    for file_name in files:
        # Build the full path of this model's result file.
        input_file_path = os.path.join(input_dir, file_name)
        model_name = extract_model_name(file_name)
        print(model_name)

        with open(input_file_path, "r", encoding="utf-8") as file:
            results = json.load(file)

        counts, means = summarize_subsets(
            results["humaneval"]["pass@1"], problems, label_key
        )

        print("count_result!!")
        print(*counts)
        print(*means)

        # Append immediately so rows already computed survive a later failure.
        # An empty subset (mean None) is written as an empty cell.
        with open(csv_path, mode='a', newline='', encoding='utf-8') as file:
            csv.writer(file).writerow([model_name] + means)


if __name__ == "__main__":
    main()
dividing_into_different_subsets/6/QS/even.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Label every problem in ``humaneval_new.json`` with a subset index per metric.

For each of the three metrics (``line``, ``token``, ``cyclomatic_complexity``)
the problems are ranked in ascending order of that metric and cut into six
consecutive subsets whose sizes follow ``SUBSET_PERCENTAGES``. The subset
index (0 = lowest values) is stored in ``line_diff``, ``token_diff`` and
``CC_diff`` respectively, and the records, re-sorted by ``id``, are written
to ``QS.json``.
"""
import bisect
import json

# Share of the problems, in percent, that goes into each subset (lowest
# metric values first). The last subset takes whatever remains.
SUBSET_PERCENTAGES = (17, 17, 17, 17, 17, 15)

# (field the problems are ranked by, field that receives the subset index)
METRICS = (
    ("line", "line_diff"),
    ("token", "token_diff"),
    ("cyclomatic_complexity", "CC_diff"),
)


def assign_subsets(data, sort_key, label_key, percentages=SUBSET_PERCENTAGES):
    """Sort *data* in place by *sort_key* and store each item's subset index.

    An item of rank ``r`` (0-based, after sorting) belongs to the first
    subset ``k`` for which ``r < cumulative_percentage(k) / 100 * len(data)``;
    items past the last boundary fall into the final subset. The comparison
    is done as ``r * 100 < cumulative_percentage * len(data)`` so that
    integer percentages are compared exactly instead of through rounded
    float thresholds.

    The dataset size is taken from ``len(data)`` rather than a hard-coded
    164; for a 164-item input the split is 28, 28, 28, 28, 28, 24, the same
    as the previous float thresholds produced.

    Args:
        data: list of dicts; reordered in place (stable sort).
        sort_key: key whose value ranks the items.
        label_key: key under which the subset index is written.
        percentages: subset sizes in percent, lowest subset first.
    """
    data.sort(key=lambda item: item[sort_key])

    total = len(data)
    scaled_bounds = []
    cumulative = 0
    for pct in percentages[:-1]:
        cumulative += pct
        scaled_bounds.append(cumulative * total)

    for rank, item in enumerate(data):
        # Number of boundaries at or below this rank == subset index.
        item[label_key] = bisect.bisect_right(scaled_bounds, rank * 100)


def main():
    """Read the problems, label them for every metric and write ``QS.json``."""
    with open("humaneval_new.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    # The sorts are stable, so the order left by one metric breaks ties in
    # the next; keep the line -> token -> cyclomatic_complexity sequence.
    for sort_key, label_key in METRICS:
        assign_subsets(data, sort_key, label_key)

    data.sort(key=lambda item: item['id'])
    # Write the updated records back to a JSON file.
    with open('QS.json', 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()
dividing_into_different_subsets/6/QS/humaneval_new.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/6/QS/humaneval_with_cata.json ADDED
@@ -0,0 +1,658 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "answer": "Array, Sorting",
4
+ "id": 0
5
+ },
6
+ {
7
+ "answer": "String, Stack",
8
+ "id": 1
9
+ },
10
+ {
11
+ "answer": "Math",
12
+ "id": 2
13
+ },
14
+ {
15
+ "answer": "Array, Math",
16
+ "id": 3
17
+ },
18
+ {
19
+ "answer": "Math, Array",
20
+ "id": 4
21
+ },
22
+ {
23
+ "answer": "Array",
24
+ "id": 5
25
+ },
26
+ {
27
+ "answer": "String, Stack",
28
+ "id": 6
29
+ },
30
+ {
31
+ "answer": "String, Array, Search",
32
+ "id": 7
33
+ },
34
+ {
35
+ "answer": "Math, Array",
36
+ "id": 8
37
+ },
38
+ {
39
+ "answer": "Array, Stack",
40
+ "id": 9
41
+ },
42
+ {
43
+ "answer": "String, Search",
44
+ "id": 10
45
+ },
46
+ {
47
+ "answer": "String, Math",
48
+ "id": 11
49
+ },
50
+ {
51
+ "answer": "String, Array",
52
+ "id": 12
53
+ },
54
+ {
55
+ "answer": "Math",
56
+ "id": 13
57
+ },
58
+ {
59
+ "answer": "String, Array",
60
+ "id": 14
61
+ },
62
+ {
63
+ "answer": "String, Math",
64
+ "id": 15
65
+ },
66
+ {
67
+ "answer": "String, Hash table",
68
+ "id": 16
69
+ },
70
+ {
71
+ "answer": "String, Array",
72
+ "id": 17
73
+ },
74
+ {
75
+ "answer": "String, Search",
76
+ "id": 18
77
+ },
78
+ {
79
+ "answer": "String, Sorting",
80
+ "id": 19
81
+ },
82
+ {
83
+ "answer": "Array, Sorting",
84
+ "id": 20
85
+ },
86
+ {
87
+ "answer": "Array, Math",
88
+ "id": 21
89
+ },
90
+ {
91
+ "answer": "Array, Search",
92
+ "id": 22
93
+ },
94
+ {
95
+ "answer": "String",
96
+ "id": 23
97
+ },
98
+ {
99
+ "answer": "Math",
100
+ "id": 24
101
+ },
102
+ {
103
+ "answer": "Math, Array",
104
+ "id": 25
105
+ },
106
+ {
107
+ "answer": "Array, Hash table",
108
+ "id": 26
109
+ },
110
+ {
111
+ "answer": "String",
112
+ "id": 27
113
+ },
114
+ {
115
+ "answer": "String",
116
+ "id": 28
117
+ },
118
+ {
119
+ "answer": "String, Array",
120
+ "id": 29
121
+ },
122
+ {
123
+ "answer": "Array",
124
+ "id": 30
125
+ },
126
+ {
127
+ "answer": "Math",
128
+ "id": 31
129
+ },
130
+ {
131
+ "answer": "Math, Search",
132
+ "id": 32
133
+ },
134
+ {
135
+ "answer": "Array, Sorting",
136
+ "id": 33
137
+ },
138
+ {
139
+ "answer": "Array, Sorting",
140
+ "id": 34
141
+ },
142
+ {
143
+ "answer": "Math, Array",
144
+ "id": 35
145
+ },
146
+ {
147
+ "answer": "Math, Search",
148
+ "id": 36
149
+ },
150
+ {
151
+ "answer": "Array, Sorting",
152
+ "id": 37
153
+ },
154
+ {
155
+ "answer": "String, Array",
156
+ "id": 38
157
+ },
158
+ {
159
+ "answer": "Math, Search",
160
+ "id": 39
161
+ },
162
+ {
163
+ "answer": "Array, Search",
164
+ "id": 40
165
+ },
166
+ {
167
+ "answer": "Math, Array",
168
+ "id": 41
169
+ },
170
+ {
171
+ "answer": "Array, Math",
172
+ "id": 42
173
+ },
174
+ {
175
+ "answer": "Array, Hash table",
176
+ "id": 43
177
+ },
178
+ {
179
+ "answer": "Math, String",
180
+ "id": 44
181
+ },
182
+ {
183
+ "answer": "Math",
184
+ "id": 45
185
+ },
186
+ {
187
+ "answer": "Math, Array",
188
+ "id": 46
189
+ },
190
+ {
191
+ "answer": "Array, Sorting",
192
+ "id": 47
193
+ },
194
+ {
195
+ "answer": "String",
196
+ "id": 48
197
+ },
198
+ {
199
+ "answer": "Math",
200
+ "id": 49
201
+ },
202
+ {
203
+ "answer": "String, Math",
204
+ "id": 50
205
+ },
206
+ {
207
+ "answer": "String",
208
+ "id": 51
209
+ },
210
+ {
211
+ "answer": "Array, Search",
212
+ "id": 52
213
+ },
214
+ {
215
+ "answer": "Math",
216
+ "id": 53
217
+ },
218
+ {
219
+ "answer": "String, Hash table",
220
+ "id": 54
221
+ },
222
+ {
223
+ "answer": "Math",
224
+ "id": 55
225
+ },
226
+ {
227
+ "answer": "String, Stack",
228
+ "id": 56
229
+ },
230
+ {
231
+ "answer": "Array, Sorting",
232
+ "id": 57
233
+ },
234
+ {
235
+ "answer": "Array, Sorting",
236
+ "id": 58
237
+ },
238
+ {
239
+ "answer": "Math, Search",
240
+ "id": 59
241
+ },
242
+ {
243
+ "answer": "Math",
244
+ "id": 60
245
+ },
246
+ {
247
+ "answer": "String, Stack",
248
+ "id": 61
249
+ },
250
+ {
251
+ "answer": "Array, Math",
252
+ "id": 62
253
+ },
254
+ {
255
+ "answer": "Math, Array",
256
+ "id": 63
257
+ },
258
+ {
259
+ "answer": "String",
260
+ "id": 64
261
+ },
262
+ {
263
+ "answer": "String, Math",
264
+ "id": 65
265
+ },
266
+ {
267
+ "answer": "String, Math",
268
+ "id": 66
269
+ },
270
+ {
271
+ "answer": "String, Math",
272
+ "id": 67
273
+ },
274
+ {
275
+ "answer": "Array, Search",
276
+ "id": 68
277
+ },
278
+ {
279
+ "answer": "Array, Hash table",
280
+ "id": 69
281
+ },
282
+ {
283
+ "answer": "Array, Sorting",
284
+ "id": 70
285
+ },
286
+ {
287
+ "answer": "Math, Array",
288
+ "id": 71
289
+ },
290
+ {
291
+ "answer": "Array, Math",
292
+ "id": 72
293
+ },
294
+ {
295
+ "answer": "Array, Sorting",
296
+ "id": 73
297
+ },
298
+ {
299
+ "answer": "String, Array",
300
+ "id": 74
301
+ },
302
+ {
303
+ "answer": "Math, Hash table",
304
+ "id": 75
305
+ },
306
+ {
307
+ "answer": "Math",
308
+ "id": 76
309
+ },
310
+ {
311
+ "answer": "Math",
312
+ "id": 77
313
+ },
314
+ {
315
+ "answer": "String, Hash table",
316
+ "id": 78
317
+ },
318
+ {
319
+ "answer": "String, Math",
320
+ "id": 79
321
+ },
322
+ {
323
+ "answer": "String, Search",
324
+ "id": 80
325
+ },
326
+ {
327
+ "answer": "Array, Sorting",
328
+ "id": 81
329
+ },
330
+ {
331
+ "answer": "String, Math",
332
+ "id": 82
333
+ },
334
+ {
335
+ "answer": "Math, Array",
336
+ "id": 83
337
+ },
338
+ {
339
+ "answer": "Math, String",
340
+ "id": 84
341
+ },
342
+ {
343
+ "answer": "Array, Math",
344
+ "id": 85
345
+ },
346
+ {
347
+ "answer": "String, Sorting",
348
+ "id": 86
349
+ },
350
+ {
351
+ "answer": "Array, Search, Sorting",
352
+ "id": 87
353
+ },
354
+ {
355
+ "answer": "Array, Sorting",
356
+ "id": 88
357
+ },
358
+ {
359
+ "answer": "String, Math",
360
+ "id": 89
361
+ },
362
+ {
363
+ "answer": "Array, Sorting",
364
+ "id": 90
365
+ },
366
+ {
367
+ "answer": "String, Search",
368
+ "id": 91
369
+ },
370
+ {
371
+ "answer": "Math, Array",
372
+ "id": 92
373
+ },
374
+ {
375
+ "answer": "String, Hash table",
376
+ "id": 93
377
+ },
378
+ {
379
+ "answer": "Array, Math",
380
+ "id": 94
381
+ },
382
+ {
383
+ "answer": "String, Hash table",
384
+ "id": 95
385
+ },
386
+ {
387
+ "answer": "Math, Array",
388
+ "id": 96
389
+ },
390
+ {
391
+ "answer": "Math",
392
+ "id": 97
393
+ },
394
+ {
395
+ "answer": "String, Array",
396
+ "id": 98
397
+ },
398
+ {
399
+ "answer": "String, Math",
400
+ "id": 99
401
+ },
402
+ {
403
+ "answer": "Array, Math",
404
+ "id": 100
405
+ },
406
+ {
407
+ "answer": "String, Array",
408
+ "id": 101
409
+ },
410
+ {
411
+ "answer": "Math, Search",
412
+ "id": 102
413
+ },
414
+ {
415
+ "answer": "Math, String",
416
+ "id": 103
417
+ },
418
+ {
419
+ "answer": "Array, Sorting",
420
+ "id": 104
421
+ },
422
+ {
423
+ "answer": "Array, Sorting",
424
+ "id": 105
425
+ },
426
+ {
427
+ "answer": "Array, Math",
428
+ "id": 106
429
+ },
430
+ {
431
+ "answer": "Math, Search",
432
+ "id": 107
433
+ },
434
+ {
435
+ "answer": "Array, Math",
436
+ "id": 108
437
+ },
438
+ {
439
+ "answer": "Array, Sorting",
440
+ "id": 109
441
+ },
442
+ {
443
+ "answer": "Array, String",
444
+ "id": 110
445
+ },
446
+ {
447
+ "answer": "String, Hash table",
448
+ "id": 111
449
+ },
450
+ {
451
+ "answer": "String, Sorting",
452
+ "id": 112
453
+ },
454
+ {
455
+ "answer": "String, Array",
456
+ "id": 113
457
+ },
458
+ {
459
+ "answer": "Array, Math",
460
+ "id": 114
461
+ },
462
+ {
463
+ "answer": "Array, Math",
464
+ "id": 115
465
+ },
466
+ {
467
+ "answer": "Array, Sorting",
468
+ "id": 116
469
+ },
470
+ {
471
+ "answer": "String, Array",
472
+ "id": 117
473
+ },
474
+ {
475
+ "answer": "String, Search",
476
+ "id": 118
477
+ },
478
+ {
479
+ "answer": "String, Stack",
480
+ "id": 119
481
+ },
482
+ {
483
+ "answer": "Array, Sorting",
484
+ "id": 120
485
+ },
486
+ {
487
+ "answer": "Array, Math",
488
+ "id": 121
489
+ },
490
+ {
491
+ "answer": "Array, Math",
492
+ "id": 122
493
+ },
494
+ {
495
+ "answer": "Array, Sorting",
496
+ "id": 123
497
+ },
498
+ {
499
+ "answer": "String, Math",
500
+ "id": 124
501
+ },
502
+ {
503
+ "answer": "String, Math",
504
+ "id": 125
505
+ },
506
+ {
507
+ "answer": "Array, Sorting",
508
+ "id": 126
509
+ },
510
+ {
511
+ "answer": "Math, Array",
512
+ "id": 127
513
+ },
514
+ {
515
+ "answer": "Array, Math",
516
+ "id": 128
517
+ },
518
+ {
519
+ "answer": "Array, Search, Matrix",
520
+ "id": 129
521
+ },
522
+ {
523
+ "answer": "Math, Array",
524
+ "id": 130
525
+ },
526
+ {
527
+ "answer": "Math, String",
528
+ "id": 131
529
+ },
530
+ {
531
+ "answer": "String, Stack",
532
+ "id": 132
533
+ },
534
+ {
535
+ "answer": "Array, Math",
536
+ "id": 133
537
+ },
538
+ {
539
+ "answer": "String, Search",
540
+ "id": 134
541
+ },
542
+ {
543
+ "answer": "Array, Search",
544
+ "id": 135
545
+ },
546
+ {
547
+ "answer": "Array, Search",
548
+ "id": 136
549
+ },
550
+ {
551
+ "answer": "String, Math",
552
+ "id": 137
553
+ },
554
+ {
555
+ "answer": "Math",
556
+ "id": 138
557
+ },
558
+ {
559
+ "answer": "Math, Array",
560
+ "id": 139
561
+ },
562
+ {
563
+ "answer": "String, Array",
564
+ "id": 140
565
+ },
566
+ {
567
+ "answer": "String, Hash table",
568
+ "id": 141
569
+ },
570
+ {
571
+ "answer": "Array, Math",
572
+ "id": 142
573
+ },
574
+ {
575
+ "answer": "String, Math",
576
+ "id": 143
577
+ },
578
+ {
579
+ "answer": "String, Math",
580
+ "id": 144
581
+ },
582
+ {
583
+ "answer": "Array, Sorting",
584
+ "id": 145
585
+ },
586
+ {
587
+ "answer": "Array, Math",
588
+ "id": 146
589
+ },
590
+ {
591
+ "answer": "Array, Math",
592
+ "id": 147
593
+ },
594
+ {
595
+ "answer": "String, Array, Sorting",
596
+ "id": 148
597
+ },
598
+ {
599
+ "answer": "String, Sorting",
600
+ "id": 149
601
+ },
602
+ {
603
+ "answer": "Math, Search",
604
+ "id": 150
605
+ },
606
+ {
607
+ "answer": "Array, Math",
608
+ "id": 151
609
+ },
610
+ {
611
+ "answer": "Array, Math",
612
+ "id": 152
613
+ },
614
+ {
615
+ "answer": "String, Array, Sorting",
616
+ "id": 153
617
+ },
618
+ {
619
+ "answer": "String, Search",
620
+ "id": 154
621
+ },
622
+ {
623
+ "answer": "String, Math",
624
+ "id": 155
625
+ },
626
+ {
627
+ "answer": "String, Math",
628
+ "id": 156
629
+ },
630
+ {
631
+ "answer": "Math, Sorting",
632
+ "id": 157
633
+ },
634
+ {
635
+ "answer": "String, Array",
636
+ "id": 158
637
+ },
638
+ {
639
+ "answer": "Array, Math",
640
+ "id": 159
641
+ },
642
+ {
643
+ "answer": "String, Array, Math",
644
+ "id": 160
645
+ },
646
+ {
647
+ "answer": "String, Array",
648
+ "id": 161
649
+ },
650
+ {
651
+ "answer": "String, Hash table",
652
+ "id": 162
653
+ },
654
+ {
655
+ "answer": "Array, Math",
656
+ "id": 163
657
+ }
658
+ ]
dividing_into_different_subsets/6/QS/line_counts_QS.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,line_subset_1,line_subset_2,line_subset_3,line_subset_4,line_subset_5,line_subset_6
2
+ CodeFuse-DeepSeek-33b,75.0,85.71,82.14,67.86,71.43,79.17
3
+ Nxcode-CQ-7B,92.68,91.79,87.86,89.29,85.71,74.58
4
+ codegemma-2b,52.86,34.11,26.96,9.11,24.11,13.75
5
+ codegemma-7b,62.32,42.68,39.64,30.18,35.0,27.08
6
+ codegemma-7b-it,74.11,57.68,54.11,42.5,50.54,37.71
7
+ deepseek-coder-1.3b-base,55.0,40.36,30.0,22.32,24.46,20.0
8
+ deepseek-coder-6.7b-base,75.89,52.14,47.5,30.89,39.11,25.63
9
+ deepseek_coder-6.7b-instruct,78.57,91.96,74.46,58.39,70.18,53.33
10
+ deepseek_coder_33b-base,74.29,62.86,53.04,43.39,43.93,36.67
11
+ deepseek_coder_33b-instruct,83.21,81.61,66.96,56.79,55.0,51.04
12
+ codeqwen1.5-7b,65.18,54.64,59.46,37.86,53.21,35.62
13
+ new,74.11,57.68,54.11,42.5,50.54,37.71
dividing_into_different_subsets/6/QS/token_counts_QS.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,token_subset_1,token_subset_2,token_subset_3,token_subset_4,token_subset_5,token_subset_6
2
+ CodeFuse-DeepSeek-33b,75.0,71.43,96.43,82.14,71.43,62.5
3
+ Nxcode-CQ-7B,95.18,86.61,85.71,89.29,88.39,77.08
4
+ codegemma-2b,46.96,36.43,34.82,20.36,19.29,1.25
5
+ codegemma-7b,60.71,46.43,47.32,33.21,30.18,17.71
6
+ codegemma-7b-it,75.89,61.79,58.93,43.21,40.36,36.25
7
+ deepseek-coder-1.3b-base,57.14,41.43,41.96,24.11,19.82,5.62
8
+ deepseek-coder-6.7b-base,73.04,52.5,53.75,42.68,31.79,16.04
9
+ deepseek_coder-6.7b-instruct,85.71,82.14,65.89,72.5,73.04,46.67
10
+ deepseek_coder_33b-base,73.57,60.18,67.14,46.79,39.46,25.42
11
+ deepseek_coder_33b-instruct,86.25,75.18,70.54,63.57,57.86,39.58
12
+ codeqwen1.5-7b,69.11,58.93,55.36,48.21,38.93,35.42
13
+ new,75.89,61.79,58.93,43.21,40.36,36.25
dividing_into_different_subsets/7/QS/CC_QS.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,CC_subset_1,CC_subset_2,CC_subset_3,CC_subset_4,CC_subset_5,CC_subset_6,CC_subset_7
2
+ CodeFuse-DeepSeek-33b,73.91,86.96,78.26,91.3,65.22,92.0,50.0
3
+ Nxcode-CQ-7B,91.3,85.65,81.09,88.04,91.3,92.2,81.25
4
+ codegemma-2b,47.61,37.61,20.65,28.7,28.91,25.4,2.29
5
+ codegemma-7b,57.61,53.26,35.87,44.78,40.22,34.0,14.37
6
+ codegemma-7b-it,61.3,62.17,54.35,53.48,55.43,52.8,33.33
7
+ deepseek-coder-1.3b-base,54.13,48.26,27.39,30.65,38.91,22.2,6.67
8
+ deepseek-coder-6.7b-base,65.0,57.61,40.22,52.61,51.74,40.6,13.75
9
+ deepseek_coder-6.7b-instruct,82.39,71.96,70.65,72.83,75.0,74.2,54.58
10
+ deepseek_coder_33b-base,68.04,57.39,49.35,58.91,55.22,52.4,28.96
11
+ deepseek_coder_33b-instruct,76.09,69.57,63.04,67.17,72.17,70.0,45.42
12
+ codeqwen1.5-7b,64.78,57.83,52.61,55.87,51.3,48.8,29.58
13
+ new,61.3,62.17,54.35,53.48,55.43,52.8,33.33
dividing_into_different_subsets/7/QS/QS.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/7/QS/calculate_humaneval_result.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Aggregate per-problem HumanEval pass@1 scores into difficulty subsets.

For every model result file found in ``input_dir`` the script pairs the
entries of ``data["humaneval"]["pass@1"]`` with the problems listed in
``QS.json`` (in order), groups them by the problem's ``<METRIC>_diff`` label
and appends one row of per-subset mean pass rates (in percent) to a CSV file.
"""
import csv
import json
import os

# Directory that holds one evaluation-result JSON file per model.
input_dir = 'E:/python-testn/pythonProject3/hh_1/evaluate_result'

# Difficulty dimension to aggregate over: "token", "line" or "CC"
# (cyclomatic complexity). The matching label in QS.json is "<METRIC>_diff".
METRIC = "CC"

# Number of subsets the problems were divided into (labels 0 .. NUM_SUBSETS-1).
NUM_SUBSETS = 7

# CSV file written for each difficulty dimension.
OUTPUT_FILES = {
    "token": "token_counts_QS.csv",
    "line": "line_counts_QS.csv",
    "CC": "CC_QS.csv",
}


def compute_subset_means(pass_at_1, tasks, diff_key, num_subsets):
    """Return per-subset mean pass rates and per-subset problem counts.

    Args:
        pass_at_1: iterable of ``(index, value)`` pairs, one per problem.
        tasks: iterable of problem dicts, aligned with ``pass_at_1``; each
            must contain ``diff_key``.
        diff_key: name of the subset label to group by (e.g. ``"CC_diff"``).
        num_subsets: number of subsets; labels outside ``0..num_subsets-1``
            are ignored.

    Returns:
        ``(means, counts)`` - two lists of length ``num_subsets``. A mean is
        ``round(sum / count * 100, 2)``, or ``None`` when the subset is empty
        (instead of failing with ``ZeroDivisionError``).
    """
    totals = {level: 0 for level in range(num_subsets)}
    counts = {level: 0 for level in range(num_subsets)}

    for item, task in zip(pass_at_1, tasks):
        level = task[diff_key]
        if level not in totals:
            continue
        index, value = item
        print(level, index, value)
        totals[level] += value
        counts[level] += 1

    means = [
        round(totals[level] / counts[level] * 100, 2) if counts[level] else None
        for level in range(num_subsets)
    ]
    return means, [counts[level] for level in range(num_subsets)]


def main():
    """Write the header row, then one result row per model file."""
    # List every file in the results directory.
    files = os.listdir(input_dir)

    diff_key = "{}_diff".format(METRIC)
    output_path = OUTPUT_FILES[METRIC]
    header = ["Model"] + [
        "{}_subset_{}".format(METRIC, i) for i in range(1, NUM_SUBSETS + 1)
    ]

    with open(output_path, "w", newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerow(header)

    # The subset labels are the same for every model, so load them once.
    with open("QS.json", "r", encoding="utf-8") as file:
        tasks = json.load(file)

    for file_name in files:
        # Build the full path of the result file.
        input_file_path = os.path.join(input_dir, file_name)

        # The model name is the part of the file name between the first
        # underscore and the last dash.
        first_underscore_index = file_name.find('_')
        last_dash_index = file_name.rfind('-')
        model_name = file_name[first_underscore_index + 1:last_dash_index]
        print(model_name)

        with open(input_file_path, "r", encoding="utf-8") as file:
            results = json.load(file)

        means, counts = compute_subset_means(
            results["humaneval"]["pass@1"], tasks, diff_key, NUM_SUBSETS
        )

        print("count_result!!")
        print(*counts)
        print(*means)

        with open(output_path, mode='a', newline='', encoding='utf-8') as file:
            csv.writer(file).writerow([model_name] + means)


if __name__ == "__main__":
    main()
dividing_into_different_subsets/7/QS/even.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Label every HumanEval problem with a difficulty subset per metric.

Reads ``humaneval_new.json``, ranks the problems by line count, token count
and cyclomatic complexity in turn, assigns each problem a subset index
(``line_diff``, ``token_diff``, ``CC_diff``) according to its rank, and writes
the result, ordered by ``id``, to ``QS.json``.
"""
import json
from bisect import bisect_right
from itertools import accumulate

# Share of the dataset, in percent, that goes into each of the seven subsets
# (lowest metric values first).
line_counts = [14, 14, 14, 14, 14, 15, 15]
token_counts = [14, 14, 14, 14, 14, 15, 15]
cyclomatic_complexity = [14, 14, 14, 14, 14, 15, 15]


def assign_subsets(data, sort_key, label_key, percentages):
    """Sort ``data`` in place by ``sort_key`` and label each item by rank.

    Args:
        data: list of problem dicts; reordered in place (stable sort).
        sort_key: key of the metric to rank by.
        label_key: key under which the subset index is stored on each item.
        percentages: share of the dataset, in percent, for each subset.

    An item at rank ``i`` gets the index of the first subset whose cumulative
    size is strictly greater than ``i``; anything beyond the second-to-last
    boundary falls into the last subset.
    """
    data.sort(key=lambda item: item[sort_key])
    total = len(data)
    # Upper rank boundary of every subset except the last (catch-all) one.
    bounds = list(accumulate(share * 0.01 * total for share in percentages))[:-1]
    for rank, item in enumerate(data):
        item[label_key] = bisect_right(bounds, rank)


def main():
    """Load the problems, label them on all three metrics and save them."""
    with open("humaneval_new.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    assign_subsets(data, 'line', 'line_diff', line_counts)
    assign_subsets(data, 'token', 'token_diff', token_counts)
    assign_subsets(data, 'cyclomatic_complexity', 'CC_diff', cyclomatic_complexity)

    data.sort(key=lambda x: x['id'])
    # Write the updated data back to a JSON file.
    with open('QS.json', 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()
dividing_into_different_subsets/7/QS/humaneval_new.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/7/QS/humaneval_with_cata.json ADDED
@@ -0,0 +1,658 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "answer": "Array, Sorting",
4
+ "id": 0
5
+ },
6
+ {
7
+ "answer": "String, Stack",
8
+ "id": 1
9
+ },
10
+ {
11
+ "answer": "Math",
12
+ "id": 2
13
+ },
14
+ {
15
+ "answer": "Array, Math",
16
+ "id": 3
17
+ },
18
+ {
19
+ "answer": "Math, Array",
20
+ "id": 4
21
+ },
22
+ {
23
+ "answer": "Array",
24
+ "id": 5
25
+ },
26
+ {
27
+ "answer": "String, Stack",
28
+ "id": 6
29
+ },
30
+ {
31
+ "answer": "String, Array, Search",
32
+ "id": 7
33
+ },
34
+ {
35
+ "answer": "Math, Array",
36
+ "id": 8
37
+ },
38
+ {
39
+ "answer": "Array, Stack",
40
+ "id": 9
41
+ },
42
+ {
43
+ "answer": "String, Search",
44
+ "id": 10
45
+ },
46
+ {
47
+ "answer": "String, Math",
48
+ "id": 11
49
+ },
50
+ {
51
+ "answer": "String, Array",
52
+ "id": 12
53
+ },
54
+ {
55
+ "answer": "Math",
56
+ "id": 13
57
+ },
58
+ {
59
+ "answer": "String, Array",
60
+ "id": 14
61
+ },
62
+ {
63
+ "answer": "String, Math",
64
+ "id": 15
65
+ },
66
+ {
67
+ "answer": "String, Hash table",
68
+ "id": 16
69
+ },
70
+ {
71
+ "answer": "String, Array",
72
+ "id": 17
73
+ },
74
+ {
75
+ "answer": "String, Search",
76
+ "id": 18
77
+ },
78
+ {
79
+ "answer": "String, Sorting",
80
+ "id": 19
81
+ },
82
+ {
83
+ "answer": "Array, Sorting",
84
+ "id": 20
85
+ },
86
+ {
87
+ "answer": "Array, Math",
88
+ "id": 21
89
+ },
90
+ {
91
+ "answer": "Array, Search",
92
+ "id": 22
93
+ },
94
+ {
95
+ "answer": "String",
96
+ "id": 23
97
+ },
98
+ {
99
+ "answer": "Math",
100
+ "id": 24
101
+ },
102
+ {
103
+ "answer": "Math, Array",
104
+ "id": 25
105
+ },
106
+ {
107
+ "answer": "Array, Hash table",
108
+ "id": 26
109
+ },
110
+ {
111
+ "answer": "String",
112
+ "id": 27
113
+ },
114
+ {
115
+ "answer": "String",
116
+ "id": 28
117
+ },
118
+ {
119
+ "answer": "String, Array",
120
+ "id": 29
121
+ },
122
+ {
123
+ "answer": "Array",
124
+ "id": 30
125
+ },
126
+ {
127
+ "answer": "Math",
128
+ "id": 31
129
+ },
130
+ {
131
+ "answer": "Math, Search",
132
+ "id": 32
133
+ },
134
+ {
135
+ "answer": "Array, Sorting",
136
+ "id": 33
137
+ },
138
+ {
139
+ "answer": "Array, Sorting",
140
+ "id": 34
141
+ },
142
+ {
143
+ "answer": "Math, Array",
144
+ "id": 35
145
+ },
146
+ {
147
+ "answer": "Math, Search",
148
+ "id": 36
149
+ },
150
+ {
151
+ "answer": "Array, Sorting",
152
+ "id": 37
153
+ },
154
+ {
155
+ "answer": "String, Array",
156
+ "id": 38
157
+ },
158
+ {
159
+ "answer": "Math, Search",
160
+ "id": 39
161
+ },
162
+ {
163
+ "answer": "Array, Search",
164
+ "id": 40
165
+ },
166
+ {
167
+ "answer": "Math, Array",
168
+ "id": 41
169
+ },
170
+ {
171
+ "answer": "Array, Math",
172
+ "id": 42
173
+ },
174
+ {
175
+ "answer": "Array, Hash table",
176
+ "id": 43
177
+ },
178
+ {
179
+ "answer": "Math, String",
180
+ "id": 44
181
+ },
182
+ {
183
+ "answer": "Math",
184
+ "id": 45
185
+ },
186
+ {
187
+ "answer": "Math, Array",
188
+ "id": 46
189
+ },
190
+ {
191
+ "answer": "Array, Sorting",
192
+ "id": 47
193
+ },
194
+ {
195
+ "answer": "String",
196
+ "id": 48
197
+ },
198
+ {
199
+ "answer": "Math",
200
+ "id": 49
201
+ },
202
+ {
203
+ "answer": "String, Math",
204
+ "id": 50
205
+ },
206
+ {
207
+ "answer": "String",
208
+ "id": 51
209
+ },
210
+ {
211
+ "answer": "Array, Search",
212
+ "id": 52
213
+ },
214
+ {
215
+ "answer": "Math",
216
+ "id": 53
217
+ },
218
+ {
219
+ "answer": "String, Hash table",
220
+ "id": 54
221
+ },
222
+ {
223
+ "answer": "Math",
224
+ "id": 55
225
+ },
226
+ {
227
+ "answer": "String, Stack",
228
+ "id": 56
229
+ },
230
+ {
231
+ "answer": "Array, Sorting",
232
+ "id": 57
233
+ },
234
+ {
235
+ "answer": "Array, Sorting",
236
+ "id": 58
237
+ },
238
+ {
239
+ "answer": "Math, Search",
240
+ "id": 59
241
+ },
242
+ {
243
+ "answer": "Math",
244
+ "id": 60
245
+ },
246
+ {
247
+ "answer": "String, Stack",
248
+ "id": 61
249
+ },
250
+ {
251
+ "answer": "Array, Math",
252
+ "id": 62
253
+ },
254
+ {
255
+ "answer": "Math, Array",
256
+ "id": 63
257
+ },
258
+ {
259
+ "answer": "String",
260
+ "id": 64
261
+ },
262
+ {
263
+ "answer": "String, Math",
264
+ "id": 65
265
+ },
266
+ {
267
+ "answer": "String, Math",
268
+ "id": 66
269
+ },
270
+ {
271
+ "answer": "String, Math",
272
+ "id": 67
273
+ },
274
+ {
275
+ "answer": "Array, Search",
276
+ "id": 68
277
+ },
278
+ {
279
+ "answer": "Array, Hash table",
280
+ "id": 69
281
+ },
282
+ {
283
+ "answer": "Array, Sorting",
284
+ "id": 70
285
+ },
286
+ {
287
+ "answer": "Math, Array",
288
+ "id": 71
289
+ },
290
+ {
291
+ "answer": "Array, Math",
292
+ "id": 72
293
+ },
294
+ {
295
+ "answer": "Array, Sorting",
296
+ "id": 73
297
+ },
298
+ {
299
+ "answer": "String, Array",
300
+ "id": 74
301
+ },
302
+ {
303
+ "answer": "Math, Hash table",
304
+ "id": 75
305
+ },
306
+ {
307
+ "answer": "Math",
308
+ "id": 76
309
+ },
310
+ {
311
+ "answer": "Math",
312
+ "id": 77
313
+ },
314
+ {
315
+ "answer": "String, Hash table",
316
+ "id": 78
317
+ },
318
+ {
319
+ "answer": "String, Math",
320
+ "id": 79
321
+ },
322
+ {
323
+ "answer": "String, Search",
324
+ "id": 80
325
+ },
326
+ {
327
+ "answer": "Array, Sorting",
328
+ "id": 81
329
+ },
330
+ {
331
+ "answer": "String, Math",
332
+ "id": 82
333
+ },
334
+ {
335
+ "answer": "Math, Array",
336
+ "id": 83
337
+ },
338
+ {
339
+ "answer": "Math, String",
340
+ "id": 84
341
+ },
342
+ {
343
+ "answer": "Array, Math",
344
+ "id": 85
345
+ },
346
+ {
347
+ "answer": "String, Sorting",
348
+ "id": 86
349
+ },
350
+ {
351
+ "answer": "Array, Search, Sorting",
352
+ "id": 87
353
+ },
354
+ {
355
+ "answer": "Array, Sorting",
356
+ "id": 88
357
+ },
358
+ {
359
+ "answer": "String, Math",
360
+ "id": 89
361
+ },
362
+ {
363
+ "answer": "Array, Sorting",
364
+ "id": 90
365
+ },
366
+ {
367
+ "answer": "String, Search",
368
+ "id": 91
369
+ },
370
+ {
371
+ "answer": "Math, Array",
372
+ "id": 92
373
+ },
374
+ {
375
+ "answer": "String, Hash table",
376
+ "id": 93
377
+ },
378
+ {
379
+ "answer": "Array, Math",
380
+ "id": 94
381
+ },
382
+ {
383
+ "answer": "String, Hash table",
384
+ "id": 95
385
+ },
386
+ {
387
+ "answer": "Math, Array",
388
+ "id": 96
389
+ },
390
+ {
391
+ "answer": "Math",
392
+ "id": 97
393
+ },
394
+ {
395
+ "answer": "String, Array",
396
+ "id": 98
397
+ },
398
+ {
399
+ "answer": "String, Math",
400
+ "id": 99
401
+ },
402
+ {
403
+ "answer": "Array, Math",
404
+ "id": 100
405
+ },
406
+ {
407
+ "answer": "String, Array",
408
+ "id": 101
409
+ },
410
+ {
411
+ "answer": "Math, Search",
412
+ "id": 102
413
+ },
414
+ {
415
+ "answer": "Math, String",
416
+ "id": 103
417
+ },
418
+ {
419
+ "answer": "Array, Sorting",
420
+ "id": 104
421
+ },
422
+ {
423
+ "answer": "Array, Sorting",
424
+ "id": 105
425
+ },
426
+ {
427
+ "answer": "Array, Math",
428
+ "id": 106
429
+ },
430
+ {
431
+ "answer": "Math, Search",
432
+ "id": 107
433
+ },
434
+ {
435
+ "answer": "Array, Math",
436
+ "id": 108
437
+ },
438
+ {
439
+ "answer": "Array, Sorting",
440
+ "id": 109
441
+ },
442
+ {
443
+ "answer": "Array, String",
444
+ "id": 110
445
+ },
446
+ {
447
+ "answer": "String, Hash table",
448
+ "id": 111
449
+ },
450
+ {
451
+ "answer": "String, Sorting",
452
+ "id": 112
453
+ },
454
+ {
455
+ "answer": "String, Array",
456
+ "id": 113
457
+ },
458
+ {
459
+ "answer": "Array, Math",
460
+ "id": 114
461
+ },
462
+ {
463
+ "answer": "Array, Math",
464
+ "id": 115
465
+ },
466
+ {
467
+ "answer": "Array, Sorting",
468
+ "id": 116
469
+ },
470
+ {
471
+ "answer": "String, Array",
472
+ "id": 117
473
+ },
474
+ {
475
+ "answer": "String, Search",
476
+ "id": 118
477
+ },
478
+ {
479
+ "answer": "String, Stack",
480
+ "id": 119
481
+ },
482
+ {
483
+ "answer": "Array, Sorting",
484
+ "id": 120
485
+ },
486
+ {
487
+ "answer": "Array, Math",
488
+ "id": 121
489
+ },
490
+ {
491
+ "answer": "Array, Math",
492
+ "id": 122
493
+ },
494
+ {
495
+ "answer": "Array, Sorting",
496
+ "id": 123
497
+ },
498
+ {
499
+ "answer": "String, Math",
500
+ "id": 124
501
+ },
502
+ {
503
+ "answer": "String, Math",
504
+ "id": 125
505
+ },
506
+ {
507
+ "answer": "Array, Sorting",
508
+ "id": 126
509
+ },
510
+ {
511
+ "answer": "Math, Array",
512
+ "id": 127
513
+ },
514
+ {
515
+ "answer": "Array, Math",
516
+ "id": 128
517
+ },
518
+ {
519
+ "answer": "Array, Search, Matrix",
520
+ "id": 129
521
+ },
522
+ {
523
+ "answer": "Math, Array",
524
+ "id": 130
525
+ },
526
+ {
527
+ "answer": "Math, String",
528
+ "id": 131
529
+ },
530
+ {
531
+ "answer": "String, Stack",
532
+ "id": 132
533
+ },
534
+ {
535
+ "answer": "Array, Math",
536
+ "id": 133
537
+ },
538
+ {
539
+ "answer": "String, Search",
540
+ "id": 134
541
+ },
542
+ {
543
+ "answer": "Array, Search",
544
+ "id": 135
545
+ },
546
+ {
547
+ "answer": "Array, Search",
548
+ "id": 136
549
+ },
550
+ {
551
+ "answer": "String, Math",
552
+ "id": 137
553
+ },
554
+ {
555
+ "answer": "Math",
556
+ "id": 138
557
+ },
558
+ {
559
+ "answer": "Math, Array",
560
+ "id": 139
561
+ },
562
+ {
563
+ "answer": "String, Array",
564
+ "id": 140
565
+ },
566
+ {
567
+ "answer": "String, Hash table",
568
+ "id": 141
569
+ },
570
+ {
571
+ "answer": "Array, Math",
572
+ "id": 142
573
+ },
574
+ {
575
+ "answer": "String, Math",
576
+ "id": 143
577
+ },
578
+ {
579
+ "answer": "String, Math",
580
+ "id": 144
581
+ },
582
+ {
583
+ "answer": "Array, Sorting",
584
+ "id": 145
585
+ },
586
+ {
587
+ "answer": "Array, Math",
588
+ "id": 146
589
+ },
590
+ {
591
+ "answer": "Array, Math",
592
+ "id": 147
593
+ },
594
+ {
595
+ "answer": "String, Array, Sorting",
596
+ "id": 148
597
+ },
598
+ {
599
+ "answer": "String, Sorting",
600
+ "id": 149
601
+ },
602
+ {
603
+ "answer": "Math, Search",
604
+ "id": 150
605
+ },
606
+ {
607
+ "answer": "Array, Math",
608
+ "id": 151
609
+ },
610
+ {
611
+ "answer": "Array, Math",
612
+ "id": 152
613
+ },
614
+ {
615
+ "answer": "String, Array, Sorting",
616
+ "id": 153
617
+ },
618
+ {
619
+ "answer": "String, Search",
620
+ "id": 154
621
+ },
622
+ {
623
+ "answer": "String, Math",
624
+ "id": 155
625
+ },
626
+ {
627
+ "answer": "String, Math",
628
+ "id": 156
629
+ },
630
+ {
631
+ "answer": "Math, Sorting",
632
+ "id": 157
633
+ },
634
+ {
635
+ "answer": "String, Array",
636
+ "id": 158
637
+ },
638
+ {
639
+ "answer": "Array, Math",
640
+ "id": 159
641
+ },
642
+ {
643
+ "answer": "String, Array, Math",
644
+ "id": 160
645
+ },
646
+ {
647
+ "answer": "String, Array",
648
+ "id": 161
649
+ },
650
+ {
651
+ "answer": "String, Hash table",
652
+ "id": 162
653
+ },
654
+ {
655
+ "answer": "Array, Math",
656
+ "id": 163
657
+ }
658
+ ]
dividing_into_different_subsets/7/QS/line_counts_QS.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,line_subset_1,line_subset_2,line_subset_3,line_subset_4,line_subset_5,line_subset_6,line_subset_7
2
+ CodeFuse-DeepSeek-33b,82.61,73.91,86.96,78.26,69.57,68.0,79.17
3
+ Nxcode-CQ-7B,95.43,85.65,89.57,87.39,95.22,84.0,74.58
4
+ codegemma-2b,53.7,40.22,25.43,19.57,20.0,18.6,13.75
5
+ codegemma-7b,68.26,43.48,36.09,41.09,32.83,31.0,27.08
6
+ codegemma-7b-it,79.57,57.61,54.35,47.61,51.52,45.0,37.71
7
+ deepseek-coder-1.3b-base,57.17,41.3,30.43,30.65,31.3,17.2,20.0
8
+ deepseek-coder-6.7b-base,78.48,50.65,52.39,43.26,38.04,33.2,25.63
9
+ deepseek_coder-6.7b-instruct,81.74,82.39,84.13,66.3,67.83,66.6,53.33
10
+ deepseek_coder_33b-base,76.96,66.74,51.96,56.96,44.13,37.8,36.67
11
+ deepseek_coder_33b-instruct,87.39,78.26,69.35,68.04,61.3,49.6,51.04
12
+ codeqwen1.5-7b,68.26,53.04,55.43,54.13,46.52,47.6,35.62
13
+ new,79.57,57.61,54.35,47.61,51.52,45.0,37.71
dividing_into_different_subsets/7/QS/token_counts_QS.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,token_subset_1,token_subset_2,token_subset_3,token_subset_4,token_subset_5,token_subset_6,token_subset_7
2
+ CodeFuse-DeepSeek-33b,69.57,82.61,78.26,91.3,82.61,72.0,62.5
3
+ Nxcode-CQ-7B,94.13,92.17,85.22,84.13,91.74,87.0,77.08
4
+ codegemma-2b,40.65,40.87,49.78,22.17,15.43,21.4,1.25
5
+ codegemma-7b,59.13,48.26,56.96,34.78,29.35,33.8,17.71
6
+ codegemma-7b-it,72.61,65.65,70.65,48.04,36.52,43.8,36.25
7
+ deepseek-coder-1.3b-base,52.39,46.96,53.48,31.3,16.3,22.2,5.62
8
+ deepseek-coder-6.7b-base,71.74,61.96,63.26,35.43,38.7,34.8,16.04
9
+ deepseek_coder-6.7b-instruct,82.83,87.83,74.78,61.52,74.13,74.2,46.67
10
+ deepseek_coder_33b-base,71.3,63.91,70.87,50.65,45.43,43.6,25.42
11
+ deepseek_coder_33b-instruct,83.26,78.91,77.39,63.91,60.87,60.6,39.58
12
+ codeqwen1.5-7b,66.09,66.09,64.78,38.7,50.43,39.8,35.42
13
+ new,72.61,65.65,70.65,48.04,36.52,43.8,36.25
dividing_into_different_subsets/8/QS/CC_QS.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,CC_subset_1,CC_subset_2,CC_subset_3,CC_subset_4,CC_subset_5,CC_subset_6,CC_subset_7,CC_subset_8
2
+ CodeFuse-DeepSeek-33b,75.0,95.0,75.0,78.95,72.73,85.71,80.95,52.38
3
+ Nxcode-CQ-7B,90.0,88.5,82.25,81.84,95.0,86.19,95.0,78.57
4
+ codegemma-2b,49.75,47.0,18.0,30.53,12.95,37.38,21.67,2.38
5
+ codegemma-7b,58.0,63.0,36.0,42.37,30.23,50.95,26.43,13.81
6
+ codegemma-7b-it,60.5,68.5,53.75,58.95,42.05,61.19,51.19,31.19
7
+ deepseek-coder-1.3b-base,56.5,56.25,22.75,35.53,20.91,40.95,20.71,7.62
8
+ deepseek-coder-6.7b-base,63.75,72.0,36.0,47.63,40.45,58.57,37.14,11.9
9
+ deepseek_coder-6.7b-instruct,84.75,75.75,67.0,70.0,67.5,82.38,74.29,51.67
10
+ deepseek_coder_33b-base,71.5,63.25,47.5,59.74,44.77,61.43,47.38,28.57
11
+ deepseek_coder_33b-instruct,77.75,74.75,62.0,67.63,57.95,81.67,61.19,47.38
12
+ codeqwen1.5-7b,68.75,62.25,47.0,56.32,41.14,63.57,45.24,28.81
13
+ new,60.5,68.5,53.75,58.95,42.05,61.19,51.19,31.19
dividing_into_different_subsets/8/QS/QS.json ADDED
The diff for this file is too large to render. See raw diff
 
dividing_into_different_subsets/8/QS/TEST.json ADDED
The diff for this file is too large to render. See raw diff