XufengDuan committed on
Commit
d0d5660
1 Parent(s): 3776314

update scripts

Files changed (1)
  1. src/backend/model_operations.py +383 -28
src/backend/model_operations.py CHANGED
@@ -35,7 +35,7 @@ import spacy_transformers
35
  import subprocess
36
 
37
  # Run the command to download the spaCy model
38
- subprocess.run(["python", "-m", "spacy", "download", "en_core_web_lg"], check=True)
39
  # subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
40
  # subprocess.run(["pip", "install", "spacy-transformers"], check=True)
41
  # subprocess.run(["pip", "install", "curated-transformers"], check=True)
@@ -43,7 +43,7 @@ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_lg"], check=Tr
43
  # Load spacy model for word tokenization
44
  # nlp = spacy.load("en_core_web_sm")
45
  try:
46
- nlp1 = spacy.load("en_core_web_lg")
47
  except OSError:
48
  print("Can not load spacy model")
49
 
@@ -171,7 +171,8 @@ class ResponseGenerator:
171
  # print(ID, q_ID, prompt_value)
172
  system_prompt = envs.SYSTEM_PROMPT
173
  _user_prompt = prompt_value
174
- for ii in range(50):
175
  # user_prompt = f"{envs.USER_PROMPT}\nPassage:\n{_source}"
176
  while True:
177
  try:
@@ -179,6 +180,7 @@ class ResponseGenerator:
179
  print(self.model_id.lower(),'-',ID,'-',j,'-',ii)
180
 
181
  _response = self.send_request(system_prompt, _user_prompt)
182
  # print(f"Finish index {index}")
183
  break
184
  except Exception as e:
@@ -205,6 +207,7 @@ class ResponseGenerator:
205
  time.sleep(wait_time)
206
  try:
207
  _response = self.send_request(system_prompt, _user_prompt)
208
  break
209
  except Exception as ee:
210
  exceptions.append(ee)
@@ -512,7 +515,7 @@ class EvaluationModel:
512
  self.scores = []
513
  self.humanlike_score = None
514
 
515
- def code_results_llm(self, responses_df):
516
  '''code results from LLM's response'''
517
  output = []
518
  '''database for Exp4'''
@@ -738,25 +741,365 @@ class EvaluationModel:
738
  doc = nlp1(sentence)
739
  subject = "None"
740
  obj = "None"
741
742
 
743
  for token in doc:
744
  if token.dep_ == "nsubj":
745
  subject = token.text
746
  elif token.dep_ == "dobj":
747
  obj = token.text
748
- #print("E6", subject, obj)
749
- if subject in rs and obj in rs:
750
- #print(rs, subject, obj, "Other")
751
  output.append("Other")
752
- elif subject in rs:
753
- #print(rs, subject, obj, "VP")
754
  output.append("VP")
755
- elif obj in rs:
756
- #print(rs, subject, obj, "NP")
757
  output.append("NP")
758
  else:
759
- #print(rs, subject, obj, "Other")
760
  output.append("Other")
761
 
762
  '''Exp7'''
@@ -834,11 +1177,22 @@ class EvaluationModel:
834
  # exit()
835
  '''LLM'''
836
  print(len(output))
837
  self.data = pd.DataFrame(list(
838
- zip(responses_df["Experiment"], responses_df["Question_ID"], responses_df["Item"], responses_df["Response"],
839
- responses_df["Factor 2"], responses_df["Stimuli 1"], output)),
840
- columns=["Experiment", "Question_ID", "Item", "Response", "Factor 2", "Stimuli 1",
841
- "Coding"])
842
 
843
  return self.data
844
 
@@ -848,6 +1202,8 @@ class EvaluationModel:
848
 
849
 
850
 
 
 
851
  def calculate_js_divergence(self, file_path_1, file_path_2):
852
  """
853
  Calculate the Jensen-Shannon divergence for response distributions between two datasets.
@@ -855,7 +1211,7 @@ class EvaluationModel:
855
  removes the original E5 and E51, and then calculates the JS divergence between the datasets.
856
 
857
  Parameters:
858
- file_path_1 (str): Path to the first dataset file (Excel format).
859
  file_path_2 (str): Path to the second dataset file (CSV format).
860
 
861
  Returns:
@@ -893,17 +1249,15 @@ class EvaluationModel:
893
  human_df = pd.concat([human_df, human_e5], ignore_index=True)
894
  llm_df = pd.concat([llm_df, llm_e5], ignore_index=True)
895
 
896
-
897
  ### Calculate Average JS Divergence ###
898
 
899
-
900
  # Extract the relevant columns for JS divergence calculation
901
  human_responses = human_df[['Question_ID', 'Coding']]
902
  llm_responses = llm_df[['Question_ID', 'Coding']]
903
 
904
  # Remove 'Other' responses
905
- human_responses = human_responses[human_responses['Coding'] != 'Other']
906
- llm_responses = llm_responses[llm_responses['Coding'] != 'Other']
907
 
908
  # Get unique Question_IDs present in both datasets
909
  common_question_ids = set(human_responses['Question_ID']).intersection(set(llm_responses['Question_ID']))
@@ -933,6 +1287,7 @@ class EvaluationModel:
933
 
934
  # Calculate the average JS divergence per experiment and the confidence interval
935
  results = {}
936
  for exp, divs in js_divergence.items():
937
  avg_js_divergence = 1 - np.nanmean(divs)
938
  ci_lower, ci_upper = bootstrap((divs,), np.nanmean, confidence_level=0.95,
@@ -941,14 +1296,14 @@ class EvaluationModel:
941
  'average_js_divergence': avg_js_divergence,
942
  'confidence_interval': (1 - ci_upper, 1 - ci_lower) # Adjust for 1 - score
943
  }
944
 
945
- # Calculate the overall average JS divergence and confidence interval
946
- overall_js_divergence = 1 - np.nanmean([js for divs in js_divergence.values() for js in divs])
947
- flattened_js_divergence = np.concatenate([np.array(divs) for divs in js_divergence.values()])
948
 
949
- # Calculate the overall confidence interval
950
  overall_ci_lower, overall_ci_upper = bootstrap(
951
- (flattened_js_divergence,),
952
  np.nanmean,
953
  confidence_level=0.95,
954
  n_resamples=1000
@@ -957,8 +1312,8 @@ class EvaluationModel:
957
  # Combine all results into one dictionary
958
  all_results = {
959
  'overall': {
960
- 'average_js_divergence': overall_js_divergence,
961
- 'confidence_interval': (1 - overall_ci_upper, 1 - overall_ci_lower)
962
  },
963
  'per_experiment': results
964
  }
 
35
  import subprocess
36
 
37
  # Run the command to download the spaCy model
38
+ # subprocess.run(["python", "-m", "spacy", "download", "en_core_web_lg"], check=True)
39
  # subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
40
  # subprocess.run(["pip", "install", "spacy-transformers"], check=True)
41
  # subprocess.run(["pip", "install", "curated-transformers"], check=True)
 
43
  # Load spacy model for word tokenization
44
  # nlp = spacy.load("en_core_web_sm")
45
  try:
46
+ nlp1 = spacy.load("en_core_web_sm")
47
  except OSError:
48
  print("Can not load spacy model")
49
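
Note: the two hunks above drop the unconditional en_core_web_lg download at import time and load en_core_web_sm directly. A minimal sketch of the load-or-download fallback this pattern usually takes, assuming the small English model is the target (the helper name is illustrative, not part of the commit):

import spacy
from spacy.cli import download

def load_spacy_model(name: str = "en_core_web_sm"):
    """Load a spaCy model, downloading it first if it is missing from the environment."""
    try:
        return spacy.load(name)
    except OSError:
        download(name)  # fetch the model package, then retry the load
        return spacy.load(name)

nlp1 = load_spacy_model()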
 
 
171
  # print(ID, q_ID, prompt_value)
172
  system_prompt = envs.SYSTEM_PROMPT
173
  _user_prompt = prompt_value
174
+ print(_user_prompt)
175
+ for ii in range(100):
176
  # user_prompt = f"{envs.USER_PROMPT}\nPassage:\n{_source}"
177
  while True:
178
  try:
 
180
  print(self.model_id.lower(),'-',ID,'-',j,'-',ii)
181
 
182
  _response = self.send_request(system_prompt, _user_prompt)
183
+ # print(_response)
184
  # print(f"Finish index {index}")
185
  break
186
  except Exception as e:
 
207
  time.sleep(wait_time)
208
  try:
209
  _response = self.send_request(system_prompt, _user_prompt)
210
+
211
  break
212
  except Exception as ee:
213
  exceptions.append(ee)
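
Note: the loop above sleeps for wait_time and re-sends the request whenever send_request raises. A self-contained sketch of that retry-with-backoff pattern, with a hypothetical attempt limit (the original code loops under `for ii in range(100)` and a `while True`):

import time

def request_with_retries(send_request, system_prompt, user_prompt,
                         max_attempts=5, wait_time=2.0):
    """Call send_request, sleeping and retrying on failure; re-raise the last error when attempts run out."""
    exceptions = []
    for attempt in range(max_attempts):
        try:
            return send_request(system_prompt, user_prompt)
        except Exception as e:
            exceptions.append(e)
            time.sleep(wait_time * (attempt + 1))  # wait a little longer after each failure
    raise exceptions[-1]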
 
515
  self.scores = []
516
  self.humanlike_score = None
517
 
518
+ def code_results_llm_cleaned(self, responses_df):
519
  '''code results from LLM's response'''
520
  output = []
521
  '''database for Exp4'''
 
741
  doc = nlp1(sentence)
742
  subject = "None"
743
  obj = "None"
744
+ pobj_list = [] # To collect all prepositional objects
745
+
746
+ for token in doc:
747
+ if token.dep_ == "nsubj":
748
+ subject = token.text
749
+ elif token.dep_ == "dobj":
750
+ obj = token.text
751
+ elif token.dep_ == "pobj":
752
+ pobj_list.append(token.text) # Collect prepositional objects
753
+
754
+ rs_list = rs.lower().split()
755
+ if subject in rs_list and (obj in rs_list or any(pobj == r for pobj in pobj_list for r in rs_list)):
756
+ output.append("Other")
757
+ elif subject in rs_list:
758
+ output.append("VP")
759
+ elif obj in rs_list or any(pobj == r for pobj in pobj_list for r in rs_list):
760
+ output.append("NP")
761
+ else:
762
+ output.append("Other")
763
+
764
+ '''Exp7'''
765
+ elif responses_df["Experiment"][i] == "E7":
766
+ # rs = responses_df["Response"][i].strip().lower()
767
+ rs = rs.replace(".", "").replace(",", "").lower()
768
+ #print("E7", rs)
769
+ if "yes" in rs and "no" in rs:
770
+ output.append("Other")
771
+ elif "no" in rs:
772
+ output.append("0")
773
+ elif "yes" in rs:
774
+ output.append("1")
775
+ else:
776
+ output.append("Other")
777
+
778
+ '''Exp8'''
779
+ elif responses_df["Experiment"][i] == "E8":
780
+ # rs = responses_df["Response"][i].strip()
781
+ #print("E8", rs)
782
+ if "something is wrong with the question" in rs:
783
+ output.append("1")
784
+ else:
785
+ output.append("0")
786
+
787
+ '''Exp9'''
788
+ elif responses_df["Experiment"][i] == "E9":
789
+ male, female = 0, 0
790
+
791
+ # rs = responses_df["Response"][i].strip()
792
+ if "because" in rs:
793
+ rs = rs.replace("because because", "because").split("because")[1]
794
+ else:
795
+ rs = rs
796
+ condition = responses_df["Factor 2"][i].strip()
797
+ rs = rs.split(" ")
798
+ for w in rs:
799
+ if w in male_keyword and female != 1:
800
+ male = 1
801
+ break
802
+ if w in female_keyword and male != 1:
803
+ female = 1
804
+ break
805
+ #print("E9", "condition", condition, "male", male, "female", female)
806
+ if male == 0 and female == 0:
807
+ output.append('Other')
808
+ else:
809
+ if male == 1 and female == 0:
810
+ if condition == "MF":
811
+ output.append("Subject")
812
+ elif condition == "FM":
813
+ output.append("Object")
814
+ else:
815
+ output.append("Other")
816
+ elif female == 1 and male == 0:
817
+ if condition == "MF":
818
+ output.append("Object")
819
+ elif condition == "FM":
820
+ output.append("Subject")
821
+ else:
822
+ output.append("Other")
823
+
824
+ '''Exp10'''
825
+ elif responses_df["Experiment"][i] == "E10":
826
+ # rs = responses_df["Response"][i].strip()
827
+ rs = rs.replace(".", "")
828
+ if rs == "yes":
829
+ output.append("1")
830
+ else:
831
+ output.append("0")
832
+ else:
833
+ #print("can't find the Exp:", responses_df["Experiment"][i])
834
+ output.append("NA")
835
+ # print(output)
836
+ # exit()
837
+ '''LLM'''
838
+ print(len(output))
839
+ import re
840
+ def clean_text(text):
841
+ if isinstance(text, str):
842
+ return re.sub(r'[^\x00-\x7F]+', '', text)
843
+ return text
844
+
845
+ responses_df["Experiment"] = responses_df["Experiment"].apply(clean_text)
846
+ responses_df["Question_ID"] = responses_df["Question_ID"].apply(clean_text)
847
+ responses_df["Item"] = responses_df["Item"].apply(clean_text)
848
+ responses_df["Response"] = responses_df["Response"].apply(clean_text)
849
+
850
+ output = [str(item) for item in output]
851
+
852
+ self.data = pd.DataFrame(list(
853
+ zip(responses_df["Experiment"], responses_df["Question_ID"], responses_df["Item"], responses_df["Response"],output)),
854
+ columns=["Experiment", "Question_ID", "Item", "Response","Coding"])
855
+
856
+ return self.data
857
+
858
+ def code_results_llm(self, responses_df):
859
+ '''code results from LLM's response'''
860
+ output = []
861
+ '''database for Exp4'''
862
+ item4 = pd.read_csv(envs.ITEM_4_DATA)
863
+ wordpair2code = {}
864
+ for j in range(len(item4['Coding'])):
865
+ wordpair2code[item4['Pair'][j]] = item4['Coding'][j]
866
+ '''verb for Exp5'''
867
+ item5 = pd.read_csv(envs.ITEM_5_DATA)
868
+ # item corresponding to verb, same item id corresponding to verb pair
869
+ item2verb2 = {}
870
+ item2verb1 = {}
871
+
872
+ Stimuli1, Stimuli2 = {}, {}
873
+ for j in range(len(item5['Item'])):
874
+ item2verb1[item5['Item'][j]] = item5['Verb1'][j]
875
+ item2verb2[item5['Item'][j]] = item5['Verb2'][j]
876
+ Stimuli1[item5['ID'][j]] = item5['Stimuli-1'][j]
877
+ Stimuli2[item5['ID'][j]] = item5['Stimuli-2'][j]
878
+
879
+ male_keyword = ["he", "his", "himself"]
880
+ female_keyword = ["she", "her", "herself"]
881
+ #print(len(responses_df["Experiment"]))
882
+ for i in range(len(responses_df["Experiment"])):
883
+
884
+
885
+ print(i, "/", len(responses_df["Experiment"]))
886
+ # vote_1_1, vote_1_2, vote_1_3 = 0, 0, 0
887
+ # print()
888
+ if pd.isna(responses_df["Response"][i]):
889
+ output.append("Other")
890
+ continue
891
+ rs = responses_df["Response"][i].strip().lower()
892
+ print(rs)
893
+ rs = rs.replace('"', '').replace(" ", " ").replace('.', '')
894
+ #lines = rs.split("\n")
895
+ #filtered_lines = [line for line in lines if line and not (line.endswith(":") or line.endswith(":"))]
896
+ # filtered_lines = [r.split(':', 1)[-1].strip() if ':' in r else r for
897
+ # r in filtered_lines]
898
+ # rs = "\n".join(filtered_lines)
899
+ # rs = rs.strip()
900
+ '''Exp1'''
901
+ if responses_df["Experiment"][i] == "E1":
902
+ rs_lower = rs.lower()
903
+ if "round" in rs_lower and "spiky" in rs_lower:
904
+ output.append("Other")
905
+ elif "round" in rs_lower:
906
+ output.append("Round")
907
+ elif "spiky" in rs_lower:
908
+ output.append("Spiky")
909
+ else:
910
+ output.append("Other")
911
+
912
+ '''Exp2'''
913
+
914
+ elif responses_df["Experiment"][i] == "E2":
915
+ # rs = responses_df["Response"][i].strip()
916
+ rs = rs.split(' ')
917
+ #print("E2", rs)
918
+ male, female = 0, 0
919
+ for word in rs:
920
+ if word in female_keyword and male == 0:
921
+ female = 1
922
+ output.append("Female")
923
+ break
924
+ if word in male_keyword and female == 0:
925
+ male = 1
926
+ output.append("Male")
927
+ break
928
+ if male == 0 and female == 0:
929
+ output.append("Other")
930
 
931
+ '''Exp3'''
932
+ elif responses_df["Experiment"][i] == "E3":
933
+ # rs = responses_df["Response"][i].strip()
934
+ #print("E3", rs)
935
+ pair = responses_df["Factor 2"][i]
936
+ word1, word2 = pair.replace(".", "").split('_')
937
+
938
+ if responses_df["Item"][i] == 12:
939
+ output.append("Other")
940
+ else:
941
+ words = rs.split() # split the response into words
942
+ if any(word == word1 for word in words) and any(word == word2 for word in words):
943
+ output.append("Other")
944
+ else:
945
+ if any(word.lower() == word1.lower() for word in words):
946
+ if len(word1) > len(word2):
947
+ output.append("Long")
948
+ else:
949
+ output.append("Short")
950
+ elif any(word.lower() == word2.lower() for word in words):
951
+ if len(word1) > len(word2):
952
+ output.append("Short")
953
+ else:
954
+ output.append("Long")
955
+ else:
956
+ if len(words) > 1:
957
+ # join the words using " "
958
+ word = " ".join(words)
959
+ if word.lower() == word1.lower():
960
+ if len(word1) > len(word2):
961
+ output.append("Long")
962
+ else:
963
+ output.append("Short")
964
+ elif word.lower() == word2.lower():
965
+ if len(word1) > len(word2):
966
+ output.append("Short")
967
+ else:
968
+ output.append("Long")
969
+ else:
970
+ output.append("Other")
971
+ else:
972
+ output.append("Other")
973
+
974
+
975
+ '''Exp4'''
976
+
977
+ elif responses_df["Experiment"][i] == "E4":
978
+ lines = rs.split("\n")
979
+ filtered_lines = []
980
+ if len(lines) > 1:
981
+ for r in lines[1:]:
982
+ if ':' in r:
983
+ filtered_lines.append(r.split(':', 1)[-1].strip())
984
+ else:
985
+ filtered_lines.append(r)
986
+ filtered_lines.insert(0, lines[0])
987
+ else:
988
+ filtered_lines = lines
989
+ # print(filtered_lines)
990
+
991
+ #filtered_lines = [r.split('-', 1)[-1].strip() if '-' in r else r for r in filtered_lines]
992
+ #rs = "\n".join(filtered_lines)
993
+
994
+ #filtered_lines = [r.split(':', 1)[-1].strip() if ':' in r else r for r in rs.split(";")]
995
+ #filtered_lines = [r.split('-', 1)[-1].strip() if '-' in r else r for r in filtered_lines]
996
+ rs = ";".join(filtered_lines).strip()
997
+ try:
998
+ meaning_word = rs.split(";")[4].replace(" ", '')
999
+ except IndexError:
1000
+ try:
1001
+ meaning_word = rs.split("\n")[4].replace(" ", '')
1002
+ except IndexError:
1003
+ output.append("Other")
1004
+ continue
1005
+ except Exception as e:
1006
+ print(f"Unexpected error: {e}")
1007
+ output.append("Other")
1008
+ continue
1009
+
1010
+ target = responses_df["Factor 2"][i].strip().lower()
1011
+ pair = target + "_" + meaning_word
1012
+ #print("E4:", pair)
1013
+
1014
+ if pair in wordpair2code.keys():
1015
+ output.append(wordpair2code[pair])
1016
+ else:
1017
+ output.append("Other")
1018
+
1019
+ '''Exp5'''
1020
+ elif responses_df["Experiment"][i] == "E5" or responses_df["Experiment"][i] == "E51":
1021
+ # sentence = responses_df["Response"][i].strip()
1022
+ item_id = responses_df["Item"][i]
1023
+ question_id = responses_df["Question_ID"][i]
1024
+
1025
+ if responses_df["Experiment"][i] == "E51":
1026
+ sti1 = Stimuli1[question_id[0:-1]].lower().replace("...", "")
1027
+ #sti2 = Stimuli2[question_id[0:-1]].lower().replace("...", "")
1028
+ verb = item2verb1[item_id].lower()
1029
+
1030
+ sentence = sti1 + " " + rs.replace(sti1, "")
1031
+ #print("E5", verb, sentence)
1032
+ if responses_df["Experiment"][i] == "E5":
1033
+ #sti1 = Stimuli1[question_id].lower().replace("...", "")
1034
+ # print(sti1)
1035
+ sti2 = Stimuli2[question_id].lower().replace("...", "")
1036
+
1037
+ verb = item2verb2[item_id].lower()
1038
+ sentence = sti2 + " " + rs.replace(sti2, "")
1039
+ #print("E5", verb, sentence)
1040
+
1041
+ doc = nlp1(sentence.replace(" ", " "))
1042
+ # print(doc)
1043
+ # print()
1044
+ verb_token = None
1045
+ for token in doc:
1046
+ # print(token.lemma_)
1047
+ if token.lemma_ == verb:
1048
+ verb_token = token
1049
+ break
1050
+ # exit()
1051
+ pobj, dative = None, None
1052
+ # print(verb_token.children)
1053
+ # exit()
1054
+ if verb_token is not None:
1055
+ for child in verb_token.children:
1056
+ # print(child)
1057
+ if (child.dep_ == 'dative' and child.pos_ == "ADP") or (
1058
+ child.text == "to" and child.dep_ == 'prep' and child.pos_ == "ADP"):
1059
+ pobj = child.text
1060
+ if child.dep_ == 'dative':
1061
+ dative = child.text
1062
+
1063
+ # print("E5", pobj, dative)
1064
+ # exit()
1065
+
1066
+ if pobj:
1067
+ output.append("PO")
1068
+ elif dative:
1069
+ output.append("DO")
1070
+ else:
1071
+ # print("Other", sentence, pobj, dative)
1072
+ # exit()
1073
+ output.append("Other")
1074
+
1075
+
1076
+
1077
+ '''Exp6'''
1078
+
1079
+ elif responses_df["Experiment"][i] == "E6":
1080
+ sentence = responses_df["Stimuli 1"][i].strip().lower()
1081
+ #print("E6", sentence)
1082
+ doc = nlp1(sentence)
1083
+ subject = "None"
1084
+ obj = "None"
1085
+ pobj_list = [] # To collect all prepositional objects
1086
 
1087
  for token in doc:
1088
  if token.dep_ == "nsubj":
1089
  subject = token.text
1090
  elif token.dep_ == "dobj":
1091
  obj = token.text
1092
+ elif token.dep_ == "pobj":
1093
+ pobj_list.append(token.text) # Collect prepositional objects
1094
+
1095
+ rs_list = rs.lower().split()
1096
+ if subject in rs_list and (obj in rs_list or any(pobj == r for pobj in pobj_list for r in rs_list)):
1097
  output.append("Other")
1098
+ elif subject in rs_list:
1099
  output.append("VP")
1100
+ elif obj in rs_list or any(pobj == r for pobj in pobj_list for r in rs_list):
1101
  output.append("NP")
1102
  else:
1103
  output.append("Other")
1104
 
1105
  '''Exp7'''
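
Note: the reworked E6 coding above keys off spaCy dependency labels: a response that repeats the stimulus subject (nsubj) is coded VP, one that repeats the direct object (dobj) or a prepositional object (pobj) is coded NP, and anything else (or both) is Other. A standalone illustration of that lookup with an invented stimulus and responses (the nlp object stands in for the script's nlp1):

import spacy

nlp = spacy.load("en_core_web_sm")

def code_e6(stimulus, response):
    """Code a response as VP / NP / Other by which argument of the stimulus it mentions."""
    doc = nlp(stimulus.lower())
    subject, obj, pobjs = "None", "None", []
    for token in doc:
        if token.dep_ == "nsubj":
            subject = token.text
        elif token.dep_ == "dobj":
            obj = token.text
        elif token.dep_ == "pobj":
            pobjs.append(token.text)
    words = response.lower().split()
    has_subject = subject in words
    has_object = obj in words or any(p in words for p in pobjs)
    if has_subject and has_object:
        return "Other"
    if has_subject:
        return "VP"
    if has_object:
        return "NP"
    return "Other"

print(code_e6("the boy threw the ball to the dog", "the ball"))     # -> NP (direct object)
print(code_e6("the boy threw the ball to the dog", "the boy did"))  # -> VP (subject)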
 
1177
  # exit()
1178
  '''LLM'''
1179
  print(len(output))
1180
+ import re
1181
+ def clean_text(text):
1182
+ if isinstance(text, str):
1183
+ return re.sub(r'[^\x00-\x7F]+', '', text)
1184
+ return text
1185
+
1186
+ responses_df["Experiment"] = responses_df["Experiment"].apply(clean_text)
1187
+ responses_df["Question_ID"] = responses_df["Question_ID"].apply(clean_text)
1188
+ responses_df["Item"] = responses_df["Item"].apply(clean_text)
1189
+ responses_df["Response"] = responses_df["Response"].apply(clean_text)
1190
+
1191
+ output = [str(item) for item in output]
1192
+
1193
  self.data = pd.DataFrame(list(
1194
+ zip(responses_df["Experiment"], responses_df["Question_ID"], responses_df["Item"], responses_df["Response"],output)),
1195
+ columns=["Experiment", "Question_ID", "Item", "Response","Coding"])
1196
 
1197
  return self.data
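
Note: clean_text strips non-ASCII characters from the key columns before the coded DataFrame is assembled, which keeps stray unicode (curly quotes, zero-width spaces, accents) out of the Coding pipeline. A small sketch of the same regex applied to a toy frame (column names mirror the ones used here; the values are made up):

import re
import pandas as pd

def clean_text(text):
    """Remove non-ASCII characters; pass through non-string values such as NaN."""
    if isinstance(text, str):
        return re.sub(r'[^\x00-\x7F]+', '', text)
    return text

df = pd.DataFrame({
    "Question_ID": ["E1_1", "E1_2"],
    "Response": ["round\u200b", "spiky café"],  # zero-width space / accented character
})
df["Response"] = df["Response"].apply(clean_text)
print(df["Response"].tolist())  # ['round', 'spiky caf']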
1198
 
 
1202
 
1203
 
1204
 
1205
+
1206
+
1207
  def calculate_js_divergence(self, file_path_1, file_path_2):
1208
  """
1209
  Calculate the Jensen-Shannon divergence for response distributions between two datasets.
 
1211
  removes the original E5 and E51, and then calculates the JS divergence between the datasets.
1212
 
1213
  Parameters:
1214
+ file_path_1 (str): Path to the first dataset file (CSV format).
1215
  file_path_2 (str): Path to the second dataset file (CSV format).
1216
 
1217
  Returns:
 
1249
  human_df = pd.concat([human_df, human_e5], ignore_index=True)
1250
  llm_df = pd.concat([llm_df, llm_e5], ignore_index=True)
1251
 
 
1252
  ### Calculate Average JS Divergence ###
1253
 
 
1254
  # Extract the relevant columns for JS divergence calculation
1255
  human_responses = human_df[['Question_ID', 'Coding']]
1256
  llm_responses = llm_df[['Question_ID', 'Coding']]
1257
 
1258
  # Remove 'Other' responses
1259
+ #human_responses = human_responses[human_responses['Coding'] != 'Other']
1260
+ #llm_responses = llm_responses[llm_responses['Coding'] != 'Other']
1261
 
1262
  # Get unique Question_IDs present in both datasets
1263
  common_question_ids = set(human_responses['Question_ID']).intersection(set(llm_responses['Question_ID']))
 
1287
 
1288
  # Calculate the average JS divergence per experiment and the confidence interval
1289
  results = {}
1290
+ experiment_averages = []
1291
  for exp, divs in js_divergence.items():
1292
  avg_js_divergence = 1 - np.nanmean(divs)
1293
  ci_lower, ci_upper = bootstrap((divs,), np.nanmean, confidence_level=0.95,
 
1296
  'average_js_divergence': avg_js_divergence,
1297
  'confidence_interval': (1 - ci_upper, 1 - ci_lower) # Adjust for 1 - score
1298
  }
1299
+ experiment_averages.append(avg_js_divergence)
1300
 
1301
+ # Calculate the weighted average JS divergence across all experiments
1302
+ weighted_js_divergence = np.mean(experiment_averages) # Simple average over experiments
 
1303
 
1304
+ # Calculate the confidence interval for the overall JS divergence using bootstrap
1305
  overall_ci_lower, overall_ci_upper = bootstrap(
1306
+ (experiment_averages,),
1307
  np.nanmean,
1308
  confidence_level=0.95,
1309
  n_resamples=1000
 
1312
  # Combine all results into one dictionary
1313
  all_results = {
1314
  'overall': {
1315
+ 'average_js_divergence': weighted_js_divergence,
1316
+ 'confidence_interval': (overall_ci_lower, overall_ci_upper)
1317
  },
1318
  'per_experiment': results
1319
  }
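
Note: with this change the overall score is the simple mean of the per-experiment averages (each already reported as 1 minus the mean JS divergence), and its confidence interval is bootstrapped over those per-experiment means rather than over the pooled per-question values. A compact sketch of the two building blocks, SciPy's Jensen-Shannon distance and bootstrap, on made-up distributions (the real code builds them from the Coding column per Question_ID):

import numpy as np
from scipy.spatial.distance import jensenshannon
from scipy.stats import bootstrap

# Human vs. model response distributions over the same coding categories for one question
# (e.g. P(NP), P(VP), P(Other)); the numbers are invented.
human = np.array([0.6, 0.3, 0.1])
llm = np.array([0.5, 0.4, 0.1])

js_divergence = jensenshannon(human, llm, base=2) ** 2  # jensenshannon returns the distance (sqrt of the divergence)
similarity = 1 - js_divergence  # reported as the human-likeness score, as in the code above

# 95% bootstrap CI over a set of per-experiment averages (toy values).
experiment_averages = np.array([0.82, 0.76, 0.91, 0.88])
res = bootstrap((experiment_averages,), np.nanmean, confidence_level=0.95, n_resamples=1000)
print(similarity, res.confidence_interval.low, res.confidence_interval.high)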