Whiteshadow12 committed on
Commit
a4e6a71
1 Parent(s): 15bbe10
__pycache__/main.cpython-310.pyc CHANGED
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ
 
__pycache__/models.cpython-310.pyc CHANGED
Binary files a/__pycache__/models.cpython-310.pyc and b/__pycache__/models.cpython-310.pyc differ
 
app.py CHANGED
@@ -4,166 +4,188 @@ from models import chat_with_model, embed
4
  from prompts import questions as predefined_questions, create_gen_prompt, create_judge_prompt
5
  import requests
6
  import numpy as np
7
- import os # Import the os module
8
 
9
  st.title("Aiden Bench - Generator")
10
 
11
  # API Key Inputs with Security and User Experience Enhancements
12
- st.warning("Please keep your API keys secure and confidential.")
13
- open_router_key = st.text_input("Enter your Open Router API Key:", type="password")
14
- openai_api_key = st.text_input("Enter your OpenAI API Key:", type="password")
15
-
16
- # Set environment variables (temporarily)
17
- os.environ["OPEN_ROUTER_KEY"] = open_router_key
18
- os.environ["OPENAI_API_KEY"] = openai_api_key
19
-
20
- # Fetch models from OpenRouter API
21
- try:
22
- response = requests.get("https://openrouter.ai/api/v1/models")
23
- response.raise_for_status() # Raise an exception for bad status codes
24
- models = response.json()["data"]
25
-
26
- # Sort models alphabetically by their ID
27
- models.sort(key=lambda model: model["id"])
28
-
29
- model_names = [model["id"] for model in models]
30
- except requests.exceptions.RequestException as e:
31
- st.error(f"Error fetching models from OpenRouter API: {e}")
32
- model_names = [] # Provide an empty list if API call fails
33
-
34
- # Model Selection
35
- if model_names:
36
- model_name = st.selectbox("Select a Language Model", model_names)
37
- else:
38
- st.error("No models available. Please check your API connection.")
39
- st.stop() # Stop execution if no models are available
40
-
41
- # Initialize session state for user_questions and predefined_questions
42
- if "user_questions" not in st.session_state:
43
- st.session_state.user_questions = []
44
-
45
- # Workflow Selection
46
- workflow = st.radio("Select Workflow:", ["Use Predefined Questions", "Use User-Defined Questions"])
47
-
48
- # Handle Predefined Questions
49
- if workflow == "Use Predefined Questions":
50
- st.header("Question Selection")
51
- # Multiselect for predefined questions
52
- selected_questions = st.multiselect(
53
- "Select questions to benchmark:",
54
- predefined_questions,
55
- predefined_questions # Select all by default
56
- )
57
-
58
- # Handle User-Defined Questions
59
- elif workflow == "Use User-Defined Questions":
60
- st.header("Question Input")
61
-
62
- # Input for adding a new question
63
- new_question = st.text_input("Enter a new question:")
64
- if st.button("Add Question") and new_question:
65
- new_question = new_question.strip() # Remove leading/trailing whitespace
66
- if new_question and new_question not in st.session_state.user_questions:
67
- st.session_state.user_questions.append(new_question) # Append to session state
68
- st.success(f"Question '{new_question}' added successfully.")
69
- else:
70
- st.warning("Question already exists or is empty!")
71
-
72
- # Display multiselect with updated user questions
73
- selected_questions = st.multiselect(
74
- "Select your custom questions:",
75
- options=st.session_state.user_questions,
76
- default=st.session_state.user_questions
77
- )
78
-
79
- # Display selected questions
80
- st.write("Selected Questions:", selected_questions)
81
-
82
- # Benchmark Execution
83
- if st.button("Start Benchmark"):
84
- if not selected_questions:
85
- st.warning("Please select at least one question.")
86
- elif not open_router_key or not openai_api_key: # Check if API keys are provided
87
  st.warning("Please enter both API keys.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  else:
89
- # Initialize progress bar
90
- progress_bar = st.progress(0)
91
- num_questions = len(selected_questions)
92
- results = [] # List to store results
93
-
94
- # Iterate through selected questions
95
- for i, question in enumerate(selected_questions):
96
- # Display current question
97
- st.write(f"Processing question {i+1}/{num_questions}: {question}")
98
-
99
- previous_answers = []
100
- question_novelty = 0
101
-
102
- try:
103
- while True:
104
- gen_prompt = create_gen_prompt(question, previous_answers)
105
-
106
- # Handle potential API errors for chat_with_model
107
- try:
108
- new_answer = chat_with_model(prompt=gen_prompt, model=model_name)
109
- except requests.exceptions.RequestException as e:
110
- st.error(f"API Error: {e}")
111
- break # Exit the loop if API error occurs
112
-
113
- judge_prompt = create_judge_prompt(question, new_answer)
114
- judge = "openai/gpt-4o-mini"
115
-
116
- # Handle potential API errors for chat_with_model (judge)
117
- try:
118
- judge_response = chat_with_model(prompt=judge_prompt, model=judge)
119
- except requests.exceptions.RequestException as e:
120
- st.error(f"API Error (Judge): {e}")
121
- break # Exit the loop if API error occurs
122
-
123
- coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
124
-
125
- if coherence_score <= 3:
126
- st.warning("Output is incoherent. Moving to next question.")
127
- break
128
-
129
- novelty_score = get_novelty_score(new_answer, previous_answers)
130
-
131
- if novelty_score < 0.1:
132
- st.warning("Output is redundant. Moving to next question.")
133
- break
134
-
135
- st.write(f"New Answer:\n{new_answer}")
136
- st.write(f"Coherence Score: {coherence_score}")
137
- st.write(f"Novelty Score: {novelty_score}")
138
-
139
- previous_answers.append(new_answer)
140
- question_novelty += novelty_score
141
-
142
- except Exception as e:
143
- st.error(f"Error processing question: {e}")
144
-
145
-
146
- results.append({
147
- "question": question,
148
- "answers": previous_answers,
149
- "coherence_score": coherence_score,
150
- "novelty_score": novelty_score
151
- })
152
-
153
- # Update progress bar
154
- progress_bar.progress((i + 1) / num_questions)
155
-
156
- st.success("Benchmark completed!")
157
-
158
- # Display results in a table
159
- st.write("Results:")
160
- results_table = []
161
- for result in results:
162
- for answer in result["answers"]:
163
- results_table.append({
164
- "Question": result["question"],
165
- "Answer": answer,
166
- "Coherence Score": result["coherence_score"],
167
- "Novelty Score": result["novelty_score"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  })
169
- st.table(results_table)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from prompts import questions as predefined_questions, create_gen_prompt, create_judge_prompt
5
  import requests
6
  import numpy as np
7
+ import os
8
 
9
  st.title("Aiden Bench - Generator")
10
 
11
  # API Key Inputs with Security and User Experience Enhancements
12
+ st.warning("Please keep your API keys secure and confidential. This app does not store or log your API keys.")
13
+ st.write("Learn how to obtain API keys from Open Router and OpenAI.") # Add links or instructions here
14
+
15
+ if "open_router_key" not in st.session_state:
16
+ st.session_state.open_router_key = ""
17
+ if "openai_api_key" not in st.session_state:
18
+ st.session_state.openai_api_key = ""
19
+
20
+ open_router_key = st.text_input("Enter your Open Router API Key:", type="password", value=st.session_state.open_router_key)
21
+ openai_api_key = st.text_input("Enter your OpenAI API Key:", type="password", value=st.session_state.openai_api_key)
22
+
23
+ if st.button("Confirm API Keys"):
24
+ if open_router_key and openai_api_key:
25
+ st.session_state.open_router_key = open_router_key
26
+ st.session_state.openai_api_key = openai_api_key
27
+ st.success("API keys confirmed!")
28
+ else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  st.warning("Please enter both API keys.")
30
+
31
+ # Access API keys from session state
32
+ if st.session_state.open_router_key and st.session_state.openai_api_key:
33
+ # Fetch models from OpenRouter API
34
+ try:
35
+ response = requests.get("https://openrouter.ai/api/v1/models")
36
+ response.raise_for_status() # Raise an exception for bad status codes
37
+ models = response.json()["data"]
38
+
39
+ # Sort models alphabetically by their ID
40
+ models.sort(key=lambda model: model["id"])
41
+
42
+ model_names = [model["id"] for model in models]
43
+ except requests.exceptions.RequestException as e:
44
+ st.error(f"Error fetching models from OpenRouter API: {e}")
45
+ model_names = [] # Provide an empty list if API call fails
46
+
47
+ # Model Selection
48
+ if model_names:
49
+ model_name = st.selectbox("Select a Language Model", model_names)
50
  else:
51
+ st.error("No models available. Please check your API connection.")
52
+ st.stop() # Stop execution if no models are available
53
+
54
+ # Initialize session state for user_questions and predefined_questions
55
+ if "user_questions" not in st.session_state:
56
+ st.session_state.user_questions = []
57
+
58
+ # Workflow Selection
59
+ workflow = st.radio("Select Workflow:", ["Use Predefined Questions", "Use User-Defined Questions"])
60
+
61
+ # Handle Predefined Questions
62
+ if workflow == "Use Predefined Questions":
63
+ st.header("Question Selection")
64
+ # Multiselect for predefined questions
65
+ selected_questions = st.multiselect(
66
+ "Select questions to benchmark:",
67
+ predefined_questions,
68
+ predefined_questions # Select all by default
69
+ )
70
+
71
+ # Handle User-Defined Questions
72
+ elif workflow == "Use User-Defined Questions":
73
+ st.header("Question Input")
74
+
75
+ # Input for adding a new question
76
+ new_question = st.text_input("Enter a new question:")
77
+ if st.button("Add Question") and new_question:
78
+ new_question = new_question.strip() # Remove leading/trailing whitespace
79
+ if new_question and new_question not in st.session_state.user_questions:
80
+ st.session_state.user_questions.append(new_question) # Append to session state
81
+ st.success(f"Question '{new_question}' added successfully.")
82
+ else:
83
+ st.warning("Question already exists or is empty!")
84
+
85
+ # Display multiselect with updated user questions
86
+ selected_questions = st.multiselect(
87
+ "Select your custom questions:",
88
+ options=st.session_state.user_questions,
89
+ default=st.session_state.user_questions
90
+ )
91
+
92
+ # Display selected questions
93
+ st.write("Selected Questions:", selected_questions)
94
+
95
+ # Benchmark Execution
96
+ if st.button("Start Benchmark"):
97
+ if not selected_questions:
98
+ st.warning("Please select at least one question.")
99
+ else:
100
+ # Initialize progress bar
101
+ progress_bar = st.progress(0)
102
+ num_questions = len(selected_questions)
103
+ results = [] # List to store results
104
+
105
+ # Iterate through selected questions
106
+ for i, question in enumerate(selected_questions):
107
+ # Display current question
108
+ st.write(f"Processing question {i+1}/{num_questions}: {question}")
109
+
110
+ previous_answers = []
111
+ question_novelty = 0
112
+
113
+ try:
114
+ while True:
115
+ gen_prompt = create_gen_prompt(question, previous_answers)
116
+
117
+ try:
118
+ new_answer = chat_with_model(
119
+ prompt=gen_prompt,
120
+ model=model_name,
121
+ open_router_key=st.session_state.open_router_key,
122
+ openai_api_key=st.session_state.openai_api_key
123
+ )
124
+ except requests.exceptions.RequestException as e:
125
+ st.error(f"API Error: {e}")
126
+ break
127
+
128
+ judge_prompt = create_judge_prompt(question, new_answer)
129
+ judge = "openai/gpt-4o-mini"
130
+
131
+ try:
132
+ judge_response = chat_with_model(
133
+ prompt=judge_prompt,
134
+ model=judge,
135
+ open_router_key=st.session_state.open_router_key,
136
+ openai_api_key=st.session_state.openai_api_key
137
+ )
138
+ except requests.exceptions.RequestException as e:
139
+ st.error(f"API Error (Judge): {e}")
140
+ break
141
+
142
+ coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
143
+
144
+ if coherence_score <= 3:
145
+ st.warning("Output is incoherent. Moving to next question.")
146
+ break
147
+
148
+ novelty_score = get_novelty_score(new_answer, previous_answers, st.session_state.openai_api_key)
149
+
150
+ if novelty_score < 0.1:
151
+ st.warning("Output is redundant. Moving to next question.")
152
+ break
153
+
154
+ st.write(f"New Answer:\n{new_answer}")
155
+ st.write(f"Coherence Score: {coherence_score}")
156
+ st.write(f"Novelty Score: {novelty_score}")
157
+
158
+ previous_answers.append(new_answer)
159
+ question_novelty += novelty_score
160
+
161
+ except Exception as e:
162
+ st.error(f"Error processing question: {e}")
163
+
164
+ results.append({
165
+ "question": question,
166
+ "answers": previous_answers,
167
+ "coherence_score": coherence_score,
168
+ "novelty_score": novelty_score
169
  })
170
+
171
+ # Update progress bar
172
+ progress_bar.progress((i + 1) / num_questions)
173
+
174
+ st.success("Benchmark completed!")
175
+
176
+ # Display results in a table
177
+ st.write("Results:")
178
+ results_table = []
179
+ for result in results:
180
+ for answer in result["answers"]:
181
+ results_table.append({
182
+ "Question": result["question"],
183
+ "Answer": answer,
184
+ "Coherence Score": result["coherence_score"],
185
+ "Novelty Score": result["novelty_score"]
186
+ })
187
+ st.table(results_table)
188
+
189
+
190
+ else:
191
+ st.warning("Please confirm your API keys first.")
main.py CHANGED
@@ -79,25 +79,25 @@ def process_question(question, model_name):
79
  return question_novelty
80
 
81
 
82
- def get_novelty_score(new_answer: str, previous_answers: list):
83
- new_embedding = embed(new_answer)
84
 
85
- # If there are no previous answers, return maximum novelty
86
- if not previous_answers:
87
- return 1.0
88
 
89
- previous_embeddings = [embed(answer) for answer in previous_answers]
90
 
91
- similarities = [
92
- np.dot(new_embedding, prev_embedding) /
93
- (np.linalg.norm(new_embedding) * np.linalg.norm(prev_embedding))
94
- for prev_embedding in previous_embeddings
95
- ]
96
 
97
- max_similarity = max(similarities)
98
- novelty = 1 - max_similarity
99
 
100
- return novelty
101
 
102
 
103
  def benchmark_model_multithreaded(model_name):
 
79
  return question_novelty
80
 
81
 
82
+ def get_novelty_score(new_answer: str, previous_answers: list, openai_api_key=None):
83
+ new_embedding = embed(new_answer, openai_api_key)
84
 
85
+ # If there are no previous answers, return maximum novelty
86
+ if not previous_answers:
87
+ return 1.0
88
 
89
+ previous_embeddings = [embed(answer, openai_api_key) for answer in previous_answers]
90
 
91
+ similarities = [
92
+ np.dot(new_embedding, prev_embedding) /
93
+ (np.linalg.norm(new_embedding) * np.linalg.norm(prev_embedding))
94
+ for prev_embedding in previous_embeddings
95
+ ]
96
 
97
+ max_similarity = max(similarities)
98
+ novelty = 1 - max_similarity
99
 
100
+ return novelty
101
 
102
 
103
  def benchmark_model_multithreaded(model_name):
models.py CHANGED
@@ -5,11 +5,17 @@ from retry import retry
5
 
6
 
7
  @retry(tries=3)
8
- def chat_with_model(prompt, model, max_tokens=4000, temperature=0):
9
- client = OpenAI(
10
- api_key=os.getenv("OPEN_ROUTER_KEY"),
11
- base_url="https://openrouter.ai/api/v1"
12
- )
 
 
 
 
 
 
13
  response = client.chat.completions.create(
14
  model=model,
15
  messages=[
@@ -26,8 +32,11 @@ def chat_with_model(prompt, model, max_tokens=4000, temperature=0):
26
 
27
  @lru_cache(maxsize=10000)
28
  @retry(tries=3)
29
- def embed(text):
30
- client = OpenAI()
 
 
 
31
 
32
  response = client.embeddings.create(
33
  model="text-embedding-3-large", input=[text])
 
5
 
6
 
7
  @retry(tries=3)
8
+ def chat_with_model(prompt, model, open_router_key=None, openai_api_key=None, max_tokens=4000, temperature=0):
9
+ if open_router_key:
10
+ client = OpenAI(
11
+ api_key=open_router_key,
12
+ base_url="https://openrouter.ai/api/v1"
13
+ )
14
+ elif openai_api_key:
15
+ client = OpenAI(api_key=openai_api_key)
16
+ else:
17
+ raise ValueError("Either open_router_key or openai_api_key must be provided.")
18
+
19
  response = client.chat.completions.create(
20
  model=model,
21
  messages=[
 
32
 
33
  @lru_cache(maxsize=10000)
34
  @retry(tries=3)
35
+ def embed(text, openai_api_key=None):
36
+ if openai_api_key:
37
+ client = OpenAI(api_key=openai_api_key)
38
+ else:
39
+ raise ValueError("openai_api_key must be provided.")
40
 
41
  response = client.embeddings.create(
42
  model="text-embedding-3-large", input=[text])