Whiteshadow12 committed
Commit a4e6a71 • 1 Parent(s): 15bbe10
- __pycache__/main.cpython-310.pyc +0 -0
- __pycache__/models.cpython-310.pyc +0 -0
- app.py +178 -156
- main.py +14 -14
- models.py +16 -7
__pycache__/main.cpython-310.pyc
CHANGED
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ

__pycache__/models.cpython-310.pyc
CHANGED
Binary files a/__pycache__/models.cpython-310.pyc and b/__pycache__/models.cpython-310.pyc differ
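Both tracked bytecode files under __pycache__/ change on every commit because they are build artifacts rather than source. A common remedy (a suggested .gitignore entry, not part of this commit) is to exclude them:

# .gitignore (suggested; not in this commit)
__pycache__/
*.pyc

Untracking the already-committed files would additionally require `git rm --cached` on each.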
app.py
CHANGED
@@ -4,166 +4,188 @@ from models import chat_with_model, embed
 from prompts import questions as predefined_questions, create_gen_prompt, create_judge_prompt
 import requests
 import numpy as np
-import os
+import os
 
 st.title("Aiden Bench - Generator")
 
 # API Key Inputs with Security and User Experience Enhancements
-st.warning("Please keep your API keys secure and confidential.")
-… (removed lines 13-28 are not rendered in this view)
-    model_names = [model["id"] for model in models]
-except requests.exceptions.RequestException as e:
-    st.error(f"Error fetching models from OpenRouter API: {e}")
-    model_names = [] # Provide an empty list if API call fails
-
-# Model Selection
-if model_names:
-    model_name = st.selectbox("Select a Language Model", model_names)
-else:
-    st.error("No models available. Please check your API connection.")
-    st.stop() # Stop execution if no models are available
-
-# Initialize session state for user_questions and predefined_questions
-if "user_questions" not in st.session_state:
-    st.session_state.user_questions = []
-
-# Workflow Selection
-workflow = st.radio("Select Workflow:", ["Use Predefined Questions", "Use User-Defined Questions"])
-
-# Handle Predefined Questions
-if workflow == "Use Predefined Questions":
-    st.header("Question Selection")
-    # Multiselect for predefined questions
-    selected_questions = st.multiselect(
-        "Select questions to benchmark:",
-        predefined_questions,
-        predefined_questions # Select all by default
-    )
-
-# Handle User-Defined Questions
-elif workflow == "Use User-Defined Questions":
-    st.header("Question Input")
-
-    # Input for adding a new question
-    new_question = st.text_input("Enter a new question:")
-    if st.button("Add Question") and new_question:
-        new_question = new_question.strip() # Remove leading/trailing whitespace
-        if new_question and new_question not in st.session_state.user_questions:
-            st.session_state.user_questions.append(new_question) # Append to session state
-            st.success(f"Question '{new_question}' added successfully.")
-        else:
-            st.warning("Question already exists or is empty!")
-
-    # Display multiselect with updated user questions
-    selected_questions = st.multiselect(
-        "Select your custom questions:",
-        options=st.session_state.user_questions,
-        default=st.session_state.user_questions
-    )
-
-# Display selected questions
-st.write("Selected Questions:", selected_questions)
-
-# Benchmark Execution
-if st.button("Start Benchmark"):
-    if not selected_questions:
-        st.warning("Please select at least one question.")
-    elif not open_router_key or not openai_api_key: # Check if API keys are provided
+st.warning("Please keep your API keys secure and confidential. This app does not store or log your API keys.")
+st.write("Learn how to obtain API keys from Open Router and OpenAI.") # Add links or instructions here
+
+if "open_router_key" not in st.session_state:
+    st.session_state.open_router_key = ""
+if "openai_api_key" not in st.session_state:
+    st.session_state.openai_api_key = ""
+
+open_router_key = st.text_input("Enter your Open Router API Key:", type="password", value=st.session_state.open_router_key)
+openai_api_key = st.text_input("Enter your OpenAI API Key:", type="password", value=st.session_state.openai_api_key)
+
+if st.button("Confirm API Keys"):
+    if open_router_key and openai_api_key:
+        st.session_state.open_router_key = open_router_key
+        st.session_state.openai_api_key = openai_api_key
+        st.success("API keys confirmed!")
+    else:
         st.warning("Please enter both API keys.")
+
+# Access API keys from session state
+if st.session_state.open_router_key and st.session_state.openai_api_key:
+    # Fetch models from OpenRouter API
+    try:
+        response = requests.get("https://openrouter.ai/api/v1/models")
+        response.raise_for_status() # Raise an exception for bad status codes
+        models = response.json()["data"]
+
+        # Sort models alphabetically by their ID
+        models.sort(key=lambda model: model["id"])
+
+        model_names = [model["id"] for model in models]
+    except requests.exceptions.RequestException as e:
+        st.error(f"Error fetching models from OpenRouter API: {e}")
+        model_names = [] # Provide an empty list if API call fails
+
+    # Model Selection
+    if model_names:
+        model_name = st.selectbox("Select a Language Model", model_names)
     else:
+        st.error("No models available. Please check your API connection.")
+        st.stop() # Stop execution if no models are available
+
+    # Initialize session state for user_questions and predefined_questions
+    if "user_questions" not in st.session_state:
+        st.session_state.user_questions = []
+
+    # Workflow Selection
+    workflow = st.radio("Select Workflow:", ["Use Predefined Questions", "Use User-Defined Questions"])
+
+    # Handle Predefined Questions
+    if workflow == "Use Predefined Questions":
+        st.header("Question Selection")
+        # Multiselect for predefined questions
+        selected_questions = st.multiselect(
+            "Select questions to benchmark:",
+            predefined_questions,
+            predefined_questions # Select all by default
+        )
+
+    # Handle User-Defined Questions
+    elif workflow == "Use User-Defined Questions":
+        st.header("Question Input")
+
+        # Input for adding a new question
+        new_question = st.text_input("Enter a new question:")
+        if st.button("Add Question") and new_question:
+            new_question = new_question.strip() # Remove leading/trailing whitespace
+            if new_question and new_question not in st.session_state.user_questions:
+                st.session_state.user_questions.append(new_question) # Append to session state
+                st.success(f"Question '{new_question}' added successfully.")
+            else:
+                st.warning("Question already exists or is empty!")
+
+        # Display multiselect with updated user questions
+        selected_questions = st.multiselect(
+            "Select your custom questions:",
+            options=st.session_state.user_questions,
+            default=st.session_state.user_questions
+        )
+
+    # Display selected questions
+    st.write("Selected Questions:", selected_questions)
+
+    # Benchmark Execution
+    if st.button("Start Benchmark"):
+        if not selected_questions:
+            st.warning("Please select at least one question.")
+        else:
+            # Initialize progress bar
+            progress_bar = st.progress(0)
+            num_questions = len(selected_questions)
+            results = [] # List to store results
+
+            # Iterate through selected questions
+            for i, question in enumerate(selected_questions):
+                # Display current question
+                st.write(f"Processing question {i+1}/{num_questions}: {question}")
+
+                previous_answers = []
+                question_novelty = 0
+
+                try:
+                    while True:
+                        gen_prompt = create_gen_prompt(question, previous_answers)
+
+                        try:
+                            new_answer = chat_with_model(
+                                prompt=gen_prompt,
+                                model=model_name,
+                                open_router_key=st.session_state.open_router_key,
+                                openai_api_key=st.session_state.openai_api_key
+                            )
+                        except requests.exceptions.RequestException as e:
+                            st.error(f"API Error: {e}")
+                            break
+
+                        judge_prompt = create_judge_prompt(question, new_answer)
+                        judge = "openai/gpt-4o-mini"
+
+                        try:
+                            judge_response = chat_with_model(
+                                prompt=judge_prompt,
+                                model=judge,
+                                open_router_key=st.session_state.open_router_key,
+                                openai_api_key=st.session_state.openai_api_key
+                            )
+                        except requests.exceptions.RequestException as e:
+                            st.error(f"API Error (Judge): {e}")
+                            break
+
+                        coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
+
+                        if coherence_score <= 3:
+                            st.warning("Output is incoherent. Moving to next question.")
+                            break
+
+                        novelty_score = get_novelty_score(new_answer, previous_answers, st.session_state.openai_api_key)
+
+                        if novelty_score < 0.1:
+                            st.warning("Output is redundant. Moving to next question.")
+                            break
+
+                        st.write(f"New Answer:\n{new_answer}")
+                        st.write(f"Coherence Score: {coherence_score}")
+                        st.write(f"Novelty Score: {novelty_score}")
+
+                        previous_answers.append(new_answer)
+                        question_novelty += novelty_score
+
+                except Exception as e:
+                    st.error(f"Error processing question: {e}")
+
+                results.append({
+                    "question": question,
+                    "answers": previous_answers,
+                    "coherence_score": coherence_score,
+                    "novelty_score": novelty_score
-… (removed lines 89-167 are not rendered in this view)
                })
-
+
+                # Update progress bar
+                progress_bar.progress((i + 1) / num_questions)
+
+            st.success("Benchmark completed!")
+
+            # Display results in a table
+            st.write("Results:")
+            results_table = []
+            for result in results:
+                for answer in result["answers"]:
+                    results_table.append({
+                        "Question": result["question"],
+                        "Answer": answer,
+                        "Coherence Score": result["coherence_score"],
+                        "Novelty Score": result["novelty_score"]
+                    })
+            st.table(results_table)
+
+
+else:
+    st.warning("Please confirm your API keys first.")
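A note on the new generation loop above: the coherence score is pulled out of the judge's reply by naive string splitting (`judge_response.split("<coherence_score>")[1]`), which raises an IndexError whenever the judge omits the tags, caught only by the broad `except Exception`. Separately, if the very first generation or judge call fails and breaks out of the loop, `coherence_score` and `novelty_score` are still unbound when `results.append` runs, producing a NameError. A more defensive parser might look like this (a sketch; `parse_coherence_score` is a hypothetical helper, not part of this commit):

import re

def parse_coherence_score(judge_response: str, default: int = 0) -> int:
    # Pull the integer out of <coherence_score>...</coherence_score>;
    # fall back to `default` when the judge ignores the expected format.
    match = re.search(r"<coherence_score>\s*(\d+)\s*</coherence_score>", judge_response)
    return int(match.group(1)) if match else default

Initializing coherence_score and novelty_score before the `while True:` loop would close the NameError path.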
main.py
CHANGED
@@ -79,25 +79,25 @@ def process_question(question, model_name):
     return question_novelty
 
 
-def get_novelty_score(new_answer: str, previous_answers: list):
-    … (old function body not rendered in this view)
+def get_novelty_score(new_answer: str, previous_answers: list, openai_api_key=None):
+    new_embedding = embed(new_answer, openai_api_key)
 
+    # If there are no previous answers, return maximum novelty
+    if not previous_answers:
+        return 1.0
 
+    previous_embeddings = [embed(answer, openai_api_key) for answer in previous_answers]
 
+    similarities = [
+        np.dot(new_embedding, prev_embedding) /
+        (np.linalg.norm(new_embedding) * np.linalg.norm(prev_embedding))
+        for prev_embedding in previous_embeddings
+    ]
 
+    max_similarity = max(similarities)
+    novelty = 1 - max_similarity
 
+    return novelty
 
 
 def benchmark_model_multithreaded(model_name):
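The rewritten get_novelty_score defines novelty as one minus the highest cosine similarity between the new answer's embedding and any previous answer's embedding: 1.0 means nothing like it has been seen, 0.0 an exact duplicate. The same arithmetic on toy 2-D vectors (stand-ins for `embed` output; numpy only, no API calls):

import numpy as np

def novelty(new_embedding, previous_embeddings):
    # Novelty = 1 - max cosine similarity over all previous embeddings.
    if not previous_embeddings:
        return 1.0  # nothing to compare against: maximally novel
    similarities = [
        np.dot(new_embedding, prev) /
        (np.linalg.norm(new_embedding) * np.linalg.norm(prev))
        for prev in previous_embeddings
    ]
    return 1 - max(similarities)

a = np.array([1.0, 0.0])  # toy embedding of a previous answer
b = np.array([0.6, 0.8])  # toy embedding of a new answer
print(novelty(b, [a]))    # 1 - 0.6 = 0.4, reasonably novel
print(novelty(a, [a]))    # 1 - 1.0 = 0.0, exact duplicate

Real text-embedding-3-large vectors come back unit-normalized, so the norm division is a safeguard rather than a necessity.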
models.py
CHANGED
@@ -5,11 +5,17 @@ from retry import retry
 
 
 @retry(tries=3)
-def chat_with_model(prompt, model, max_tokens=4000, temperature=0):
-    … (old client setup not rendered in this view)
+def chat_with_model(prompt, model, open_router_key=None, openai_api_key=None, max_tokens=4000, temperature=0):
+    if open_router_key:
+        client = OpenAI(
+            api_key=open_router_key,
+            base_url="https://openrouter.ai/api/v1"
+        )
+    elif openai_api_key:
+        client = OpenAI(api_key=openai_api_key)
+    else:
+        raise ValueError("Either open_router_key or openai_api_key must be provided.")
+
     response = client.chat.completions.create(
         model=model,
         messages=[
@@ -26,8 +32,11 @@ def chat_with_model(prompt, model, max_tokens=4000, temperature=0):
 
 @lru_cache(maxsize=10000)
 @retry(tries=3)
-def embed(text):
-    … (old client setup not rendered in this view)
+def embed(text, openai_api_key=None):
+    if openai_api_key:
+        client = OpenAI(api_key=openai_api_key)
+    else:
+        raise ValueError("openai_api_key must be provided.")
 
     response = client.embeddings.create(
         model="text-embedding-3-large", input=[text])
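Two things follow from the new signatures. First, `embed` keeps its `@lru_cache` wrapper, so the cache key is now the `(text, openai_api_key)` pair: identical text embedded under a different key is recomputed, and both arguments must remain hashable (strings are). Second, `@retry(tries=3)` wraps the whole body, so the ValueError raised for a missing key is itself retried three times before it surfaces. A sketch of how the updated functions are meant to be called (the model ID and keys below are placeholders):

from models import chat_with_model, embed

# With open_router_key set, chat_with_model builds its client against the
# OpenRouter base_url, so any OpenRouter model ID can be passed as `model`.
reply = chat_with_model(
    prompt="Name one unusual use for a brick.",
    model="openai/gpt-4o-mini",   # example OpenRouter model ID
    open_router_key="sk-or-...",  # placeholder
)

# Embeddings always go directly to OpenAI; embed raises ValueError without a key.
vector = embed("Name one unusual use for a brick.", openai_api_key="sk-...")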