Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -30,30 +30,22 @@ ADS.TOKEN = os.getenv('ADS_API_KEY') # Ensure your ADS API key is stored in env
|
|
30 |
# Define system message with instructions
|
31 |
system_message = """
|
32 |
You are ExosAI, a helpful assistant specializing in Exoplanet and Astrophysics research.
|
33 |
-
|
34 |
Generate a detailed structured response based on the following science context and user input, including the necessary observables, physical parameters, and technical requirements for observations. The response should include the following sections:
|
35 |
-
|
36 |
Science Objectives: Describe key scientific study objectives related to the science context and user input.
|
37 |
-
|
38 |
Physical Parameters: Outline the physical parameters related to the science context and user input.
|
39 |
-
|
40 |
Observables: Specify the observables related to the science context and user input.
|
41 |
-
|
42 |
Description of Desired Observations: Detail the types of observations related to the science context and user input.
|
43 |
-
|
44 |
Technical Requirements Table: Generate a table with the following columns:
|
45 |
-
- Requirements: The specific observational requirements (e.g., UV observations, Optical observations or Infrared observations
|
46 |
- Necessary: The necessary values or parameters (e.g., wavelength ranges, spatial resolution).
|
47 |
- Desired: The desired values or parameters.
|
48 |
- Justification: A scientific explanation of why these requirements are important.
|
49 |
- Comments: Additional notes or remarks regarding each requirement.
|
50 |
-
|
51 |
Example:
|
52 |
| Requirements | Necessary | Desired | Justification | Comments |
|
53 |
|----------------------------------|------------------------------------------|------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
|
54 |
| UV Observations | Wavelength: 1200–2100 Å, 2500–3300 Å | Wavelength: 1200–3300 Å | Characterization of atomic and molecular emissions (H, C, O, S, etc.) from fluorescence and dissociative electron impact | Needed for detecting H2O, CO, CO2, and other volatile molecules relevant for volatile delivery studies. |
|
55 |
| Infrared Observations | Wavelength: 2.5–4.8 μm | Wavelength: 1.5–4.8 μm | Tracks water emissions and CO2 lines in icy bodies and small planetesimals | Also allows detection of 3 μm absorption feature in icy bodies. |
|
56 |
-
|
57 |
Ensure the response is structured clearly and the technical requirements table follows this format.
|
58 |
"""
|
59 |
|
@@ -62,13 +54,54 @@ def encode_text(text):
|
|
62 |
outputs = bi_model(**inputs)
|
63 |
return outputs.last_hidden_state.mean(dim=1).detach().numpy().flatten()
|
64 |
|
65 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
user_embedding = encode_text(user_input).reshape(1, -1)
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
most_relevant_idx = np.argmax(similarities)
|
71 |
-
|
|
|
|
|
|
|
|
|
72 |
|
73 |
def extract_keywords_with_gpt(user_input, max_tokens=100, temperature=0.3):
|
74 |
# Define a prompt to ask GPT-4 to extract keywords and important terms
|
@@ -307,7 +340,7 @@ def gpt_response_to_dataframe(gpt_response):
|
|
307 |
|
308 |
def chatbot(user_input, context="", subdomain="", use_encoder=False, max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
|
309 |
if use_encoder and context:
|
310 |
-
context_texts = context
|
311 |
relevant_context = retrieve_relevant_context(user_input, context_texts)
|
312 |
else:
|
313 |
relevant_context = ""
|
@@ -389,7 +422,7 @@ iface = gr.Interface(
|
|
389 |
gr.HTML(label="Miro"),
|
390 |
gr.HTML(label="Generate Mind Map on Mapify")
|
391 |
],
|
392 |
-
title="ExosAI - NASA SMD SCDD AI Assistant [version-0.
|
393 |
description="ExosAI is an AI-powered assistant for generating and visualising HWO Science Cases",
|
394 |
)
|
395 |
|
|
|
30 |
# Define system message with instructions
|
31 |
system_message = """
|
32 |
You are ExosAI, a helpful assistant specializing in Exoplanet and Astrophysics research.
|
|
|
33 |
Generate a detailed structured response based on the following science context and user input, including the necessary observables, physical parameters, and technical requirements for observations. The response should include the following sections:
|
|
|
34 |
Science Objectives: Describe key scientific study objectives related to the science context and user input.
|
|
|
35 |
Physical Parameters: Outline the physical parameters related to the science context and user input.
|
|
|
36 |
Observables: Specify the observables related to the science context and user input.
|
|
|
37 |
Description of Desired Observations: Detail the types of observations related to the science context and user input.
|
|
|
38 |
Technical Requirements Table: Generate a table with the following columns:
|
39 |
+
- Requirements: The specific observational requirements (e.g., UV observations, Optical observations or Infrared observations).
|
40 |
- Necessary: The necessary values or parameters (e.g., wavelength ranges, spatial resolution).
|
41 |
- Desired: The desired values or parameters.
|
42 |
- Justification: A scientific explanation of why these requirements are important.
|
43 |
- Comments: Additional notes or remarks regarding each requirement.
|
|
|
44 |
Example:
|
45 |
| Requirements | Necessary | Desired | Justification | Comments |
|
46 |
|----------------------------------|------------------------------------------|------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
|
47 |
| UV Observations | Wavelength: 1200–2100 Å, 2500–3300 Å | Wavelength: 1200–3300 Å | Characterization of atomic and molecular emissions (H, C, O, S, etc.) from fluorescence and dissociative electron impact | Needed for detecting H2O, CO, CO2, and other volatile molecules relevant for volatile delivery studies. |
|
48 |
| Infrared Observations | Wavelength: 2.5–4.8 μm | Wavelength: 1.5–4.8 μm | Tracks water emissions and CO2 lines in icy bodies and small planetesimals | Also allows detection of 3 μm absorption feature in icy bodies. |
|
|
|
49 |
Ensure the response is structured clearly and the technical requirements table follows this format.
|
50 |
"""
|
51 |
|
|
|
54 |
outputs = bi_model(**inputs)
|
55 |
return outputs.last_hidden_state.mean(dim=1).detach().numpy().flatten()
|
56 |
|
57 |
+
def get_chunks(text, chunk_size=300):
    """
    Split a long piece of text into consecutive chunks of at most 'chunk_size' characters.

    Chunks are cut purely by character count, so a word or sentence may be
    split across two adjacent chunks. The final chunk may be shorter than
    'chunk_size'.

    Args:
        text: The text to split.
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of substrings that, joined in order, reproduce 'text'.

    Raises:
        ValueError: If 'text' is empty or whitespace-only, or if 'chunk_size'
            is zero or negative.
    """
    if not text.strip():
        raise ValueError("The provided context is empty or blank.")

    # A non-positive step would either make range() fail with an unrelated
    # message (0) or silently yield no chunks at all (negative).
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer.")

    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
|
67 |
+
|
68 |
+
def retrieve_relevant_context(user_input, context_texts, chunk_size=300, similarity_threshold=0.3):
    """
    Return the chunk of 'context_texts' that is most similar to 'user_input'.

    The context is split into chunks with get_chunks(), each chunk and the
    user input are embedded with encode_text(), and the chunk with the highest
    cosine similarity to the user input is selected.

    Args:
        user_input: The user's query text.
        context_texts: The full context text to search.
        chunk_size: Maximum number of characters per chunk, passed to get_chunks().
        similarity_threshold: Minimum cosine similarity the best chunk must
            reach to be considered relevant.

    Returns:
        Always a single string, so callers can use the result directly:
        - an error message if the context is empty or whitespace-only;
        - the whole context if it fits in a single chunk;
        - a fallback message if no chunk reaches 'similarity_threshold';
        - otherwise the most similar chunk.
    """
    # Check if the context is empty or just whitespace
    if not context_texts.strip():
        return "Error: Context is empty or improperly formatted."

    # Split the long context text into chunks using the chunking function
    context_chunks = get_chunks(context_texts, chunk_size)

    # A single chunk has nothing to be ranked against, so skip the encoder.
    if len(context_chunks) == 1:
        return context_chunks[0]

    # Encode the user input as a (1, dim) query and every chunk as one row.
    user_embedding = encode_text(user_input).reshape(1, -1)
    chunk_embeddings = np.array([encode_text(chunk) for chunk in context_chunks])

    # Cosine similarity between the user input and each chunk
    similarities = cosine_similarity(user_embedding, chunk_embeddings).flatten()

    # The best score decides both the threshold check and the chunk returned.
    most_relevant_idx = int(np.argmax(similarities))
    if similarities[most_relevant_idx] < similarity_threshold:
        return "No relevant context found for the user input."

    return context_chunks[most_relevant_idx]
|
104 |
+
|
105 |
|
106 |
def extract_keywords_with_gpt(user_input, max_tokens=100, temperature=0.3):
|
107 |
# Define a prompt to ask GPT-4 to extract keywords and important terms
|
|
|
340 |
|
341 |
def chatbot(user_input, context="", subdomain="", use_encoder=False, max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
|
342 |
if use_encoder and context:
|
343 |
+
context_texts = context
|
344 |
relevant_context = retrieve_relevant_context(user_input, context_texts)
|
345 |
else:
|
346 |
relevant_context = ""
|
|
|
422 |
gr.HTML(label="Miro"),
|
423 |
gr.HTML(label="Generate Mind Map on Mapify")
|
424 |
],
|
425 |
+
title="ExosAI - NASA SMD SCDD AI Assistant [version-0.9a]",
|
426 |
description="ExosAI is an AI-powered assistant for generating and visualising HWO Science Cases",
|
427 |
)
|
428 |
|