import gradio as gr
from transformers import AutoTokenizer, AutoModel
from openai import OpenAI
import os
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from docx import Document
import io
import tempfile
from astroquery.nasa_ads import ADS
import pyvo as vo
# Load the NASA-specific bi-encoder model and tokenizer.
# Both objects are created once at module level and are read by
# `encode_text` to turn text into embedding vectors.
bi_encoder_model_name = "nasa-impact/nasa-smd-ibm-st-v2"
bi_tokenizer = AutoTokenizer.from_pretrained(bi_encoder_model_name)
bi_model = AutoModel.from_pretrained(bi_encoder_model_name)
# Set up OpenAI client.
# The key is read from the OPENAI_API_KEY environment variable; os.getenv
# returns None when the variable is unset.
api_key = os.getenv('OPENAI_API_KEY')
client = OpenAI(api_key=api_key)
# Set up NASA ADS token (read from the ADS_API_KEY environment variable).
ADS.TOKEN = os.getenv('ADS_API_KEY') # Ensure your ADS API key is stored in environment variables
# Define system message with instructions.
# This text is sent as the "system" role message by `generate_response`.
# The closing triple quote below is required: without it the string literal
# swallows every following definition in the file.
system_message = """
You are ExosAI, a helpful assistant specializing in Exoplanet research.
Your goal is to provide detailed, structured answers by following these guidelines:
- The central topic branches out into 3 science objectives.
- Each science objective branches out into 2 physical parameters, and each physical parameter branches out into 2 observables.
- Include details, provide scientific references, and make recommendations for observation parameters like wavelength, resolution, etc.
"""
def encode_text(text):
    """Embed ``text`` with the module-level bi-encoder.

    The text is tokenized (truncated to at most 128 tokens), passed through
    ``bi_model``, and the last hidden state is averaged over the token
    dimension (``dim=1``).

    Args:
        text: The text to embed.

    Returns:
        A flattened NumPy array holding the mean-pooled embedding.
    """
    tokenized = bi_tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=128,
    )
    hidden_states = bi_model(**tokenized).last_hidden_state
    # Mean-pool across tokens, then detach from the autograd graph so the
    # tensor can be converted to NumPy.
    pooled = hidden_states.mean(dim=1)
    return pooled.detach().numpy().flatten()
def retrieve_relevant_context(user_input, context_texts):
    """Return the entry of ``context_texts`` most similar to ``user_input``.

    Each candidate is embedded with ``encode_text`` and ranked by cosine
    similarity against the embedding of ``user_input``.

    Args:
        user_input: The user's query text.
        context_texts: Iterable of candidate context strings (``chatbot``
            passes the context box split on newlines). May be empty or None.

    Returns:
        The best-matching context string, or ``""`` when there is no
        non-blank candidate to choose from.
    """
    # Blank entries (typical after splitting on "\n") carry no information
    # and must not be allowed to win the ranking; with nothing left,
    # np.argmax would raise on an empty array, so bail out early.
    candidates = [text for text in (context_texts or []) if text and text.strip()]
    if not candidates:
        return ""

    user_embedding = encode_text(user_input).reshape(1, -1)
    # encode_text returns a flat vector, so stacking gives (n_candidates, dim).
    context_embeddings = np.vstack([encode_text(text) for text in candidates])
    similarities = cosine_similarity(user_embedding, context_embeddings).flatten()
    most_relevant_idx = int(np.argmax(similarities))
    return candidates[most_relevant_idx]
def extract_keywords_with_gpt(user_input, max_tokens=100, temperature=0.3):
    """Ask the chat model to extract keywords from a user query.

    Args:
        user_input: The raw user query.
        max_tokens: Upper bound on the number of tokens in the reply.
        temperature: Sampling temperature for the completion.

    Returns:
        The model's reply with surrounding whitespace stripped (an empty
        string if the reply has no text content).
    """
    # Define a prompt to ask GPT-4 to extract keywords and important terms
    keyword_prompt = f"Extract the most important keywords, scientific concepts, and parameters from the following user query:\n\n{user_input}"

    # Call GPT-4 to extract keywords based on the user prompt.
    # NOTE(review): the model name follows the "GPT-4" wording used in this
    # file's comments -- confirm it matches the deployed model.
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an expert in identifying key scientific terms and concepts."},
            {"role": "user", "content": keyword_prompt},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # Extract the content from GPT-4's reply; content can be None, so guard
    # before stripping.
    extracted_keywords = (response.choices[0].message.content or "").strip()
    return extracted_keywords
def fetch_nasa_ads_references(prompt):
    """Look up NASA ADS papers related to ``prompt``.

    Keywords are extracted from the prompt with ``extract_keywords_with_gpt``
    and passed to ``ADS.query_simple``. At most five results are returned.

    Args:
        prompt: The user's query text.

    Returns:
        A list of 6-tuples ``(title, authors, journal, pubdate, bibcode,
        doi_link)``. When nothing is found, or when any step raises, the
        list holds a single placeholder tuple instead (the error text is
        carried in the second field), so callers never see an exception.
    """
    # The whole lookup is best-effort: a failed keyword extraction or ADS
    # query must not break the chatbot, so everything runs under one try.
    try:
        # Step 1: Extract keywords using GPT
        keywords = extract_keywords_with_gpt(prompt)

        # Step 2: Refine the query using the extracted keywords
        simplified_query = keywords

        # Step 3: Query NASA ADS for relevant papers
        papers = ADS.query_simple(simplified_query)
        if papers is None or len(papers) == 0:
            return [("No results found", "N/A", "N/A", "N/A", "N/A", "N/A")]

        # Step 4: Extract references with title, authors, bibcode, DOI,
        # journal, and publication date
        references = []
        for paper in papers[:5]:  # Limit to 5 references
            title = paper['title'][0]
            author_list = paper['author']
            authors = ", ".join(author_list[:3]) + (" et al." if len(author_list) > 3 else "")
            bibcode = paper['bibcode']

            # Fetch DOI if available.
            # NOTE(review): relies on the row object supporting `.get`; a
            # failure here is caught by the except clause below.
            doi = paper.get('doi', ['N/A'])[0]
            # NOTE(review): the name suggests a resolvable URL was intended,
            # but only the bare DOI is emitted -- confirm.
            doi_link = f"{doi}" if doi != "N/A" else "N/A"

            # Fetch journal and publication date
            journal = paper.get('pub', 'Unknown Journal')
            pubdate = paper.get('pubdate', 'Unknown Date')

            # Add the extracted info to the list of references
            references.append((title, authors, journal, pubdate, bibcode, doi_link))
        return references
    except Exception as e:
        return [("Error fetching references", str(e), "N/A", "N/A", "N/A", "N/A")]
def fetch_exoplanet_data():
    """Fetch a small sample of planets from the NASA Exoplanet Archive.

    Runs an ADQL query against the ``pscomppars`` table through the
    archive's TAP service and converts the result to pandas.

    Returns:
        A pandas DataFrame with the first 10 rows of the selected planet,
        host-star and system columns.
    """
    # Connect to NASA Exoplanet Archive TAP Service
    tap_service = vo.dal.TAPService("https://exoplanetarchive.ipac.caltech.edu/TAP")

    # Query a fixed set of columns (not the whole table) for the first 10
    # rows of pscomppars.
    ex_query = """
        SELECT TOP 10 pl_name, hostname, sy_snum, sy_pnum, discoverymethod, disc_year, disc_facility, pl_controv_flag, pl_orbper, pl_orbsmax, pl_rade, pl_bmasse, pl_orbeccen, pl_eqt, st_spectype, st_teff, st_rad, st_mass, ra, dec, sy_vmag
        FROM pscomppars
    """

    # Execute the query
    qresult = tap_service.search(ex_query)

    # Convert to a Pandas DataFrame (via an astropy table)
    ptable = qresult.to_table()
    exoplanet_data = ptable.to_pandas()
    return exoplanet_data
def generate_response(user_input, relevant_context="", references=None, max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
    """Generate the main structured answer for a user query.

    Args:
        user_input: The user's question.
        relevant_context: Optional context text; when non-empty it is
            prepended to the question in the prompt.
        references: Optional list of 6-tuples ``(title, authors, journal,
            pubdate, bibcode, doi_link)`` to append to the answer.
        max_tokens: Upper bound on the number of tokens in the reply.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        frequency_penalty: Frequency penalty passed to the API.
        presence_penalty: Presence penalty passed to the API.

    Returns:
        The model's reply (stripped), followed by an "ADS References" list
        when ``references`` is non-empty.
    """
    # Only mention context in the prompt when there is some; the else
    # branch keeps the context version from being overwritten.
    if relevant_context:
        combined_input = f"Context: {relevant_context}\nQuestion: {user_input}\nAnswer (please organize the answer in a structured format with topics and subtopics):"
    else:
        combined_input = f"Question: {user_input}\nAnswer (please organize the answer in a structured format with topics and subtopics):"

    # NOTE(review): the model name follows the "GPT-4" wording used in this
    # file's comments -- confirm it matches the deployed model.
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": combined_input},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty,
    )
    # content can be None, so guard before stripping.
    response_content = (response.choices[0].message.content or "").strip()

    # Append references to the response
    if references:
        references_text = "\n\nADS References:\n" + "\n".join(
            f"- {title} by {authors}, {journal}, published on {pubdate} (Bibcode: {bibcode}) [DOI: {doi_link}]"
            for title, authors, journal, pubdate, bibcode, doi_link in references
        )
        return f"{response_content}\n{references_text}"
    return response_content
def generate_data_insights(user_input, exoplanet_data, max_tokens=500, temperature=0.3):
    """
    Generate insights by passing the user's input along with the exoplanet data to GPT-4.

    Args:
        user_input: The user's query text.
        exoplanet_data: Object exposing ``to_csv(index=False)`` (``chatbot``
            passes the DataFrame returned by ``fetch_exoplanet_data``).
        max_tokens: Upper bound on the number of tokens in the reply.
        temperature: Sampling temperature.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    # Convert the dataframe to a readable format for GPT (e.g., CSV-style text)
    data_as_text = exoplanet_data.to_csv(index=False)  # CSV-style for better readability

    # Create a prompt with the user query and the data sample
    insights_prompt = (
        "Analyze the following user query and provide relevant insights based on the provided exoplanet data.\n\n"
        f"User Query: {user_input}\n\n"
        f"Exoplanet Data:\n{data_as_text}\n\n"
        "Please provide insights that are relevant to the user's query."
    )

    # Call GPT-4 to generate insights based on the data and user input.
    # NOTE(review): the model name follows the "GPT-4" wording used in this
    # file's comments -- confirm it matches the deployed model.
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an expert in analyzing astronomical data and generating insights."},
            {"role": "user", "content": insights_prompt},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # Extract and return GPT-4's insights; content can be None, so guard
    # before stripping.
    data_insights = (response.choices[0].message.content or "").strip()
    return data_insights
def export_to_word(response_content):
    """Write ``response_content`` to a temporary .docx file.

    Args:
        response_content: The text to export; it is split on newlines.

    Returns:
        The filesystem path of the saved document. The file is created with
        ``delete=False``, so it persists until the caller removes it.
    """
    doc = Document()
    doc.add_heading('AI Generated SCDD', 0)

    # NOTE(review): the original loop body is not visible; writing each line
    # as its own paragraph is the minimal reconstruction -- confirm.
    for line in response_content.split('\n'):
        doc.add_paragraph(line)

    # Reserve a unique path and close the handle before saving, so the
    # document is written by path rather than through a still-open file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as temp_file:
        temp_path = temp_file.name
    doc.save(temp_path)
    return temp_path
def chatbot(user_input, context="", use_encoder=False, max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
    """Run the full ExosAI pipeline for one user prompt.

    Steps: optionally pick the most relevant context line, fetch ADS
    references, generate the answer, export it to Word, fetch exoplanet
    data, and generate data insights.

    Args:
        user_input: The user's prompt.
        context: Optional free-text context, one candidate per line.
        use_encoder: When true (and ``context`` is non-empty), select the
            context line most similar to the prompt via the bi-encoder.
        max_tokens: Forwarded to ``generate_response``.
        temperature: Forwarded to ``generate_response``.
        top_p: Forwarded to ``generate_response``.
        frequency_penalty: Forwarded to ``generate_response``.
        presence_penalty: Forwarded to ``generate_response``.

    Returns:
        A 5-tuple ``(full_response, iframe_html, mapify_button_html,
        word_doc_path, exoplanet_data)``.
    """
    # Without the else branch the selected context would always be reset.
    if use_encoder and context:
        context_texts = context.split("\n")
        relevant_context = retrieve_relevant_context(user_input, context_texts)
    else:
        relevant_context = ""

    # Fetch NASA ADS references using the full prompt
    references = fetch_nasa_ads_references(user_input)

    # Generate response from GPT-4
    response = generate_response(user_input, relevant_context, references, max_tokens, temperature, top_p, frequency_penalty, presence_penalty)

    # Export the response to a Word document
    word_doc_path = export_to_word(response)

    # Fetch exoplanet data
    exoplanet_data = fetch_exoplanet_data()

    # Generate insights based on the user query and exoplanet data
    data_insights = generate_data_insights(user_input, exoplanet_data)

    # Combine the response and the data insights
    full_response = f"{response}\n\nInsights from Existing Data: {data_insights}"

    # Embed Miro iframe.
    # NOTE(review): the `src` value looks truncated (the board URL prefix is
    # missing) -- restore the full embed URL.
    iframe_html = """
    <iframe width="768" height="432" src=",-462,5434,3063&embedId=710273023721" frameborder="0" scrolling="no" allow="fullscreen; clipboard-read; clipboard-write" allowfullscreen></iframe>
    """

    # Styled link button for Mapify. The CSS rules must sit inside a
    # <style> element with closed braces to take effect.
    # NOTE(review): the link `href` is empty -- restore the target URL.
    mapify_button_html = """
    <style>
    .mapify-button {
        background: linear-gradient(135deg, #1E90FF 0%, #87CEFA 100%);
        border: none;
        color: white;
        padding: 15px 35px;
        text-align: center;
        text-decoration: none;
        display: inline-block;
        font-size: 18px;
        font-weight: bold;
        margin: 20px 2px;
        cursor: pointer;
        border-radius: 25px;
        transition: all 0.3s ease;
        box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
    }
    .mapify-button:hover {
        background: linear-gradient(135deg, #4682B4 0%, #1E90FF 100%);
        box-shadow: 0 6px 20px rgba(0, 0, 0, 0.3);
        transform: scale(1.05);
    }
    </style>
    <a href="" target="_blank">
        <button class="mapify-button">Create Mind Map on Mapify</button>
    </a>
    """

    return full_response, iframe_html, mapify_button_html, word_doc_path, exoplanet_data
iface = gr.Interface(
gr.Textbox(lines=2, placeholder="Enter your Science Goal here...", label="Prompt ExosAI"),
gr.Textbox(lines=5, placeholder="Enter some context here...", label="Context"),
gr.Checkbox(label="Use NASA SMD Bi-Encoder for Context"),
gr.Slider(50, 1000, value=150, step=10, label="Max Tokens"),
gr.Slider(0.0, 1.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(0.0, 1.0, value=0.9, step=0.1, label="Top-p"),
gr.Slider(0.0, 1.0, value=0.5, step=0.1, label="Frequency Penalty"),
gr.Slider(0.0, 1.0, value=0.0, step=0.1, label="Presence Penalty")
gr.Textbox(label="ExosAI finds..."),
gr.HTML(label="Generate Mind Map on Mapify"),
gr.File(label="Download SCDD", type="filepath"),
gr.Dataframe(label="Exoplanet Data Table")
title="ExosAI - NASA SMD SCDD AI Assistant [version-0.5a]",
description="ExosAI is an AI-powered assistant for generating and visualising HWO Science Cases",