Update app.py
app.py
CHANGED
@@ -12,14 +12,14 @@ from docx.oxml.ns import nsdecls
 from docx.oxml import parse_xml
 import io
 import tempfile
-#
-import pyvo as vo
+#import pyvo as vo
 import pandas as pd
 from pinecone import Pinecone
 import logging
 import re
 
 from utils.ads_references import extract_keywords_with_gpt, fetch_nasa_ads_references
+from utils.data_insights import fetch_exoplanet_data, generate_data_insights
 
 
 from langchain_openai import ChatOpenAI
@@ -41,9 +41,6 @@ bi_model = AutoModel.from_pretrained(bi_encoder_model_name)
 api_key = os.getenv('OPENAI_API_KEY')
 client = OpenAI(api_key=api_key)
 
-# Set up NASA ADS token
-#ADS.TOKEN = os.getenv('ADS_API_KEY')  # Ensure your ADS API key is stored in environment variables
-
 # Pinecone setup
 pinecone_api_key = os.getenv('PINECONE_API_KEY')
 pc = Pinecone(api_key=pinecone_api_key)
@@ -132,24 +129,6 @@ def clean_retrieved_context(raw_context):
     # Return explicitly cleaned context
     return cleaned.strip()
 
-def fetch_exoplanet_data():
-    # Connect to NASA Exoplanet Archive TAP Service
-    tap_service = vo.dal.TAPService("https://exoplanetarchive.ipac.caltech.edu/TAP")
-
-    # Query to fetch all columns from the pscomppars table
-    ex_query = """
-    SELECT TOP 10 pl_name, hostname, sy_snum, sy_pnum, discoverymethod, disc_year, disc_facility, pl_controv_flag, pl_orbper, pl_orbsmax, pl_rade, pl_bmasse, pl_orbeccen, pl_eqt, st_spectype, st_teff, st_rad, st_mass, ra, dec, sy_vmag
-    FROM pscomppars
-    """
-    # Execute the query
-    qresult = tap_service.search(ex_query)
-
-    # Convert to a Pandas DataFrame
-    ptable = qresult.to_table()
-    exoplanet_data = ptable.to_pandas()
-
-    return exoplanet_data
-
 def generate_response(user_input, science_objectives="", relevant_context="", references=[], max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
     # Case 1: Both relevant context and science objectives are provided
     if relevant_context and science_objectives.strip():
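Note: the function deleted above is re-imported from utils.data_insights in the import hunk, so it presumably now lives in that module. A minimal sketch of what the relocated helper could look like there (the module itself is not part of this diff; the pyvo import that app.py comments out above would move with it):

# utils/data_insights.py -- hypothetical sketch, not shown in this commit
import pyvo as vo

def fetch_exoplanet_data():
    # Connect to the NASA Exoplanet Archive TAP service
    tap_service = vo.dal.TAPService("https://exoplanetarchive.ipac.caltech.edu/TAP")
    ex_query = """
    SELECT TOP 10 pl_name, hostname, sy_snum, sy_pnum, discoverymethod, disc_year,
           disc_facility, pl_controv_flag, pl_orbper, pl_orbsmax, pl_rade, pl_bmasse,
           pl_orbeccen, pl_eqt, st_spectype, st_teff, st_rad, st_mass, ra, dec, sy_vmag
    FROM pscomppars
    """
    # Run the query and convert the result to a pandas DataFrame
    qresult = tap_service.search(ex_query)
    return qresult.to_table().to_pandas()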
@@ -195,37 +174,6 @@ def generate_response(user_input, science_objectives="", relevant_context="", re
     # Return two clearly separated responses
     return full_response, response_only
 
-def generate_data_insights(user_input, exoplanet_data, max_tokens=500, temperature=0.3):
-    """
-    Generate insights by passing the user's input along with the exoplanet data to GPT-4.
-    """
-    # Convert the dataframe to a readable format for GPT (e.g., CSV-style text)
-    data_as_text = exoplanet_data.to_csv(index=False)  # CSV-style for better readability
-
-    # Create a prompt with the user query and the data sample
-    insights_prompt = (
-        f"Analyze the following user query and provide relevant insights based on the provided exoplanet data.\n\n"
-        f"User Query: {user_input}\n\n"
-        f"Exoplanet Data:\n{data_as_text}\n\n"
-        f"Please provide insights that are relevant to the user's query."
-    )
-
-    # Call GPT-4 to generate insights based on the data and user input
-    response = client.chat.completions.create(
-        model="gpt-4",
-        messages=[
-            {"role": "system", "content": "You are an expert in analyzing astronomical data and generating insights."},
-            {"role": "user", "content": insights_prompt}
-        ],
-        max_tokens=max_tokens,
-        temperature=temperature
-    )
-
-    # Extract and return GPT-4's insights
-    data_insights = response.choices[0].message.content.strip()
-    return data_insights
-
-
 def export_to_word(response_content, subdomain_definition, science_goal, context, max_tokens, temperature, top_p, frequency_penalty, presence_penalty):
     doc = Document()
 
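This function also moves to utils.data_insights; unlike the deleted version, which relied on app.py's module-level OpenAI client, the new call site in the next hunk passes the client explicitly. A hedged sketch of the relocated signature, inferred from the call generate_data_insights(user_input, client, exoplanet_data) (keyword defaults beyond that call are assumptions carried over from the deleted code):

# utils/data_insights.py -- hypothetical sketch of the relocated function
def generate_data_insights(user_input, client, exoplanet_data, max_tokens=500, temperature=0.3):
    """Generate insights from the exoplanet data with GPT-4, using the caller's OpenAI client."""
    # Serialize the DataFrame so GPT can read it as CSV-style text
    data_as_text = exoplanet_data.to_csv(index=False)
    insights_prompt = (
        f"Analyze the following user query and provide relevant insights based on the provided exoplanet data.\n\n"
        f"User Query: {user_input}\n\n"
        f"Exoplanet Data:\n{data_as_text}\n\n"
        f"Please provide insights that are relevant to the user's query."
    )
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an expert in analyzing astronomical data and generating insights."},
            {"role": "user", "content": insights_prompt},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return response.choices[0].message.content.strip()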
@@ -430,7 +378,7 @@ def chatbot(user_input, science_objectives="", context="", subdomain="", max_tok
 
     # Fetch exoplanet data and generate insights
     exoplanet_data = fetch_exoplanet_data()
-
+    data_insights_uq = generate_data_insights(user_input, client, exoplanet_data)
 
     # Extract GPT-generated table into DataFrame
     extracted_table_df = gpt_response_to_dataframe(full_response)
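For reference, a minimal standalone usage of the relocated helpers outside the chatbot flow, assuming utils/data_insights.py exposes exactly the two functions imported in the first hunk:

# Hypothetical standalone usage; OPENAI_API_KEY is read from the environment as in app.py
from openai import OpenAI
from utils.data_insights import fetch_exoplanet_data, generate_data_insights

client = OpenAI()
df = fetch_exoplanet_data()
print(generate_data_insights("Which of these planets orbit the brightest host stars?", client, df))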