Spaces:
Runtime error
Runtime error
import streamlit as st | |
from sentence_transformers import SentenceTransformer, util | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
import requests | |
def find_abstracts(soup):
    """Collect identifier, title and abstract text from every CSW record.

    Args:
        soup: Parsed document exposing ``find_all``; every ``csw:record``
            element it yields must expose ``find``.

    Returns:
        A tuple ``(id_list, title_list, abs_list)`` of three lists of
        strings with one entry per record, in document order. A field
        whose element is absent from a record is reported as ``"NA"``.
    """
    def text_or_na(node):
        # ``find`` yields None for an absent tag; substitute a placeholder
        # so the three lists stay aligned instead of raising AttributeError.
        return node.text if node is not None else "NA"

    id_list = []
    title_list = []
    abs_list = []
    for record in soup.find_all("csw:record"):
        id_list.append(text_or_na(record.find("dc:identifier")))
        title_list.append(text_or_na(record.find("dc:title")))
        abs_list.append(text_or_na(record.find("dct:abstract")))
    return id_list, title_list, abs_list
def get_metadata(timeout: float = 30.0) -> pd.DataFrame:
    """Download NCEI Geoportal records and save them as a CSV table.

    Requests the Geoportal OpenSearch endpoint in CSW format, extracts the
    identifier, title and abstract of each record with ``find_abstracts``,
    writes the table to ``./ncei-metadata.csv`` and returns it.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        A DataFrame with the columns ``identifier``, ``title`` and
        ``abstract``, one row per record.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # Get the abstracts from Geoportal
    URL = "https://www.ncei.noaa.gov/metadata/geoportal/opensearch?f=csw&from=0&size=5000&sort=title.sort"
    page = requests.get(URL, timeout=timeout)
    # Fail loudly on an HTTP error: parsing an error page would yield zero
    # records and overwrite the CSV with an empty table.
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "lxml")
    id_list, title_list, abs_list = find_abstracts(soup)
    df = pd.DataFrame(
        {"identifier": id_list, "title": title_list, "abstract": abs_list}
    )
    df.to_csv("./ncei-metadata.csv")
    return df
def show_model(query=None):
    """Placeholder for running the search model on a query.

    Args:
        query: Search text entered by the user. Currently ignored; the
            parameter exists because ``main`` calls ``show_model(query)``,
            which raised ``TypeError`` while the function took no
            arguments.

    Returns:
        None. No model is loaded or queried yet.
    """
    return None
def main():
    """Render the presentation-style Streamlit page and the search demo.

    Draws the title, the explanatory sections with their slide images and
    a text input for the query, then passes the query to ``show_model``.
    Whatever ``show_model`` returns is written to the page; a ``None``
    result renders nothing.
    """
    st.title("Semantic Search for Datasets Using Sentence Transformers")
    st.write("A case study for the National Centers for Environmental Information (NCEI)")
    st.image("noaa_logo.png", width=150)
    st.write("## Goal: search for datasets in NCEI's Archive using natural language queries")
    st.write("[Repo](https://github.com/myrandaGoesToSpace/semantic-search-datasets)")
    st.image("pres-whatisnoaa.png")

    st.write("## The Problem Context")
    st.write("Uses service called OneStop for data search")
    st.write("**Problems:**")
    st.write("- Uses keyword search -- not robust to natural language queries")
    st.write("- Filtering options too specific for non-expert users")
    # Slides currently disabled:
    #st.image("pres-onestop.png")
    #st.image("pres-problems.png")

    st.write("## The Model: [Sentence Transformers](https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1)")
    st.image("pres-sentencetransformers.png")

    st.write("## Project Data")
    st.image("pres-metadata.png")

    st.write("## The Process")
    st.image("pres-creatingse.png")

    st.write("## Results and Demo")
    query = st.text_input("Enter your query:")
    results = show_model(query)
    # The return value used to be discarded, so search output could never
    # appear on the page; show it whenever there is something to show.
    if results is not None:
        st.write(results)

    st.image("pres-futureplans.png")
# Build the page only when this file is executed as the entry script, so
# that importing the module (e.g. to reuse get_metadata) has no side effects.
if __name__ == "__main__":
    main()