# Page-header residue captured along with this file (not part of the program):
# Spaces: Build error
import streamlit as st | |
import pandas as pd | |
from pymongo import MongoClient | |
from dotenv import load_dotenv | |
import os | |
# 1. Load environment variables
load_dotenv()
# The connection string is read from the MONGODB_URI environment variable.
# The previous lookup key was "MONGODB_UR" (presumably a typo, given the
# constant's name), so a variable named MONGODB_URI was never consulted.
# The misspelled key is still honoured as a fallback so that any deployment
# configured to match the old spelling keeps working.
# SECURITY NOTE(review): the last-resort default below embeds credentials in
# source control; prefer supplying the URI via the environment and rotating
# the password. The literal is kept unchanged here to preserve behaviour.
MONGODB_URI = os.getenv(
    "MONGODB_URI",
    os.getenv(
        "MONGODB_UR",
        "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
    ),
)
# 2. Create MongoDB connection
client = MongoClient(MONGODB_URI)
db = client["novascholar_db"]
collection = db["research_papers"]
def get_collection_data(paper_type: str):
    """Return every document stored for the given paper type.

    The collection name is derived from ``paper_type`` by replacing spaces
    with underscores and lower-casing the result. Each document's ``_id``
    is replaced, in place, by its string form.

    Returns:
        The list of documents, or ``None`` if any exception was raised
        (the error is reported to the page through ``st.error``).
    """
    try:
        # Look up the collection whose name is derived from the paper type
        target = db[paper_type.replace(" ", "_").lower()]
        # Materialise the full result set
        documents = list(target.find())
        # Stringify each ObjectId
        for document in documents:
            document["_id"] = str(document["_id"])
    except Exception as exc:
        st.error(f"Database Error: {str(exc)}")
        return None
    return documents
def main():
    """Render the page: pick a paper type, fetch its documents, offer a CSV.

    Flow:
      1. Show a dropdown of paper types and a "Fetch Data" button.
      2. On click, load the documents via ``get_collection_data``.
      3. If the fetch failed (``None``), stop: the error has already been
         shown by ``get_collection_data``, so no "no documents" warning is
         added on top of it.
      4. If the collection is empty, show a warning.
      5. Otherwise show the documents as a table and a CSV download button.
    """
    st.title("MongoDB Collection Download")
    st.write("Download all documents from the selected research paper collection")
    # Dropdown to select the type of research paper
    paper_type = st.selectbox(
        "Select type of research paper:",
        [
            "Review Based Paper",
            "Opinion/Perspective Based Paper",
            "Empirical Research Paper",
            "Research Paper (Other)",
        ],
    )
    if not st.button("Fetch Data"):
        return

    with st.spinner("Retrieving documents from MongoDB..."):
        docs = get_collection_data(paper_type)

    if docs is None:
        # Fetch failed; the database error is already displayed, and an
        # additional "No documents found" message would be misleading.
        return
    if not docs:
        st.warning(f"No documents found in the '{paper_type}' collection.")
        return

    # Convert to DataFrame
    df = pd.DataFrame(docs)
    # Convert lists to comma-separated strings for consistency
    for col in df.columns:
        if df[col].apply(lambda x: isinstance(x, list)).any():
            df[col] = df[col].apply(
                lambda x: ", ".join(map(str, x)) if isinstance(x, list) else x
            )
    st.success(
        f"Successfully retrieved {len(df)} documents from '{paper_type}' collection."
    )
    st.dataframe(df)
    # Provide option to download the data as CSV
    csv = df.to_csv(index=False).encode("utf-8")
    # "/" (as in "Opinion/Perspective Based Paper") is a path separator and
    # must not appear in a download file name, so it is replaced as well.
    file_stem = paper_type.replace(" ", "_").replace("/", "_").lower()
    st.download_button(
        label="Download CSV",
        data=csv,
        file_name=f"{file_stem}_papers.csv",
        mime="text/csv",
    )
# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()