import os
import tempfile

import requests
import streamlit as st

from crewai import Agent, Task, Crew
from crewai_tools import LlamaIndexTool
from langchain_groq import ChatGroq
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

st.title("Document Q&A Assistant with CrewAI") |
|
st.write("Upload a document, provide a link, or ask questions dynamically!") |
|
|
|
|
|
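# Read the required API keys from Streamlit secrets and stop early if any are missing.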
try:
    GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
    TAVILY_API_KEY = st.secrets["TAVILY_API_KEY"]
except KeyError as e:
    st.error(f"Missing API key in secrets: {e}. Please add it to your Streamlit secrets.")
    st.stop()

if not GROQ_API_KEY or not TAVILY_API_KEY:
    st.error("One or more required API keys are missing. Please check your configuration.")
    st.stop()

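# Helper to download a PDF from a URL and save it to a local path.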
def download_pdf_from_url(url, save_path):
    response = requests.get(url, timeout=30)  # fail fast rather than hang on an unresponsive host
    if response.status_code == 200:
        with open(save_path, 'wb') as f:
            f.write(response.content)
        return save_path
    else:
        st.error("Failed to download PDF from the provided URL.")
        return None

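# Let the user either upload a PDF or point to one by URL.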
document_source = st.radio("Choose input method:", ("Upload a PDF", "Provide PDF URL"))

pdf_path = None
if document_source == "Upload a PDF":
    uploaded_file = st.file_uploader("Upload a PDF file", type=['pdf'])
    if uploaded_file:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.getvalue())
            pdf_path = temp_file.name
        st.success("File uploaded successfully!")
else:
    pdf_url = st.text_input("Enter PDF URL")
    if st.button("Download PDF") and pdf_url:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            saved_path = download_pdf_from_url(pdf_url, temp_file.name)
        if saved_path:
            pdf_path = saved_path

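# Groq-hosted Llama 3.3 70B serves as the LLM for the CrewAI agent.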
llm = ChatGroq(groq_api_key=GROQ_API_KEY, model="groq/llama-3.3-70b-versatile")

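# Index the PDF with a local HuggingFace embedding model and expose it as a LlamaIndex query engine.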
def create_query_engine(pdf_path, llm):
    reader = SimpleDirectoryReader(input_files=[pdf_path])
    docs = reader.load_data()
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
    index = VectorStoreIndex.from_documents(docs, embed_model=embed_model)
    return index.as_query_engine(similarity_top_k=5)

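# Once a PDF is available, build the index, wrap it as a CrewAI tool, and answer questions with an agent.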
if pdf_path:
    st.success("PDF loaded successfully!")
    query_engine = create_query_engine(pdf_path, llm)
    query_tool = LlamaIndexTool.from_query_engine(
        query_engine,
        name="Document Query Tool",
        description="Tool to analyze and retrieve information from the uploaded document."
    )

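    # A single agent that answers questions using the document query tool.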
    researcher = Agent(
        role="Document Analyst",
        goal="Analyze documents and answer questions",
        backstory="Expert at retrieving insights from documents.",
        verbose=True,
        allow_delegation=False,
        tools=[query_tool],
        llm=llm,
    )

    # The {question} placeholder is interpolated by crew.kickoff(inputs=...) below.
    task = Task(
        description="Answer the user's question based on the uploaded document: {question}",
        expected_output="Clear and concise answers to user questions.",
        agent=researcher,
    )

    crew = Crew(agents=[researcher], tasks=[task], verbose=True)

    st.subheader("Ask a Question")
    user_question = st.text_input("Enter your question")

    if st.button("Get Answer"):
        with st.spinner("Processing your request..."):
            result = crew.kickoff(inputs={"question": user_question})
        st.success("Here is the answer:")
        st.write(result)
else:
    st.warning("Please upload a PDF or provide a valid URL to continue.")

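# Clean up the temporary PDF file at the end of the run.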
if pdf_path and os.path.exists(pdf_path):
    os.remove(pdf_path)