luansouza4444 committed
Commit a1b2b0f · verified · 1 Parent(s): 7611fd1

Upload 2 files

Files changed (2):
  1. app (1).py +64 -0
  2. requirements.txt +9 -0
app (1).py ADDED
@@ -0,0 +1,64 @@
+ # -*- coding: utf-8 -*-
+ """app
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1ZybFOpX1r-SAA-RslP5WJkQ9gdI6JCCj
+ """
+
+ import streamlit as st
+ import os
+ from langchain.chat_models import ChatOpenAI
+ from langchain.document_loaders import PyPDFLoader
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.chains import RetrievalQA
+ import tempfile
+
+ st.set_page_config(page_title="Análise de PDF com LangChain", layout="centered")
+ st.title("📄🔍 Análise de PDF com LangChain")
+
+ uploaded_file = st.file_uploader("Faça upload de um PDF", type="pdf")
+
+ if uploaded_file is not None:
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+         tmp.write(uploaded_file.read())
+         pdf_path = tmp.name
+
+     with st.spinner("Processando o PDF..."):
+         try:
+             loader = PyPDFLoader(pdf_path)
+             documents = loader.load()
+
+             text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+             docs = text_splitter.split_documents(documents)
+
+             embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+             vectorstore = FAISS.from_documents(docs, embeddings)
+
+             llm = ChatOpenAI(
+                 openai_api_base="https://openrouter.ai/api/v1",
+                 openai_api_key=os.environ["OPENROUTER_API_KEY"],
+                 model='deepseek/deepseek-r1-zero:free'
+             )
+
+             qa_chain = RetrievalQA.from_chain_type(
+                 llm=llm,
+                 retriever=vectorstore.as_retriever(),
+                 return_source_documents=True
+             )
+
+             resposta = qa_chain.invoke({"query": "Qual é o principal assunto tratado neste PDF?"})
+
+             st.success("✅ Resposta gerada com sucesso!")
+             st.subheader("🤖 Resposta:")
+             st.write(resposta['result'])
+
+             st.subheader("📄 Fontes:")
+             for i, doc in enumerate(resposta['source_documents']):
+                 st.markdown(f"**Fonte {i+1}:**\n\n{doc.page_content[:500]}...")
+
+         except Exception as e:
+             st.error(f"Erro ao processar o PDF: {str(e)}")
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ streamlit
+ langchain
+ openai
+ python-dotenv
+ PyPDF2
+ faiss-cpu
+ tiktoken
+ pypdf
+ sentence-transformers
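
To try the app locally (a minimal sketch, assuming the OpenRouter key is exposed as the OPENROUTER_API_KEY environment variable, or as a Space secret of the same name when deployed): install the dependencies with pip install -r requirements.txt, then launch the server with streamlit run "app (1).py".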