Cesar42 committed on
Commit
691efb9
·
verified ·
1 Parent(s): 26263da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -16
app.py CHANGED
@@ -1,46 +1,60 @@
1
  import streamlit as st
2
  import os
3
-
4
  from PyPDF2 import PdfReader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
- from langchain.embeddings import HuggingFaceEmbeddings
7
  from langchain.vectorstores import FAISS
8
  from langchain.chat_models import ChatOpenAI
9
  from langchain.chains.question_answering import load_qa_chain
10
 
11
- st.set_page_config('preguntaDOC')
12
  st.header("Pregunta a tu PDF")
 
13
  OPENAI_API_KEY = st.text_input('OpenAI API Key', type='password')
 
 
14
  pdf_obj = st.file_uploader("Carga tu documento", type="pdf", on_change=st.cache_resource.clear)
15
 
16
  @st.cache_resource
17
- def create_embeddings(pdf):
 
 
18
  pdf_reader = PdfReader(pdf)
19
  text = ""
20
  for page in pdf_reader.pages:
21
  text += page.extract_text()
22
-
23
  text_splitter = RecursiveCharacterTextSplitter(
24
  chunk_size=800,
25
  chunk_overlap=100,
26
  length_function=len
27
- )
28
  chunks = text_splitter.split_text(text)
29
-
30
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
 
 
 
 
 
31
  knowledge_base = FAISS.from_texts(chunks, embeddings)
32
-
33
  return knowledge_base
34
 
35
- if pdf_obj:
36
- knowledge_base = create_embeddings(pdf_obj)
37
  user_question = st.text_input("Haz una pregunta sobre tu PDF:")
38
-
39
- if user_question:
40
  os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
41
  docs = knowledge_base.similarity_search(user_question, 3)
42
  llm = ChatOpenAI(model_name='gpt-3.5-turbo')
43
  chain = load_qa_chain(llm, chain_type="stuff")
44
- respuesta = chain.run(input_documents=docs, question=user_question)
45
-
46
- st.write(respuesta)
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import os
 
3
  from PyPDF2 import PdfReader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.embeddings import HuggingFaceHubEmbeddings # Cambiado desde HuggingFaceEmbeddings
6
  from langchain.vectorstores import FAISS
7
  from langchain.chat_models import ChatOpenAI
8
  from langchain.chains.question_answering import load_qa_chain
9
 
# --- Page chrome and user inputs -------------------------------------------
# set_page_config is the first Streamlit call in this script.
st.set_page_config(page_title="preguntaDOC")
st.header("Pregunta a tu PDF")

# Both credentials are collected as masked text fields.
OPENAI_API_KEY = st.text_input(
    label="OpenAI API Key",
    type="password",
)
# Added for the Hugging Face API.
HUGGINGFACE_API_KEY = st.text_input(
    label="Hugging Face API Key",
    type="password",
)

# Changing the uploaded file triggers st.cache_resource.clear as the
# on_change callback.
pdf_obj = st.file_uploader(
    label="Carga tu documento",
    type="pdf",
    on_change=st.cache_resource.clear,
)
17
 
@st.cache_resource
def create_embeddings(pdf, hf_api_key):
    """Build a FAISS vector store from the text of a PDF.

    The text of every page is concatenated, split into chunks of at most
    800 characters with a 100-character overlap, embedded with
    ``HuggingFaceHubEmbeddings`` and indexed with ``FAISS.from_texts``.
    The function is wrapped in ``st.cache_resource``.

    Args:
        pdf: The PDF source handed to ``PdfReader``.
        hf_api_key: Hugging Face Hub API token used by the embedding client.

    Returns:
        The FAISS vector store built from the document chunks.

    Raises:
        ValueError: If no text could be extracted from the PDF (for example
            a scanned, image-only document), since an index cannot be built
            from zero chunks.
    """
    # Configure the HF token in the environment as well (kept from the
    # original; the token is also passed explicitly below).
    # NOTE(review): this mutates process-wide state - confirm it is still
    # needed, since the embedding client already receives the token.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_api_key

    pdf_reader = PdfReader(pdf)
    # Pages without extractable text contribute nothing; `or ""` keeps the
    # join safe if extract_text() yields an empty/None result.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # Fail early with a readable message instead of letting the vector
        # store constructor choke on an empty list.
        raise ValueError("No se pudo extraer texto del PDF.")

    # Use HuggingFaceHubEmbeddings instead of HuggingFaceEmbeddings.
    embeddings = HuggingFaceHubEmbeddings(
        repo_id="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
        huggingfacehub_api_token=hf_api_key,
    )

    return FAISS.from_texts(chunks, embeddings)
42
 
# --- Main interaction flow ---------------------------------------------------
# A document and a Hugging Face key are both required to build the index;
# an OpenAI key is additionally required to answer a question.
if pdf_obj and HUGGINGFACE_API_KEY:
    try:
        knowledge_base = create_embeddings(pdf_obj, HUGGINGFACE_API_KEY)
    except Exception as e:
        # Top-level UI boundary: show the failure instead of a raw traceback
        # and end this script run, since nothing below can work without the
        # index.
        st.error(f"Error: {str(e)}")
        st.stop()

    user_question = st.text_input("Haz una pregunta sobre tu PDF:")

    if user_question and OPENAI_API_KEY:
        os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

        with st.spinner("Procesando tu pregunta..."):
            try:
                # Retrieval and model setup are inside the guarded region too,
                # so their failures are reported the same way as chain errors.
                docs = knowledge_base.similarity_search(user_question, 3)
                llm = ChatOpenAI(model_name='gpt-3.5-turbo')
                chain = load_qa_chain(llm, chain_type="stuff")
                respuesta = chain.run(input_documents=docs, question=user_question)
            except Exception as e:
                st.error(f"Error: {str(e)}")
            else:
                st.write(respuesta)
    elif user_question:
        # A question was typed but there is no OpenAI key: say so rather than
        # silently doing nothing.
        st.warning("Por favor, introduce una clave API de OpenAI para responder a tu pregunta.")
elif pdf_obj and not HUGGINGFACE_API_KEY:
    st.warning("Por favor, introduce una clave API de Hugging Face para procesar el documento.")