ChaitanyaFM committed on
Commit
145be8a
·
1 Parent(s): b868bdc

Added functionality to read text files

Browse files
Files changed (2) hide show
  1. app1.py +18 -8
  2. requirements.txt +0 -0
app1.py CHANGED
@@ -9,14 +9,25 @@ from langchain.chains import ConversationalRetrievalChain
9
  from htmlTemplates import css, bot_template, user_template
10
  from langchain.llms import HuggingFaceHub
11
 
12
- def get_pdf_text(pdf_docs):
13
  text = ""
14
- for pdf in pdf_docs:
15
- pdf_reader = PdfReader(pdf)
16
- for page in pdf_reader.pages:
17
- text += page.extract_text()
18
  return text
19
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def get_text_chunks(text):
22
  text_splitter = RecursiveCharacterTextSplitter(
@@ -64,8 +75,7 @@ def handle_userinput(user_question):
64
 
65
  def main():
66
  load_dotenv()
67
- st.set_page_config(page_title="ChatBot",
68
- page_icon=":books:")
69
  st.write(css, unsafe_allow_html=True)
70
 
71
  if "conversation" not in st.session_state:
@@ -85,7 +95,7 @@ def main():
85
  if st.button("Process"):
86
  with st.spinner("Processing"):
87
  # get pdf text
88
- raw_text = get_pdf_text(pdf_docs)
89
 
90
  # get the text chunks
91
  text_chunks = get_text_chunks(raw_text)
 
9
  from htmlTemplates import css, bot_template, user_template
10
  from langchain.llms import HuggingFaceHub
11
 
12
def get_pdf_text(pdf):
    """Return the text of every page of one PDF, concatenated in page order.

    Args:
        pdf: A single PDF source handed straight to ``PdfReader``.

    Returns:
        One string made of each page's ``extract_text()`` result, with no
        separator inserted between pages.
    """
    reader = PdfReader(pdf)
    # Build the result in one pass rather than growing a string page by page.
    return "".join(page.extract_text() for page in reader.pages)
18
 
19
def get_files(text_doc):
    """Concatenate the text of every uploaded plain-text or PDF file.

    Args:
        text_doc: Iterable of uploaded-file objects, each exposing a ``type``
            MIME string and a ``getvalue()`` method that returns bytes. May
            be ``None`` or empty when nothing has been uploaded.

    Returns:
        The text of all supported files joined in upload order, with no
        separator between files. Files whose MIME type is neither
        ``text/plain`` nor ``application/pdf`` are skipped.

    Raises:
        UnicodeDecodeError: If a ``text/plain`` file is not valid UTF-8.
    """
    # Nothing uploaded yet: return an empty string instead of failing on
    # iteration over None.
    if not text_doc:
        return ""

    parts = []
    for uploaded in text_doc:
        if uploaded.type == "text/plain":
            # Plain text is decoded directly from the uploaded bytes.
            parts.append(uploaded.getvalue().decode("utf-8"))
        elif uploaded.type == "application/pdf":
            parts.append(get_pdf_text(uploaded))
    # The accumulated document text is deliberately not printed: doing so
    # would write every user's uploaded content to stdout.
    return "".join(parts)
29
+
30
+
31
 
32
  def get_text_chunks(text):
33
  text_splitter = RecursiveCharacterTextSplitter(
 
75
 
76
  def main():
77
  load_dotenv()
78
+ st.set_page_config(page_title="ChatBot")
 
79
  st.write(css, unsafe_allow_html=True)
80
 
81
  if "conversation" not in st.session_state:
 
95
  if st.button("Process"):
96
  with st.spinner("Processing"):
97
  # get pdf text
98
+ raw_text = get_files(pdf_docs)
99
 
100
  # get the text chunks
101
  text_chunks = get_text_chunks(raw_text)
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ