MHamdan committed on
Commit
b8febdd
·
verified ·
1 Parent(s): 4615492
Files changed (1) hide show
  1. app.py +50 -30
app.py CHANGED
@@ -7,6 +7,7 @@ import docx
7
  import time
8
  from langchain_community.llms import OpenAI
9
  from langchain.chains import ConversationChain
 
10
  from langchain_core.prompts import PromptTemplate
11
  from dotenv import load_dotenv
12
  import os
@@ -20,34 +21,56 @@ sentiment_analyzer = pipeline("sentiment-analysis")
20
  topic_classifier = pipeline("zero-shot-classification")
21
 
22
  def fetch_text_from_url(url):
23
- response = requests.get(url)
24
- soup = BeautifulSoup(response.text, "html.parser")
25
- return " ".join(p.get_text() for p in soup.find_all("p"))
 
 
 
 
26
 
27
  def extract_text_from_pdf(file):
28
- pdf_reader = PyPDF2.PdfReader(file)
29
- text = ""
30
- for page in pdf_reader.pages:
31
- text += page.extract_text()
32
- return text
 
 
 
33
 
34
  def extract_text_from_docx(file):
35
- doc = docx.Document(file)
36
- text = ""
37
- for para in doc.paragraphs:
38
- text += para.text + "\n"
39
- return text
 
 
 
40
 
41
  def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
42
  if input_type == "URL":
43
  progress(0, desc="Fetching text from URL")
44
- input_text = fetch_text_from_url(input_text)
 
 
 
45
  elif input_type == "File":
46
  progress(0, desc="Extracting text from file")
47
- if input_text.name.lower().endswith(".pdf"):
48
- input_text = extract_text_from_pdf(input_text)
49
- elif input_text.name.lower().endswith(".docx"):
50
- input_text = extract_text_from_docx(input_text)
 
 
 
 
 
 
 
 
 
51
  else:
52
  input_text = input_text.read().decode("utf-8")
53
 
@@ -75,20 +98,22 @@ def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
75
 
76
  return original_text, summary, sentiment, ", ".join(topics)
77
 
78
- def chat(input_text, chat_history):
79
  prompt_template = """
80
  Assistant is an AI language model that helps with text analysis tasks.
81
 
82
- {chat_history}
 
 
83
  Human: {input_text}
84
  Assistant:"""
85
 
86
  prompt = PromptTemplate(
87
- input_variables=["chat_history", "input_text"],
88
  template=prompt_template
89
  )
90
 
91
- chain = ConversationChain(llm=llm, prompt=prompt)
92
  response = chain.predict(input_text=input_text)
93
 
94
  return response
@@ -133,13 +158,8 @@ def create_interface():
133
  input_value = url
134
  else:
135
  input_value = file
136
-
137
- try:
138
- original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress_bar)
139
- except Exception as e:
140
- original_text = f"Error: {str(e)}"
141
- summary, sentiment, topics = "", "", ""
142
-
143
  return original_text, summary, sentiment, topics
144
 
145
  submit_button.click(
@@ -152,7 +172,7 @@ def create_interface():
152
  conversation_history.append(f"Human: {conversation_input}")
153
  response = chat(conversation_input, "\n".join(conversation_history))
154
  conversation_history.append(f"Assistant: {response}")
155
- return conversation_history, "", response
156
 
157
  conversation_button.click(
158
  fn=process_conversation,
 
7
  import time
8
  from langchain_community.llms import OpenAI
9
  from langchain.chains import ConversationChain
10
+ from langchain.memory import ConversationBufferMemory
11
  from langchain_core.prompts import PromptTemplate
12
  from dotenv import load_dotenv
13
  import os
 
21
  topic_classifier = pipeline("zero-shot-classification")
22
 
23
  def fetch_text_from_url(url):
24
+ try:
25
+ response = requests.get(url)
26
+ response.raise_for_status() # Raise an exception for 4xx or 5xx status codes
27
+ soup = BeautifulSoup(response.text, "html.parser")
28
+ return " ".join(p.get_text() for p in soup.find_all("p"))
29
+ except requests.exceptions.RequestException as e:
30
+ raise ValueError(f"Error fetching text from URL: {str(e)}")
31
 
32
  def extract_text_from_pdf(file):
33
+ try:
34
+ pdf_reader = PyPDF2.PdfReader(file)
35
+ text = ""
36
+ for page in pdf_reader.pages:
37
+ text += page.extract_text()
38
+ return text
39
+ except PyPDF2.errors.PdfReadError as e:
40
+ raise ValueError(f"Error extracting text from PDF: {str(e)}")
41
 
42
  def extract_text_from_docx(file):
43
+ try:
44
+ doc = docx.Document(file)
45
+ text = ""
46
+ for para in doc.paragraphs:
47
+ text += para.text + "\n"
48
+ return text
49
+ except docx.opc.exceptions.PackageNotFoundError as e:
50
+ raise ValueError(f"Error extracting text from DOCX: {str(e)}")
51
 
52
  def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
53
  if input_type == "URL":
54
  progress(0, desc="Fetching text from URL")
55
+ try:
56
+ input_text = fetch_text_from_url(input_text)
57
+ except ValueError as e:
58
+ return str(e), "", "", ""
59
  elif input_type == "File":
60
  progress(0, desc="Extracting text from file")
61
+ if input_text is None:
62
+ return "No file uploaded", "", "", ""
63
+ file_name = input_text.name.lower()
64
+ if file_name.endswith(".pdf"):
65
+ try:
66
+ input_text = extract_text_from_pdf(input_text)
67
+ except ValueError as e:
68
+ return str(e), "", "", ""
69
+ elif file_name.endswith(".docx"):
70
+ try:
71
+ input_text = extract_text_from_docx(input_text)
72
+ except ValueError as e:
73
+ return str(e), "", "", ""
74
  else:
75
  input_text = input_text.read().decode("utf-8")
76
 
 
98
 
99
  return original_text, summary, sentiment, ", ".join(topics)
100
 
101
+ def chat(input_text, conversation_history):
102
  prompt_template = """
103
  Assistant is an AI language model that helps with text analysis tasks.
104
 
105
+ Conversation history:
106
+ {conversation_history}
107
+
108
  Human: {input_text}
109
  Assistant:"""
110
 
111
  prompt = PromptTemplate(
112
+ input_variables=["conversation_history", "input_text"],
113
  template=prompt_template
114
  )
115
 
116
+ chain = ConversationChain(llm=llm, prompt=prompt, memory=ConversationBufferMemory(memory_key="conversation_history"))
117
  response = chain.predict(input_text=input_text)
118
 
119
  return response
 
158
  input_value = url
159
  else:
160
  input_value = file
161
+
162
+ original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress_bar)
 
 
 
 
 
163
  return original_text, summary, sentiment, topics
164
 
165
  submit_button.click(
 
172
  conversation_history.append(f"Human: {conversation_input}")
173
  response = chat(conversation_input, "\n".join(conversation_history))
174
  conversation_history.append(f"Assistant: {response}")
175
+ return "\n".join(conversation_history), "", response
176
 
177
  conversation_button.click(
178
  fn=process_conversation,