Spaces:
Running
Running
app
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ import docx
|
|
7 |
import time
|
8 |
from langchain_community.llms import OpenAI
|
9 |
from langchain.chains import ConversationChain
|
|
|
10 |
from langchain_core.prompts import PromptTemplate
|
11 |
from dotenv import load_dotenv
|
12 |
import os
|
@@ -20,34 +21,56 @@ sentiment_analyzer = pipeline("sentiment-analysis")
|
|
20 |
topic_classifier = pipeline("zero-shot-classification")
|
21 |
|
22 |
def fetch_text_from_url(url):
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
26 |
|
27 |
def extract_text_from_pdf(file):
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
33 |
|
34 |
def extract_text_from_docx(file):
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
40 |
|
41 |
def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
|
42 |
if input_type == "URL":
|
43 |
progress(0, desc="Fetching text from URL")
|
44 |
-
|
|
|
|
|
|
|
45 |
elif input_type == "File":
|
46 |
progress(0, desc="Extracting text from file")
|
47 |
-
if input_text
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
else:
|
52 |
input_text = input_text.read().decode("utf-8")
|
53 |
|
@@ -75,20 +98,22 @@ def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
|
|
75 |
|
76 |
return original_text, summary, sentiment, ", ".join(topics)
|
77 |
|
78 |
-
def chat(input_text,
|
79 |
prompt_template = """
|
80 |
Assistant is an AI language model that helps with text analysis tasks.
|
81 |
|
82 |
-
|
|
|
|
|
83 |
Human: {input_text}
|
84 |
Assistant:"""
|
85 |
|
86 |
prompt = PromptTemplate(
|
87 |
-
input_variables=["
|
88 |
template=prompt_template
|
89 |
)
|
90 |
|
91 |
-
chain = ConversationChain(llm=llm, prompt=prompt)
|
92 |
response = chain.predict(input_text=input_text)
|
93 |
|
94 |
return response
|
@@ -133,13 +158,8 @@ def create_interface():
|
|
133 |
input_value = url
|
134 |
else:
|
135 |
input_value = file
|
136 |
-
|
137 |
-
|
138 |
-
original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress_bar)
|
139 |
-
except Exception as e:
|
140 |
-
original_text = f"Error: {str(e)}"
|
141 |
-
summary, sentiment, topics = "", "", ""
|
142 |
-
|
143 |
return original_text, summary, sentiment, topics
|
144 |
|
145 |
submit_button.click(
|
@@ -152,7 +172,7 @@ def create_interface():
|
|
152 |
conversation_history.append(f"Human: {conversation_input}")
|
153 |
response = chat(conversation_input, "\n".join(conversation_history))
|
154 |
conversation_history.append(f"Assistant: {response}")
|
155 |
-
return conversation_history, "", response
|
156 |
|
157 |
conversation_button.click(
|
158 |
fn=process_conversation,
|
|
|
7 |
import time
|
8 |
from langchain_community.llms import OpenAI
|
9 |
from langchain.chains import ConversationChain
|
10 |
+
from langchain.memory import ConversationBufferMemory
|
11 |
from langchain_core.prompts import PromptTemplate
|
12 |
from dotenv import load_dotenv
|
13 |
import os
|
|
|
21 |
topic_classifier = pipeline("zero-shot-classification")
|
22 |
|
23 |
def fetch_text_from_url(url):
    """Download *url* and return the concatenated text of its ``<p>`` elements.

    Args:
        url: The web page to fetch.

    Returns:
        A single string joining the text of every paragraph, separated by spaces.

    Raises:
        ValueError: If the request fails (network error or 4xx/5xx status).
    """
    try:
        # BUG FIX: requests.get() has no default timeout, so a stalled
        # connection would hang the Gradio worker indefinitely.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes
        soup = BeautifulSoup(response.text, "html.parser")
        return " ".join(p.get_text() for p in soup.find_all("p"))
    except requests.exceptions.RequestException as e:
        # Chain the cause so the original network error stays visible.
        raise ValueError(f"Error fetching text from URL: {str(e)}") from e
|
31 |
|
32 |
def extract_text_from_pdf(file):
    """Return the text of every page of the uploaded PDF *file*.

    Args:
        file: A file-like object containing the PDF bytes.

    Returns:
        The page texts concatenated in order, with no separator added.

    Raises:
        ValueError: If PyPDF2 cannot parse the file.
    """
    try:
        reader = PyPDF2.PdfReader(file)
        # Concatenate every page's extracted text in document order.
        return "".join(page.extract_text() for page in reader.pages)
    except PyPDF2.errors.PdfReadError as e:
        raise ValueError(f"Error extracting text from PDF: {str(e)}")
|
41 |
|
42 |
def extract_text_from_docx(file):
    """Return the text of every paragraph of the uploaded DOCX *file*.

    Args:
        file: A file-like object (or path) accepted by ``docx.Document``.

    Returns:
        Each paragraph's text followed by a newline, concatenated in order
        (an empty document yields the empty string).

    Raises:
        ValueError: If the file is not a valid DOCX package.
    """
    try:
        document = docx.Document(file)
        # One trailing newline per paragraph, matching the original loop.
        return "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)
    except docx.opc.exceptions.PackageNotFoundError as e:
        raise ValueError(f"Error extracting text from DOCX: {str(e)}")
|
51 |
|
52 |
def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
|
53 |
if input_type == "URL":
|
54 |
progress(0, desc="Fetching text from URL")
|
55 |
+
try:
|
56 |
+
input_text = fetch_text_from_url(input_text)
|
57 |
+
except ValueError as e:
|
58 |
+
return str(e), "", "", ""
|
59 |
elif input_type == "File":
|
60 |
progress(0, desc="Extracting text from file")
|
61 |
+
if input_text is None:
|
62 |
+
return "No file uploaded", "", "", ""
|
63 |
+
file_name = input_text.name.lower()
|
64 |
+
if file_name.endswith(".pdf"):
|
65 |
+
try:
|
66 |
+
input_text = extract_text_from_pdf(input_text)
|
67 |
+
except ValueError as e:
|
68 |
+
return str(e), "", "", ""
|
69 |
+
elif file_name.endswith(".docx"):
|
70 |
+
try:
|
71 |
+
input_text = extract_text_from_docx(input_text)
|
72 |
+
except ValueError as e:
|
73 |
+
return str(e), "", "", ""
|
74 |
else:
|
75 |
input_text = input_text.read().decode("utf-8")
|
76 |
|
|
|
98 |
|
99 |
return original_text, summary, sentiment, ", ".join(topics)
|
100 |
|
101 |
+
def chat(input_text, conversation_history):
    """Generate an assistant reply to *input_text*, conditioned on history.

    Args:
        input_text: The latest human message.
        conversation_history: Prior turns already joined into one string by
            the caller (see process_conversation, which joins with newlines).

    Returns:
        The raw completion string produced by the module-level ``llm``.
    """
    prompt_template = """
    Assistant is an AI language model that helps with text analysis tasks.

    Conversation history:
    {conversation_history}

    Human: {input_text}
    Assistant:"""

    prompt = PromptTemplate(
        input_variables=["conversation_history", "input_text"],
        template=prompt_template,
    )

    # BUG FIX: the previous version wrapped the LLM in a ConversationChain
    # with a *fresh* ConversationBufferMemory on every call, so the chain's
    # memory was always empty and the conversation_history argument was
    # silently ignored. ConversationChain also validates that the prompt's
    # variables match its [memory_key, "input"] pair, so the custom prompt
    # with "input_text" raised at construction/predict time. Formatting the
    # prompt ourselves uses the history the caller actually passed in.
    response = llm(
        prompt.format(
            conversation_history=conversation_history,
            input_text=input_text,
        )
    )

    return response
|
|
|
158 |
input_value = url
|
159 |
else:
|
160 |
input_value = file
|
161 |
+
|
162 |
+
original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress_bar)
|
|
|
|
|
|
|
|
|
|
|
163 |
return original_text, summary, sentiment, topics
|
164 |
|
165 |
submit_button.click(
|
|
|
172 |
conversation_history.append(f"Human: {conversation_input}")
|
173 |
response = chat(conversation_input, "\n".join(conversation_history))
|
174 |
conversation_history.append(f"Assistant: {response}")
|
175 |
+
return "\n".join(conversation_history), "", response
|
176 |
|
177 |
conversation_button.click(
|
178 |
fn=process_conversation,
|