MHamdan committed on
Commit
b8febdd
·
verified ·
1 Parent(s): 4615492
Files changed (1) hide show
  1. app.py +50 -30
app.py CHANGED
@@ -7,6 +7,7 @@ import docx
7
  import time
8
  from langchain_community.llms import OpenAI
9
  from langchain.chains import ConversationChain
 
10
  from langchain_core.prompts import PromptTemplate
11
  from dotenv import load_dotenv
12
  import os
@@ -20,34 +21,56 @@ sentiment_analyzer = pipeline("sentiment-analysis")
20
  topic_classifier = pipeline("zero-shot-classification")
21
 
22
  def fetch_text_from_url(url):
23
- response = requests.get(url)
24
- soup = BeautifulSoup(response.text, "html.parser")
25
- return " ".join(p.get_text() for p in soup.find_all("p"))
 
 
 
 
26
 
27
  def extract_text_from_pdf(file):
28
- pdf_reader = PyPDF2.PdfReader(file)
29
- text = ""
30
- for page in pdf_reader.pages:
31
- text += page.extract_text()
32
- return text
 
 
 
33
 
34
  def extract_text_from_docx(file):
35
- doc = docx.Document(file)
36
- text = ""
37
- for para in doc.paragraphs:
38
- text += para.text + "\n"
39
- return text
 
 
 
40
 
41
  def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
42
  if input_type == "URL":
43
  progress(0, desc="Fetching text from URL")
44
- input_text = fetch_text_from_url(input_text)
 
 
 
45
  elif input_type == "File":
46
  progress(0, desc="Extracting text from file")
47
- if input_text.name.lower().endswith(".pdf"):
48
- input_text = extract_text_from_pdf(input_text)
49
- elif input_text.name.lower().endswith(".docx"):
50
- input_text = extract_text_from_docx(input_text)
 
 
 
 
 
 
 
 
 
51
  else:
52
  input_text = input_text.read().decode("utf-8")
53
 
@@ -75,20 +98,22 @@ def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
75
 
76
  return original_text, summary, sentiment, ", ".join(topics)
77
 
78
- def chat(input_text, chat_history):
79
  prompt_template = """
80
  Assistant is an AI language model that helps with text analysis tasks.
81
 
82
- {chat_history}
 
 
83
  Human: {input_text}
84
  Assistant:"""
85
 
86
  prompt = PromptTemplate(
87
- input_variables=["chat_history", "input_text"],
88
  template=prompt_template
89
  )
90
 
91
- chain = ConversationChain(llm=llm, prompt=prompt)
92
  response = chain.predict(input_text=input_text)
93
 
94
  return response
@@ -133,13 +158,8 @@ def create_interface():
133
  input_value = url
134
  else:
135
  input_value = file
136
-
137
- try:
138
- original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress_bar)
139
- except Exception as e:
140
- original_text = f"Error: {str(e)}"
141
- summary, sentiment, topics = "", "", ""
142
-
143
  return original_text, summary, sentiment, topics
144
 
145
  submit_button.click(
@@ -152,7 +172,7 @@ def create_interface():
152
  conversation_history.append(f"Human: {conversation_input}")
153
  response = chat(conversation_input, "\n".join(conversation_history))
154
  conversation_history.append(f"Assistant: {response}")
155
- return conversation_history, "", response
156
 
157
  conversation_button.click(
158
  fn=process_conversation,
 
7
  import time
8
  from langchain_community.llms import OpenAI
9
  from langchain.chains import ConversationChain
10
+ from langchain.memory import ConversationBufferMemory
11
  from langchain_core.prompts import PromptTemplate
12
  from dotenv import load_dotenv
13
  import os
 
21
  topic_classifier = pipeline("zero-shot-classification")
22
 
23
  def fetch_text_from_url(url):
24
+ try:
25
+ response = requests.get(url)
26
+ response.raise_for_status() # Raise an exception for 4xx or 5xx status codes
27
+ soup = BeautifulSoup(response.text, "html.parser")
28
+ return " ".join(p.get_text() for p in soup.find_all("p"))
29
+ except requests.exceptions.RequestException as e:
30
+ raise ValueError(f"Error fetching text from URL: {str(e)}")
31
 
32
  def extract_text_from_pdf(file):
33
+ try:
34
+ pdf_reader = PyPDF2.PdfReader(file)
35
+ text = ""
36
+ for page in pdf_reader.pages:
37
+ text += page.extract_text()
38
+ return text
39
+ except PyPDF2.errors.PdfReadError as e:
40
+ raise ValueError(f"Error extracting text from PDF: {str(e)}")
41
 
42
  def extract_text_from_docx(file):
43
+ try:
44
+ doc = docx.Document(file)
45
+ text = ""
46
+ for para in doc.paragraphs:
47
+ text += para.text + "\n"
48
+ return text
49
+ except docx.opc.exceptions.PackageNotFoundError as e:
50
+ raise ValueError(f"Error extracting text from DOCX: {str(e)}")
51
 
52
  def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
53
  if input_type == "URL":
54
  progress(0, desc="Fetching text from URL")
55
+ try:
56
+ input_text = fetch_text_from_url(input_text)
57
+ except ValueError as e:
58
+ return str(e), "", "", ""
59
  elif input_type == "File":
60
  progress(0, desc="Extracting text from file")
61
+ if input_text is None:
62
+ return "No file uploaded", "", "", ""
63
+ file_name = input_text.name.lower()
64
+ if file_name.endswith(".pdf"):
65
+ try:
66
+ input_text = extract_text_from_pdf(input_text)
67
+ except ValueError as e:
68
+ return str(e), "", "", ""
69
+ elif file_name.endswith(".docx"):
70
+ try:
71
+ input_text = extract_text_from_docx(input_text)
72
+ except ValueError as e:
73
+ return str(e), "", "", ""
74
  else:
75
  input_text = input_text.read().decode("utf-8")
76
 
 
98
 
99
  return original_text, summary, sentiment, ", ".join(topics)
100
 
101
+ def chat(input_text, conversation_history):
102
  prompt_template = """
103
  Assistant is an AI language model that helps with text analysis tasks.
104
 
105
+ Conversation history:
106
+ {conversation_history}
107
+
108
  Human: {input_text}
109
  Assistant:"""
110
 
111
  prompt = PromptTemplate(
112
+ input_variables=["conversation_history", "input_text"],
113
  template=prompt_template
114
  )
115
 
116
+ chain = ConversationChain(llm=llm, prompt=prompt, memory=ConversationBufferMemory(memory_key="conversation_history"))
117
  response = chain.predict(input_text=input_text)
118
 
119
  return response
 
158
  input_value = url
159
  else:
160
  input_value = file
161
+
162
+ original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress_bar)
 
 
 
 
 
163
  return original_text, summary, sentiment, topics
164
 
165
  submit_button.click(
 
172
  conversation_history.append(f"Human: {conversation_input}")
173
  response = chat(conversation_input, "\n".join(conversation_history))
174
  conversation_history.append(f"Assistant: {response}")
175
+ return "\n".join(conversation_history), "", response
176
 
177
  conversation_button.click(
178
  fn=process_conversation,