import gradio as gr
from transformers import pipeline
import requests
from bs4 import BeautifulSoup
import PyPDF2
import docx
import time
from langchain_community.llms import OpenAI
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
import os
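# Assumes a .env file next to this script that defines OPENAI_API_KEY=<your key>.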
load_dotenv() # Load environment variables from .env file
openai_api_key = os.getenv("OPENAI_API_KEY")
llm = OpenAI(openai_api_key=openai_api_key)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
topic_classifier = pipeline("zero-shot-classification")
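# Note: the sentiment and topic pipelines fall back to the transformers library
# defaults when no model is named (DistilBERT fine-tuned on SST-2 and
# facebook/bart-large-mnli at the time of writing); all three models are
# downloaded on first run.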
def fetch_text_from_url(url):
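    """Fetch the visible paragraph text from a web page, raising ValueError on request failures."""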
    try:
        response = requests.get(url, timeout=10)  # timeout guards against a hung request
        response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes
        soup = BeautifulSoup(response.text, "html.parser")
        return " ".join(p.get_text() for p in soup.find_all("p"))
    except requests.exceptions.RequestException as e:
        raise ValueError(f"Error fetching text from URL: {str(e)}")
def extract_text_from_pdf(file):
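    """Extract text from a PDF (path or file-like object), raising ValueError if it cannot be parsed."""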
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        text = ""
        for page in pdf_reader.pages:
            text += page.extract_text() or ""  # extract_text() can return None on image-only pages
        return text
    except PyPDF2.errors.PdfReadError as e:
        raise ValueError(f"Error extracting text from PDF: {str(e)}")
def extract_text_from_docx(file):
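    """Extract paragraph text from a DOCX file, raising ValueError if the package cannot be opened."""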
    try:
        doc = docx.Document(file)
        text = ""
        for para in doc.paragraphs:
            text += para.text + "\n"
        return text
    except docx.opc.exceptions.PackageNotFoundError as e:
        raise ValueError(f"Error extracting text from DOCX: {str(e)}")
def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
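    """Resolve the input (raw text, URL, or uploaded file) into plain text, then run
    the selected analysis tasks. Returns (text preview, summary, sentiment, topics)."""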
    if input_type == "URL":
        progress(0, desc="Fetching text from URL")
        try:
            input_text = fetch_text_from_url(input_text)
        except ValueError as e:
            return str(e), "", "", ""
    elif input_type == "File":
        progress(0, desc="Extracting text from file")
        if input_text is None:
            return "No file uploaded", "", "", ""
        file_name = input_text.name.lower()
        if file_name.endswith(".pdf"):
            try:
                # Pass the temp-file path; PyPDF2.PdfReader accepts it directly
                input_text = extract_text_from_pdf(input_text.name)
            except ValueError as e:
                return str(e), "", "", ""
        elif file_name.endswith(".docx"):
            try:
                # docx.Document also accepts a path
                input_text = extract_text_from_docx(input_text.name)
            except ValueError as e:
                return str(e), "", "", ""
        else:
            with open(input_text.name, "r", encoding="utf-8") as f:  # plain-text fallback
                input_text = f.read()
    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")
    summary, sentiment, topics = "", "", ""
    if "Summarization" in tasks:
        progress(0.3, desc="Generating summary")
        # truncation=True keeps long documents within the model's input limit
        summary = summarizer(input_text, max_length=100, min_length=30, do_sample=False, truncation=True)[0]["summary_text"]
        time.sleep(1)  # Add a minimal delay for demonstration purposes
    if "Sentiment Analysis" in tasks:
        progress(0.6, desc="Analyzing sentiment")
        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]  # Truncate input for sentiment analysis
        time.sleep(1)
    if "Topic Detection" in tasks:
        progress(0.9, desc="Detecting topics")
        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]  # Truncate input for topic detection
        time.sleep(1)
    progress(1, desc="Analysis completed")
    return original_text, summary, sentiment, ", ".join(topics)
def chat(input_text, conversation_history):
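    """Answer one chat turn, conditioning the LLM on the caller-supplied history."""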
    prompt_template = """
Assistant is an AI language model that helps with text analysis tasks.
Conversation history:
{conversation_history}
Human: {input_text}
Assistant:"""
    prompt = PromptTemplate(
        input_variables=["conversation_history", "input_text"],
        template=prompt_template
    )
    # The UI owns the running history, so format the prompt directly. A
    # ConversationChain rebuilt here with a fresh ConversationBufferMemory
    # would start empty on every request and silently ignore the history
    # argument.
    response = llm.invoke(prompt.format(conversation_history=conversation_history, input_text=input_text))
    return response
def create_interface():
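    """Assemble the Gradio Blocks UI: input selector, analysis tabs, and a chat tab."""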
    with gr.Blocks(title="Text Analysis App") as interface:
        gr.Markdown("## Choose data format to analyze")
        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
        text_input = gr.Textbox(label="Text Input", visible=False)
        url_input = gr.Textbox(label="URL Input", visible=False)
        file_input = gr.File(label="File Upload", visible=False)
        tasks_checkboxes = gr.CheckboxGroup(["Summarization", "Sentiment Analysis", "Topic Detection"], label="Analysis Tasks")
        submit_button = gr.Button("Analyze")
        with gr.Tab("Original Text"):
            original_text_output = gr.Textbox(label="Original Text")
        with gr.Tab("Summary"):
            summary_output = gr.Textbox(label="Summary")
        with gr.Tab("Sentiment"):
            sentiment_output = gr.Textbox(label="Sentiment")
        with gr.Tab("Topics"):
            topics_output = gr.Textbox(label="Topics")
        with gr.Tab("Conversation"):
            conversation_history = gr.State([])
            conversation_input = gr.Textbox(label="Human")
            conversation_output = gr.Textbox(label="Assistant")
            conversation_button = gr.Button("Send")

        def update_input_visibility(input_type):
            return {
                text_input: gr.update(visible=input_type == "Text"),
                url_input: gr.update(visible=input_type == "URL"),
                file_input: gr.update(visible=input_type == "File")
            }

        input_type.change(update_input_visibility, inputs=[input_type], outputs=[text_input, url_input, file_input])

        # gr.Progress is not a layout component; declaring it as a default
        # argument of the event handler lets Gradio inject a live tracker.
        def process_input(input_type, text, url, file, tasks, progress=gr.Progress()):
            if input_type == "Text":
                input_value = text
            elif input_type == "URL":
                input_value = url
            else:
                input_value = file
            return analyze_text(input_value, input_type, tasks, progress)

        submit_button.click(
            fn=process_input,
            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
            outputs=[original_text_output, summary_output, sentiment_output, topics_output]
        )

        def process_conversation(conversation_history, conversation_input):
            conversation_history.append(f"Human: {conversation_input}")
            # Pass the history *before* this turn; the prompt template appends
            # the current question itself.
            response = chat(conversation_input, "\n".join(conversation_history[:-1]))
            conversation_history.append(f"Assistant: {response}")
            # Return the list itself so gr.State stays a list (joining it into a
            # string here would break .append on the next turn), clear the input
            # box, and show the latest reply.
            return conversation_history, "", response

        conversation_button.click(
            fn=process_conversation,
            inputs=[conversation_history, conversation_input],
            outputs=[conversation_history, conversation_input, conversation_output]
        )
    return interface
if __name__ == "__main__":
    create_interface().launch()
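
# Quick local run (package names below are the usual PyPI ones; adjust to taste):
#   pip install gradio transformers torch requests beautifulsoup4 PyPDF2 python-docx langchain langchain-community python-dotenv
#   python app.py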