File size: 6,160 Bytes
5215be1
16eaebe
8e8a46c
16eaebe
 
 
 
22b42e4
 
 
 
 
 
 
16eaebe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e8a46c
16eaebe
 
 
 
 
8e8a46c
16eaebe
 
 
 
18d6761
22b42e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5215be1
1575e7a
 
 
 
 
 
 
 
 
 
 
a20f23d
 
 
 
 
 
 
 
22b42e4
 
 
 
 
a20f23d
1575e7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22b42e4
 
 
 
 
 
 
 
 
 
 
 
1575e7a
5215be1
 
16eaebe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import gradio as gr
from transformers import pipeline
import requests
from bs4 import BeautifulSoup
import PyPDF2
import docx
import time
from langchain import OpenAI, ConversationChain, PromptTemplate
from dotenv import load_dotenv
import os

# Module-level setup: everything below runs once, when the module is imported.
load_dotenv()  # Load environment variables from .env file
# The API key is read from the lowercase variable name "openai_api_key";
# os.getenv returns None when that variable is not set.
openai_api_key = os.getenv("openai_api_key")
llm = OpenAI(openai_api_key=openai_api_key)

# Hugging Face pipelines shared by analyze_text(). Only the summarizer names a
# model explicitly; the other two are created without a model argument.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
topic_classifier = pipeline("zero-shot-classification")

def fetch_text_from_url(url):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text of every ``<p>`` tag in the page, joined with single spaces.
        An empty string when the page contains no ``<p>`` tags.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with a 4xx/5xx status.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of analysing the error page's text.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))

def extract_text_from_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: The PDF source, passed straight to ``PyPDF2.PdfReader``.

    Returns:
        The extracted text of all pages, in page order, with no separator
        inserted between pages.
    """
    reader = PyPDF2.PdfReader(file)
    return "".join(page.extract_text() for page in reader.pages)

def extract_text_from_docx(file):
    """Return the text of a Word document, one paragraph per line.

    Args:
        file: The document source, passed straight to ``docx.Document``.

    Returns:
        Every paragraph's text followed by a newline, in document order.
    """
    document = docx.Document(file)
    return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)

def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
    """Run the selected analysis tasks on text, a URL, or an uploaded file.

    Args:
        input_text: The raw text when ``input_type`` is "Text", the address
            to fetch when it is "URL", or the uploaded file object when it is
            "File". A file object must expose ``.name``; for anything other
            than .pdf/.docx its ``.read()`` is used when available, otherwise
            the file at ``.name`` is read from disk.
        input_type: One of "Text", "URL" or "File".
        tasks: Collection of task names to run: any of "Summarization",
            "Sentiment Analysis" and "Topic Detection". ``None`` is treated
            as an empty selection.
        progress: Callable invoked as ``progress(fraction, desc=...)`` to
            report progress.

    Returns:
        A 4-tuple of strings ``(original_text, summary, sentiment, topics)``:
        the first 1000 characters of the text (followed by "..." when it was
        longer), the summary, the sentiment label, and the topic labels
        joined with ", " in the order the classifier returned them. Entries
        for tasks that were not requested are empty strings.

    Raises:
        ValueError: If ``input_text`` is ``None``.
    """
    if input_text is None:
        # E.g. "File" selected but nothing uploaded: report it clearly rather
        # than failing later with an AttributeError/TypeError on None.
        raise ValueError("No input provided")
    # Guard the membership tests below against a None task selection.
    tasks = tasks or ()

    if input_type == "URL":
        progress(0, desc="Fetching text from URL")
        input_text = fetch_text_from_url(input_text)
    elif input_type == "File":
        progress(0, desc="Extracting text from file")
        filename = input_text.name.lower()
        if filename.endswith(".pdf"):
            input_text = extract_text_from_pdf(input_text)
        elif filename.endswith(".docx"):
            input_text = extract_text_from_docx(input_text)
        else:
            if hasattr(input_text, "read"):
                raw = input_text.read()
            else:
                # Uploads without read() (e.g. a path string that carries
                # .name) are read from disk instead.
                with open(input_text.name, "rb") as handle:
                    raw = handle.read()
            # A handle opened in text mode already yields str.
            input_text = raw.decode("utf-8") if isinstance(raw, bytes) else raw

    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")

    summary, sentiment, topics = "", "", []

    if not input_text.strip():
        # Nothing to analyse; skip the models rather than feed them "".
        progress(1, desc="Analysis completed")
        return original_text, summary, sentiment, ""

    if "Summarization" in tasks:
        progress(0.3, desc="Generating summary")
        # truncation=True clips inputs longer than the model's maximum
        # sequence length instead of letting the pipeline fail on them.
        summary = summarizer(
            input_text,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        time.sleep(1)  # Add a minimal delay for demonstration purposes

    if "Sentiment Analysis" in tasks:
        progress(0.6, desc="Analyzing sentiment")
        # Only the first 512 characters are classified.
        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]
        time.sleep(1)

    if "Topic Detection" in tasks:
        progress(0.9, desc="Detecting topics")
        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
        # Only the first 512 characters are classified.
        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]
        time.sleep(1)

    progress(1, desc="Analysis completed")

    return original_text, summary, sentiment, ", ".join(topics)

def chat(input_text, chat_history):
    """Ask the LLM to reply to a message in the context of a conversation.

    Args:
        input_text: The latest human message.
        chat_history: Transcript of the conversation so far as one string.
            ``None`` or an empty string means there is no prior context.

    Returns:
        The model's reply, with surrounding whitespace stripped.
    """
    prompt_template = """
    Assistant is an AI language model that helps with text analysis tasks.

    {chat_history}
    Human: {input_text}
    Assistant:"""

    prompt = PromptTemplate(
        input_variables=["chat_history", "input_text"],
        template=prompt_template,
    )

    # The previous implementation wrapped this prompt in a ConversationChain
    # and called predict(input_text=...) only, so chat_history was never
    # supplied. ConversationChain also expects the prompt variables to match
    # its own memory and input keys, which these do not. Filling the template
    # here and calling the LLM directly uses both arguments as intended.
    filled_prompt = prompt.format(
        chat_history=chat_history or "",
        input_text=input_text,
    )
    response = llm(filled_prompt)

    return response.strip()

def create_interface():
    """Build the Gradio Blocks UI for the text analysis app.

    The layout has an input-type selector with one input widget per type
    (only the selected one is shown), a task checklist, result tabs for the
    original text, summary, sentiment and topics, and a conversation tab
    backed by ``chat()``.

    Returns:
        The configured ``gr.Blocks`` instance, ready for ``.launch()``.
    """
    with gr.Blocks(title="Text Analysis App") as interface:
        # Default to plain text so the form is usable as soon as it loads;
        # previously no type was selected and every input widget was hidden.
        input_type = gr.Dropdown(["Text", "URL", "File"], value="Text", label="Input Type")
        text_input = gr.Textbox(label="Text", visible=True)
        url_input = gr.Textbox(label="URL", visible=False)
        file_input = gr.File(label="File", visible=False)

        tasks_checkboxes = gr.CheckboxGroup(["Summarization", "Sentiment Analysis", "Topic Detection"], label="Analysis Tasks")

        submit_button = gr.Button("Analyze")

        with gr.Tab("Original Text"):
            original_text_output = gr.Textbox(label="Original Text")
        with gr.Tab("Summary"):
            summary_output = gr.Textbox(label="Summary")
        with gr.Tab("Sentiment"):
            sentiment_output = gr.Textbox(label="Sentiment")
        with gr.Tab("Topics"):
            topics_output = gr.Textbox(label="Topics")
        with gr.Tab("Conversation"):
            conversation_history = gr.State([])
            conversation_input = gr.Textbox(label="Human")
            conversation_output = gr.Textbox(label="Assistant")
            conversation_button = gr.Button("Send")

        def update_input_visibility(input_type):
            """Show only the input widget that matches the selected type."""
            # Setting `.visible` on a component inside a handler does not
            # reach the browser; the change has to be returned as an update
            # for each component listed in `outputs`.
            return (
                gr.update(visible=input_type == "Text"),
                gr.update(visible=input_type == "URL"),
                gr.update(visible=input_type == "File"),
            )

        input_type.change(
            update_input_visibility,
            inputs=input_type,
            outputs=[text_input, url_input, file_input],
        )

        def process_input(input_type, text, url, file, tasks, progress=gr.Progress()):
            """Pick the active input and run the analysis, reporting errors as text."""
            # Gradio tracks progress through a gr.Progress() default argument
            # of the event handler itself, so it is declared here and passed on.
            if input_type == "Text":
                input_value = text
            elif input_type == "URL":
                input_value = url
            else:
                input_value = file

            # UI boundary: show any failure in the first output box rather
            # than letting the event handler crash.
            try:
                original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress)
            except Exception as e:
                original_text = f"Error: {str(e)}"
                summary, sentiment, topics = "", "", ""

            return original_text, summary, sentiment, topics

        submit_button.click(
            fn=process_input,
            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
            outputs=[original_text_output, summary_output, sentiment_output, topics_output]
        )

        def process_conversation(conversation_history, conversation_input):
            """Send one message to chat() and return the extended history."""
            # Pass only the earlier turns: chat() puts the current message
            # into its prompt itself, so including it here would repeat it.
            transcript = "\n".join(conversation_history)
            response = chat(conversation_input, transcript)
            # Build a new list instead of mutating the State value in place.
            updated_history = conversation_history + [
                f"Human: {conversation_input}",
                f"Assistant: {response}",
            ]
            return updated_history, "", response

        conversation_button.click(
            fn=process_conversation,
            inputs=[conversation_history, conversation_input],
            outputs=[conversation_history, conversation_input, conversation_output]
        )

    # Progress reporting relies on the event queue; enable it explicitly
    # (NOTE(review): believed required on Gradio 3.x and already the default
    # on newer releases, where this call is harmless - confirm for the pinned
    # version).
    interface.queue()

    return interface

# Build and launch the Gradio app only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    create_interface().launch()