Update app.py
app.py
CHANGED
@@ -1,624 +1,258 @@
-"""
-# MANIFESTO ANALYSIS
-"""
-
-##IMPORTING LIBRARIES
 import random
 import matplotlib.pyplot as plt
 import nltk
-from nltk.tokenize import word_tokenize,sent_tokenize
 from nltk.corpus import stopwords
-from nltk.stem.porter import PorterStemmer
 from nltk.stem import WordNetLemmatizer
-from nltk.corpus import stopwords
-from nltk.tokenize import word_tokenize
 from nltk.probability import FreqDist
 from cleantext import clean
 import textract
 import urllib.request
-import
-
-import io
-from io import StringIO,BytesIO
-import sys
 import pandas as pd
 import cv2
 import re
-from wordcloud import WordCloud,
 from textblob import TextBlob
 from PIL import Image
 import os
 import gradio as gr
-from
-import contractions
-import unidecode
 import groq
 import json
-
-from sklearn.feature_extraction.text import TfidfVectorizer
-from collections import Counter
 import numpy as np
 
-#
-
-nltk.download('punkt_tab')
-nltk.download('stopwords')
-nltk.download('punkt')
-nltk.download('wordnet')
-nltk.download('words')
-
-# Initialize Groq client for LLM capabilities
-try:
-    groq_api_key = GROQ_API_KEY
-    if groq_api_key:
-        groq_client = groq.Groq(api_key=groq_api_key)
-    else:
-        print("Warning: GROQ_API_KEY not found in environment variables. Summarization will be disabled.")
-        groq_client = None
-except Exception as e:
-    print(f"Error initializing Groq client: {e}")
-    groq_client = None
 
-
-#
-#parsed_text=parsed_text.name
-#raw_party =parser.from_file(parsed_text)
-# raw_party = raw_party['content'],cache_examples=True
-# return clean(raw_party)
-
-def Parsing(parsed_text):
-    '''
-    Process a PDF file and extract its text content
-    parsed_text: Can be a file object with a 'name' attribute or a file path string
-    '''
-    try:
-        # Handle different input types
-        if hasattr(parsed_text, 'name'):
-            file_path = parsed_text.name
-        else:
-            file_path = parsed_text
-
-        # Extract text from PDF
-        raw_party = textract.process(file_path, encoding='ascii', method='pdfminer')
-        return clean(raw_party)
-    except Exception as e:
-        print(f"Error parsing PDF: {str(e)}")
-        return f"Error parsing PDF: {str(e)}"
-
-#Added more stopwords to avoid irrelevant terms
 stop_words = set(stopwords.words('english'))
 stop_words.update('ask','much','thank','etc.', 'e', 'We', 'In', 'ed','pa', 'This','also', 'A', 'fu','To','5','ing', 'er', '2')
 
 def clean_text(text):
-
-    text = text.strip(" ")
-    text = re.sub(" +", " ", text).strip() # get rid of multiple spaces and replace with a single
-    text = [word for word in text.split() if word not in stop_words]
-    text = ' '.join(text)
-    return text
-
-# text_Party=clean_text(raw_party)
 
 def Preprocess(textParty):
-
-    #Removing all stop words
-    pattern = re.compile(r'\b(' + r'|'.join(stopwords.words('english')) + r')\b\s*')
-    text2Party = pattern.sub('', text1Party)
-    # fdist_cong = FreqDist(word_tokens_cong)
-    return text2Party
-
-'''
-Using Concordance, you can see each time a word is used, along with its
-immediate context. It can give you a peek into how a word is being used
-at the sentence level and what words are used with it
-'''
-def conc(text_Party,strng):
-    word_tokens_party = word_tokenize(text_Party)
-    moby = Text(word_tokens_party)
-    resultList = []
-    for i in range(0,1):
-        save_stdout = sys.stdout
-        result = StringIO()
-        sys.stdout = result
-        moby.concordance(strng,lines=4,width=82)
-        sys.stdout = save_stdout
-        s=result.getvalue().splitlines()
-    return result.getvalue()
-
-def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin = 10, right_margin = 10, numLins=4):
-    """
-    Function to get all the phases that contain the target word in a text/passage tar_passage.
-    Workaround to save the output given by nltk Concordance function
-
-    str target_word, str tar_passage int left_margin int right_margin --> list of str
-    left_margin and right_margin allocate the number of words/pununciation before and after target word
-    Left margin will take note of the beginning of the text
-    """
-    # Handle empty or None search terms
-    if not target_word or target_word.strip() == "":
-        return "Please enter a search term"
-
-    # Create list of tokens using nltk function
-    tokens = nltk.word_tokenize(tar_passage)
-
-    # Create the text of tokens
-    text = nltk.Text(tokens)
-
-    ## Collect all the index or offset position of the target word
-    c = nltk.ConcordanceIndex(text.tokens, key = lambda s: s.lower())
-
-    ## Collect the range of the words that is within the target word by using text.tokens[start;end].
-    ## The map function is use so that when the offset position - the target range < 0, it will be default to zero
-    concordance_txt = ([text.tokens[list(map(lambda x: x-5 if (x-left_margin)>0 else 0,[offset]))[0]:offset+right_margin] for offset in c.offsets(target_word)])
-
-    ## join the sentences for each of the target phrase and return it
-    result = [''.join([x.replace("Y","")+' ' for x in con_sub]) for con_sub in concordance_txt][:-1]
-    result=result[:numLins+1]
-
-    res='\n\n'.join(result)
-    return res
 
-
-    factor = target/raw
-    return {key:value*factor for key,value in d.items()}
-
-def generate_summary(text, max_length=1000):
-    """
-    Generate a summary of the manifesto text using Groq LLM
-    """
     if not groq_client:
         return "Summarization is not available. Please set up your GROQ_API_KEY in the .env file."
-
-    # Truncate text if it's too long to fit in context window
     if len(text) > 10000:
         text = text[:10000]
-
     try:
-        # Use Groq's LLaMA 3 model for summarization
         completion = groq_client.chat.completions.create(
-            model="llama3-8b-8192",
             messages=[
                 {"role": "system", "content": "You are a helpful assistant that summarizes political manifestos. Provide a concise, objective summary that captures the key policy proposals, themes, and promises in the manifesto."},
                 {"role": "user", "content": f"Please summarize the following political manifesto text in about 300-500 words, focusing on the main policy areas, promises, and themes:\n\n{text}"}
             ],
-            temperature=0.3,
-            max_tokens=800
         )
         return completion.choices[0].message.content
     except Exception as e:
-        return f"Error generating summary: {str(e)}
 
 def fDistance(text2Party):
-
-    # Traditional frequency distribution
-    word_tokens_party = word_tokenize(text2Party) #Tokenizing
-    fdistance = FreqDist(word_tokens_party).most_common(10)
-    mem={}
-    for x in fdistance:
-        mem[x[0]]=x[1]
 
-
-    sorted_tfidf = dict(sorted(tfidf_scores.items(), key=lambda x: x[1], reverse=True)[:10])
-
-    # Combine traditional frequency with TF-IDF for better results
-    combined_scores = {}
-    for word in set(list(mem.keys()) + list(sorted_tfidf.keys())):
-        # Normalize and combine both scores (with more weight to TF-IDF)
-        freq_score = mem.get(word, 0) / max(mem.values()) if mem else 0
-        tfidf_score = sorted_tfidf.get(word, 0) / max(sorted_tfidf.values()) if sorted_tfidf else 0
-        combined_scores[word] = (freq_score * 0.3) + (tfidf_score * 0.7) # Weight TF-IDF higher
-
-    # Get top 10 words by combined score
-    top_words = dict(sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)[:10])
-
-    return normalize(top_words)
-
-def fDistancePlot(text2Party,plotN=15):
-    '''
-    Most Frequent Words Visualization
-    '''
-    word_tokens_party = word_tokenize(text2Party) #Tokenizing
-    fdistance = FreqDist(word_tokens_party)
-    plt.title('Frequency Distribution')
-    plt.axis('off')
-    plt.figure(figsize=(4,3))
-    fdistance.plot(plotN)
-    plt.tight_layout()
-    buf = BytesIO()
-    plt.savefig(buf)
-    buf.seek(0)
-    img1 = np.array(Image.open(buf))
-    plt.clf()
-    return img1
-
-def DispersionPlot(textParty):
-    '''
-    Dispersion PLot
-    '''
-    word_tokens_party = word_tokenize(textParty) #Tokenizing
-    moby = Text(word_tokens_party)
-    fdistance = FreqDist(word_tokens_party)
-    word_Lst=[]
-    for x in range(5):
-        word_Lst.append(fdistance.most_common(6)[x][0])
-
-    plt.axis('off')
-    plt.figure(figsize=(4,3))
-    plt.title('Dispersion Plot')
-    moby.dispersion_plot(word_Lst)
-    plt.plot(color="#EF6D6D")
-    plt.tight_layout()
-    buf = BytesIO()
-    plt.savefig(buf)
-    buf.seek(0)
-    img = Image.open(buf)
-    plt.clf()
-    return img
-
-def getSubjectivity(text):
-    '''
-    Create a function to get the polarity
-    '''
-    return TextBlob(text).sentiment.subjectivity
-
-def getPolarity(text):
-    '''
-    Create a function to get the polarity
-    '''
-    return TextBlob(text).sentiment.polarity
-
-def getAnalysis(score):
-    if score < 0:
-        return 'Negative'
-    elif score == 0:
-        return 'Neutral'
-    else:
-        return 'Positive'
-
-def Original_Image(path):
-    img= cv2.imread(path)
-    img= cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    return img
-
-def Image_Processed(path):
-    '''
-    Reading the image file
-    '''
-    img= cv2.imread(path)
-    img= cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-
-    #Thresholding
-    ret, bw_img = cv2.threshold(img, 124, 255, cv2.THRESH_BINARY)
-
-    return bw_img
 
-def
-
-    '''
-    mask =mask_img
-    # Create and generate a word cloud image:
-    wordcloud = WordCloud(max_words=maxWord, background_color=bckColor,
-                          mask=mask,
-                          colormap='nipy_spectral_r',
-                          contour_color=contCol,
-                          width=800, height=800,
-                          margin=2,
-                          contour_width=3).generate(text_Party_pr)
 
-
-    if colorGener==True:
-        image_colors = ImageColorGenerator(orgIm)
-        plt.imshow(wordcloud.recolor(color_func= image_colors),interpolation="bilinear")
-
-    else:
-        plt.imshow(wordcloud)
-
-def word_cloud_generator(parsed_text_name,text_Party):
-    parsed=parsed_text_name.lower()
 
-
-        contCol='white', bckColor='black')
-        plt.tight_layout()
-        buf = BytesIO()
-        plt.savefig(buf)
-        buf.seek(0)
-        img1 = Image.open(buf)
-        plt.clf()
-        return img1
 
-
-        plt.clf()
-        return img3
-
-    else :
-        wordcloud = WordCloud(max_words=2000, background_color="white",mode="RGB").generate(text_Party)
-        plt.figure(figsize=(5,5))
-        plt.imshow(wordcloud, interpolation="bilinear")
-        plt.axis("off")
-        plt.tight_layout()
-        buf = BytesIO()
-        plt.savefig(buf)
-        buf.seek(0)
-        img4 = Image.open(buf)
-        plt.clf()
-        return img4
 
-
-    urllib.request.urlretrieve(url, filename=path_input)
 
-
-    Main analysis function that processes the manifesto and generates all outputs
-    Manifesto: PDF file uploaded by the user
-    Search: Search term entered by the user
-    '''
-    try:
-        print(f"Analysis function called with: Manifesto={Manifesto}, Search={Search}")
-
-        # Check if a file was uploaded
-        if Manifesto is None:
-            print("No file uploaded")
-            return "Please upload a PDF file", {}, None, None, None, None, None, "No file uploaded"
-
-        # Handle empty search term
-        if Search is None or Search.strip() == "":
-            Search = "government" # Default search term
-            print(f"Using default search term: {Search}")
-        else:
-            print(f"Using provided search term: {Search}")
-
-        # Process the uploaded PDF
-        print(f"Processing file: {Manifesto.name if hasattr(Manifesto, 'name') else Manifesto}")
-        raw_party = Parsing(Manifesto)
-
-        # Check if parsing was successful
-        if isinstance(raw_party, str) and raw_party.startswith("Error"):
-            print(f"Parsing error: {raw_party}")
-            return raw_party, {}, None, None, None, None, None, "Error generating summary due to parsing failure"
-
-        print("Parsing successful, cleaning text...")
-        text_Party = clean_text(raw_party)
-        text_Party_processed = Preprocess(text_Party)
-
-        summary = generate_summary(raw_party)
-
-        print("Performing sentiment analysis...")
-        df = pd.DataFrame(raw_party.split('\n'), columns=['Content'])
-        df['Subjectivity'] = df['Content'].apply(getSubjectivity)
-        df['Polarity'] = df['Content'].apply(getPolarity)
-        df['Analysis on Polarity'] = df['Polarity'].apply(getAnalysis)
-        df['Analysis on Subjectivity'] = df['Subjectivity'].apply(getAnalysis)
-
-        # Generate sentiment analysis plot
-        print("Generating sentiment analysis plot...")
-        plt.title('Sentiment Analysis')
-        plt.xlabel('Sentiment')
-        plt.ylabel('Counts')
-        plt.figure(figsize=(4,3))
-        df['Analysis on Polarity'].value_counts().plot(kind ='bar',color="#FF9F45")
-        plt.tight_layout()
-        buf = BytesIO()
-        plt.savefig(buf)
-        buf.seek(0)
-        img1 = Image.open(buf)
-        plt.clf()
-
-        # Generate subjectivity analysis plot
-        print("Generating subjectivity analysis plot...")
-        plt.figure(figsize=(4,3))
-        df['Analysis on Subjectivity'].value_counts().plot(kind ='bar',color="#B667F1")
-        plt.tight_layout()
-        buf = BytesIO()
-        plt.savefig(buf)
-        buf.seek(0)
-        img2 = Image.open(buf)
-        plt.clf()
-
-        # Generate word cloud
-        print("Generating word cloud...")
-        img3 = word_cloud_generator(Manifesto.name, text_Party_processed)
-
-        # Generate frequency distribution and dispersion plots
-        print("Generating frequency distribution...")
-        fdist_Party = fDistance(text_Party_processed)
-        img4 = fDistancePlot(text_Party_processed)
-
-        print("Generating dispersion plot...")
-        img5 = DispersionPlot(text_Party_processed)
-
-        # Search for the term in the text
-        print(f"Searching for term: {Search}")
-        searChRes = get_all_phases_containing_tar_wrd(Search, text_Party_processed)
-        searChRes = searChRes.replace(Search, "\u0332".join(Search))
-
-        plt.close('all')
-        print("Analysis completed successfully")
-        return searChRes, fdist_Party, img1, img2, img3, img4, img5, summary
-
-    except Exception as e:
-        error_message = f"Error analyzing manifesto: {str(e)}"
-        print(error_message)
-        import traceback
-        traceback.print_exc()
-        # Return placeholder values in case of error
-        return error_message, {}, None, None, None, None, None, "Error generating summary. Please check the console for details."
 
 filePdf = "file"
-text = gr.Textbox(label='Context Based Search')
-mfw=gr.Label(label="Most Relevant Topics (LLM Enhanced)")
-plot1=gr.Image(label='Sentiment Analysis')
-plot2=gr.Image(label='Subjectivity Analysis')
-plot3=gr.Image(label='Word Cloud')
-plot4=gr.Image(label='Frequency Distribution')
-plot5=gr.Image(label='Dispersion Plot')
-summary_output = gr.Textbox(label='AI-Generated Summary', lines=10)
 
 with gr.Blocks(title='Manifesto Analysis') as demo:
     gr.Markdown("# Manifesto Analysis with LLM Enhancement")
-    gr.Markdown("### Analyze political manifestos with advanced NLP and LLM techniques")
-
     with gr.Row():
-        with gr.Column(
             file_input = gr.File(label="Upload Manifesto PDF", file_types=[".pdf"])
             search_input = gr.Textbox(label="Search Term", placeholder="Enter a term to search in the manifesto")
             submit_btn = gr.Button("Analyze Manifesto")
-
     with gr.Tabs():
-        with gr.TabItem("Summary"):
-
-        with gr.TabItem("Search Results"):
-            text
-
-        with gr.TabItem("Key Topics"):
-            mfw
-
         with gr.TabItem("Visualizations"):
             with gr.Row():
-                plot3
-                with gr.Column(scale=1):
-                    plot4
-
-            with gr.Row():
-                with gr.Column(scale=1):
-                    plot1
-                with gr.Column(scale=1):
-                    plot2
-
             with gr.Row():
-
     submit_btn.click(
         fn=analysis,
         inputs=[file_input, search_input],
-        outputs=[
     )
-
-    # Add a debug print to verify the button is connected
-    print("Button connected to analysis function")
-
     gr.Examples(
         examples=[
-            [
-            [
-            [
         ],
         inputs=[file_input, search_input]
     )
 
-demo.launch(debug=True, share=False, show_error=True)
-
-# Old interface code replaced by the Blocks implementation above
-# io=gr.Interface(fn=analysis, inputs=[filePdf,Search_txt], outputs=[text,mfw,plot1,plot2,plot3,plot4,plot5], title='Manifesto Analysis',examples=[['Example/AAP_Manifesto_2019.pdf','government'],['Example/Bjp_Manifesto_2019.pdf','environment'],['Example/Congress_Manifesto_2019.pdf','safety']])
-# io.launch(debug=True,share=False)
 import random
 import matplotlib.pyplot as plt
 import nltk
+from nltk.tokenize import word_tokenize, sent_tokenize
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
 from nltk.probability import FreqDist
 from cleantext import clean
 import textract
 import urllib.request
+from io import BytesIO
+import sys
 import pandas as pd
 import cv2
 import re
+from wordcloud import WordCloud, ImageColorGenerator
 from textblob import TextBlob
 from PIL import Image
 import os
 import gradio as gr
+from dotenv import load_dotenv
 import groq
 import json
+import traceback
 import numpy as np
+import contractions                                           # used by clean_text()
+import unidecode                                              # used by clean_text()
+from nltk.text import Text                                    # used by DispersionPlot()
+from sklearn.feature_extraction.text import TfidfVectorizer   # used by fDistance()
 
+# Load environment variables
+load_dotenv()
 
+# Download NLTK resources
+nltk.download(['punkt', 'stopwords', 'wordnet', 'words'])
 
+# Initialize Groq client
+groq_api_key = os.getenv("GROQ_API_KEY")  # read the key loaded by load_dotenv()
+groq_client = groq.Groq(api_key=groq_api_key) if groq_api_key else None
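# A .env file alongside app.py (assumed location) is expected to provide the key, e.g.:
#   GROQ_API_KEY=your_key_here
# Without it, groq_client stays None and generate_summary() degrades gracefully.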
 
+# Stopwords customization
 stop_words = set(stopwords.words('english'))
 stop_words.update(['ask', 'much', 'thank', 'etc.', 'e', 'We', 'In', 'ed', 'pa', 'This', 'also', 'A', 'fu', 'To', '5', 'ing', 'er', '2'])  # wrapped in a list so whole words, not single characters, are added
 
+# --- Parsing & Preprocessing Functions ---
+def Parsing(parsed_text):
+    try:
+        # Accept either a Gradio file object (with .name) or a plain path string
+        if hasattr(parsed_text, 'name'):
+            file_path = parsed_text.name
+        else:
+            file_path = parsed_text
+        raw_party = textract.process(file_path, encoding='ascii', method='pdfminer')
+        return clean(raw_party)
+    except Exception as e:
+        print(f"Error parsing PDF: {e}")
+        return f"Error parsing PDF: {e}"
 
 def clean_text(text):
+    text = text.encode("ascii", errors="ignore").decode("ascii")
+    text = unidecode.unidecode(text)
+    text = contractions.fix(text)
+    text = re.sub(r"\n", " ", text)
+    text = re.sub(r"\t", " ", text)
+    text = re.sub(r"/ ", " ", text)
+    text = text.strip()
+    text = re.sub(" +", " ", text).strip()
+    text = [word for word in text.split() if word not in stop_words]
+    return ' '.join(text)
 
 def Preprocess(textParty):
+    text1Party = re.sub('[^A-Za-z0-9]+', ' ', textParty)
+    pattern = re.compile(r'\b(' + r'|'.join(stopwords.words('english')) + r')\b\s*')
+    text2Party = pattern.sub('', text1Party)
+    return text2Party
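# Example (hypothetical input, lower-case so the default NLTK stopword list applies):
#   clean_text("the party is committed\nto clean energy")  ->  "party committed clean energy"
#   Preprocess() then strips any remaining non-alphanumeric characters.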
 
+# --- Core Analysis Functions ---
+def generate_summary(text):
     if not groq_client:
         return "Summarization is not available. Please set up your GROQ_API_KEY in the .env file."
     if len(text) > 10000:
         text = text[:10000]
     try:
         completion = groq_client.chat.completions.create(
+            model="llama3-8b-8192",
             messages=[
                 {"role": "system", "content": "You are a helpful assistant that summarizes political manifestos. Provide a concise, objective summary that captures the key policy proposals, themes, and promises in the manifesto."},
                 {"role": "user", "content": f"Please summarize the following political manifesto text in about 300-500 words, focusing on the main policy areas, promises, and themes:\n\n{text}"}
             ],
+            temperature=0.3,
+            max_tokens=800
         )
         return completion.choices[0].message.content
     except Exception as e:
+        return f"Error generating summary: {str(e)}"
 
 def fDistance(text2Party):
+    word_tokens_party = word_tokenize(text2Party)
+    fdistance = FreqDist(word_tokens_party).most_common(10)
+    mem = {x[0]: x[1] for x in fdistance}
 
+    vectorizer = TfidfVectorizer(max_features=15, stop_words='english')
+    tfidf_matrix = vectorizer.fit_transform(sent_tokenize(text2Party))
+    feature_names = vectorizer.get_feature_names_out()
+
+    tfidf_scores = {}
+    for i, word in enumerate(feature_names):
+        scores = [tfidf_matrix[j, i] for j in range(len(sent_tokenize(text2Party))) if i < tfidf_matrix[j].shape[1]]
+        if scores:
+            tfidf_scores[word] = sum(scores) / len(scores)
+
+    combined_scores = {}
+    for word in set(list(mem.keys()) + list(tfidf_scores.keys())):
+        freq_score = mem.get(word, 0) / max(mem.values()) if mem else 0
+        tfidf_score = tfidf_scores.get(word, 0) / max(tfidf_scores.values()) if tfidf_scores else 0
+        combined_scores[word] = (freq_score * 0.3) + (tfidf_score * 0.7)
+
+    top_words = dict(sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)[:10])
+    return normalize(top_words)
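# Scoring sketch with hypothetical values: a word with the highest raw frequency
# (freq_score = 1.0) but a modest TF-IDF (tfidf_score = 0.4) scores
# 0.3 * 1.0 + 0.7 * 0.4 = 0.58, so the TF-IDF component dominates the ranking.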
 
+def normalize(d, target=1.0):
+    raw = sum(d.values())
+    factor = target / raw if raw != 0 else 0
+    return {key: value * factor for key, value in d.items()}
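# Example: normalize({'tax': 4, 'jobs': 1}) -> {'tax': 0.8, 'jobs': 0.2}
# (values rescaled to sum to the default target of 1.0)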
 
+# --- Visualization Functions with Error Handling ---
+def safe_plot(func, *args, **kwargs):
+    try:
+        plt.clf()
+        func(*args, **kwargs)
+        buf = BytesIO()
+        plt.savefig(buf, format='png')
+        buf.seek(0)
+        return Image.open(buf)
+    except Exception as e:
+        print(f"Plotting error: {e}")
+        return None
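# Usage sketch: safe_plot(lambda: plt.hist([1, 2, 2, 3])) returns a PIL Image,
# or None if the plotting callable raises.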
 
+def fDistancePlot(text2Party):
+    return safe_plot(lambda: FreqDist(word_tokenize(text2Party)).plot(15, title='Frequency Distribution'))
 
+def DispersionPlot(textParty):
+    tokens = word_tokenize(textParty)
+    moby = Text(tokens)
+    common_words = [item[0] for item in FreqDist(tokens).most_common(5)]
+    def _plot():
+        # Text.dispersion_plot() only accepts the word list, so set the title afterwards
+        moby.dispersion_plot(common_words)
+        plt.title('Dispersion Plot')
+    return safe_plot(_plot)
 
+def word_cloud_generator(parsed_text_name, text_Party):
+    try:
+        parsed = parsed_text_name.lower()
+        if 'bjp' in parsed:
+            mask_path = 'bjpImg2.jpeg'
+        elif 'congress' in parsed:
+            mask_path = 'congress3.jpeg'
+        elif 'aap' in parsed:
+            mask_path = 'aapMain2.jpg'
+        else:
+            mask_path = None
+
+        if mask_path and os.path.exists(mask_path):
+            orgImg = Image.open(mask_path)
+            mask = np.array(orgImg)
+            wordcloud = WordCloud(max_words=3000, mask=mask).generate(text_Party)
+            plt.imshow(wordcloud)
+        else:
+            wordcloud = WordCloud(max_words=2000).generate(text_Party)
+            plt.imshow(wordcloud)
+        plt.axis("off")
+        buf = BytesIO()
+        plt.savefig(buf, format='png')
+        buf.seek(0)
+        return Image.open(buf)
+    except Exception as e:
+        print(f"Word cloud error: {e}")
+        return None
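# analysis() below still calls get_all_phases_containing_tar_wrd(), which the updated
# file no longer defines. A minimal sketch, carried over (and simplified) from the
# removed version above, returning the phrases that surround the searched word:
def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin=10, right_margin=10, numLins=4):
    if not target_word or target_word.strip() == "":
        return "Please enter a search term"
    tokens = nltk.word_tokenize(tar_passage)
    text = nltk.Text(tokens)
    # Offsets of every (case-insensitive) occurrence of the target word
    c = nltk.ConcordanceIndex(text.tokens, key=lambda s: s.lower())
    concordance_txt = [
        text.tokens[max(offset - left_margin, 0):offset + right_margin]
        for offset in c.offsets(target_word)
    ]
    result = [' '.join(con_sub) for con_sub in concordance_txt][:numLins + 1]
    return '\n\n'.join(result)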
 
+# --- Main Analysis Function ---
+def analysis(Manifesto, Search):
+    try:
+        if Manifesto is None:
+            return "No file uploaded", {}, None, None, None, None, None, "No file uploaded"
+        if Search is None or Search.strip() == "":
+            Search = "government"
 
+        raw_party = Parsing(Manifesto)
+        if isinstance(raw_party, str) and raw_party.startswith("Error"):
+            return raw_party, {}, None, None, None, None, None, "Parsing failed"
 
+        text_Party = clean_text(raw_party)
+        text_Party_processed = Preprocess(text_Party)
+        summary = generate_summary(raw_party)
 
+        df = pd.DataFrame([{'Content': text_Party_processed}], columns=['Content'])
+        df['Subjectivity'] = df['Content'].apply(lambda x: TextBlob(x).sentiment.subjectivity)
+        df['Polarity'] = df['Content'].apply(lambda x: TextBlob(x).sentiment.polarity)
+        df['Polarity_Label'] = df['Polarity'].apply(lambda x: 'Positive' if x > 0 else 'Negative' if x < 0 else 'Neutral')
+        df['Subjectivity_Label'] = df['Subjectivity'].apply(lambda x: 'High' if x > 0.5 else 'Low')
 
+        # Generate Plots with Safe Plotting
+        sentiment_plot = safe_plot(lambda: df['Polarity_Label'].value_counts().plot(kind='bar', color="#FF9F45", title='Sentiment Analysis'))
+        subjectivity_plot = safe_plot(lambda: df['Subjectivity_Label'].value_counts().plot(kind='bar', color="#B667F1", title='Subjectivity Analysis'))
+        freq_plot = fDistancePlot(text_Party_processed)
+        dispersion_plot = DispersionPlot(text_Party_processed)
+        wordcloud = word_cloud_generator(Manifesto.name, text_Party_processed)
 
+        fdist_Party = fDistance(text_Party_processed)
+        searChRes = get_all_phases_containing_tar_wrd(Search, text_Party_processed)
 
+        return searChRes, fdist_Party, sentiment_plot, subjectivity_plot, wordcloud, freq_plot, dispersion_plot, summary
 
+    except Exception as e:
+        error_msg = f"Critical error: {str(e)}"
+        print(error_msg)
+        traceback.print_exc()
+        return error_msg, {}, None, None, None, None, None, "Analysis failed"
 
+# --- Gradio Interface ---
+Search_txt = "text"
 filePdf = "file"
 
 with gr.Blocks(title='Manifesto Analysis') as demo:
     gr.Markdown("# Manifesto Analysis with LLM Enhancement")
     with gr.Row():
+        with gr.Column():
             file_input = gr.File(label="Upload Manifesto PDF", file_types=[".pdf"])
             search_input = gr.Textbox(label="Search Term", placeholder="Enter a term to search in the manifesto")
             submit_btn = gr.Button("Analyze Manifesto")
     with gr.Tabs():
+        with gr.TabItem("Summary"): gr.Textbox(label='AI-Generated Summary', lines=10)
+        with gr.TabItem("Search Results"): gr.Textbox(label='Context Based Search')
+        with gr.TabItem("Key Topics"): gr.Label(label="Most Relevant Topics (LLM Enhanced)")
         with gr.TabItem("Visualizations"):
             with gr.Row():
+                gr.Image(label='Sentiment Analysis'), gr.Image(label='Subjectivity Analysis')
             with gr.Row():
+                gr.Image(label='Word Cloud'), gr.Image(label='Frequency Distribution')
+                gr.Image(label='Dispersion Plot')
+
     submit_btn.click(
         fn=analysis,
         inputs=[file_input, search_input],
+        outputs=[
+            gr.Textbox(label='Context Based Search'),
+            gr.Label(label="Most Relevant Topics (LLM Enhanced)"),
+            gr.Image(label='Sentiment Analysis'),
+            gr.Image(label='Subjectivity Analysis'),
+            gr.Image(label='Word Cloud'),
+            gr.Image(label='Frequency Distribution'),
+            gr.Image(label='Dispersion Plot'),
+            gr.Textbox(label='AI-Generated Summary', lines=10)
+        ]
     )
+
     gr.Examples(
         examples=[
+            ["Example/AAP_Manifesto_2019.pdf", "government"],
+            ["Example/Bjp_Manifesto_2019.pdf", "environment"],
+            ["Example/Congress_Manifesto_2019.pdf", "safety"]
         ],
         inputs=[file_input, search_input]
     )
 
+demo.launch(debug=True, share=False, show_error=True)