{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Feedback and Auditing Survey AI Analyzer\n",
    "\n",
    "A Gradio app for survey feedback. It loads reviews from an Excel workbook, filters rows by keyword, and uses Transformer models to produce a rating, a summary, keywords and a sentiment label. A third tab analyzes a single hand-written review.\n",
    "\n",
    "**Before running:** the rating classifier is loaded with `from_pretrained('roberta-rating')`, so that name must resolve to a checkpoint (presumably a local `roberta-rating/` directory next to this notebook - confirm)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "import gradio as gr\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import torch\n",
    "from numpy import mean\n",
    "from openpyxl import load_workbook\n",
    "from transformers import (\n",
    "    AutoModelForSeq2SeqLM,\n",
    "    AutoModelForSequenceClassification,\n",
    "    AutoTokenizer,\n",
    "    TextClassificationPipeline,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration and models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "theme = gr.themes.Soft(\n",
    "    primary_hue=\"amber\",\n",
    "    secondary_hue=\"amber\",\n",
    "    neutral_hue=\"stone\",\n",
    ")\n",
    "\n",
    "# Load tokenizers and models\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"suriya7/bart-finetuned-text-summarization\")\n",
    "model = AutoModelForSeq2SeqLM.from_pretrained(\"suriya7/bart-finetuned-text-summarization\")\n",
    "\n",
    "tokenizer_keywords = AutoTokenizer.from_pretrained(\"transformer3/H2-keywordextractor\")\n",
    "model_keywords = AutoModelForSeq2SeqLM.from_pretrained(\"transformer3/H2-keywordextractor\")\n",
    "\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "new_model = AutoModelForSequenceClassification.from_pretrained('roberta-rating')\n",
    "new_tokenizer = AutoTokenizer.from_pretrained('roberta-rating')\n",
    "\n",
    "classifier = TextClassificationPipeline(model=new_model, tokenizer=new_tokenizer, device=device)\n",
    "\n",
    "# Display strings for ratings; not referenced by the functions below.\n",
    "label_mapping = {1: '1/5', 2: '2/5', 3: '3/5', 4: '4/5', 5: '5/5'}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Excel loading and filtering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def filter_xl(file, keywords):\n",
    "    \"\"\"Load the active sheet of an Excel workbook and keep the rows matching every keyword.\n",
    "\n",
    "    Args:\n",
    "        file: Path (or file-like object) passed to ``openpyxl.load_workbook``.\n",
    "        keywords: Comma-separated keywords. A row is kept only if each keyword occurs,\n",
    "            case-insensitively, in at least one of its cells. Empty or None disables filtering.\n",
    "\n",
    "    Returns:\n",
    "        A DataFrame whose column names come from the first row of the sheet.\n",
    "\n",
    "    Raises:\n",
    "        gr.Error: If no file was supplied.\n",
    "    \"\"\"\n",
    "    if file is None:\n",
    "        raise gr.Error(\"Please upload an Excel file first.\")\n",
    "\n",
    "    workbook = load_workbook(filename=file)\n",
    "    rows = workbook.active.values\n",
    "    header = next(rows, None)\n",
    "    if header is None:\n",
    "        # Completely empty sheet: nothing to show or filter.\n",
    "        return pd.DataFrame()\n",
    "    df = pd.DataFrame(rows, columns=header)\n",
    "\n",
    "    for keyword in (keywords or \"\").split(','):\n",
    "        keyword = keyword.strip()\n",
    "        if not keyword or df.empty:\n",
    "            continue\n",
    "        # regex=False: keywords are literal text, so input such as 'c++' or '(' is safe.\n",
    "        row_matches = df.apply(\n",
    "            lambda row: row.astype(str).str.contains(keyword, case=False, regex=False).any(),\n",
    "            axis=1,\n",
    "        )\n",
    "        df = df[row_matches]\n",
    "\n",
    "    return df\n",
    "\n",
    "\n",
    "def count_rows(filtered_df):\n",
    "    \"\"\"Return the number of rows in the filtered DataFrame.\"\"\"\n",
    "    return len(filtered_df)\n",
    "\n",
    "\n",
    "def _review_texts(filtered_df):\n",
    "    \"\"\"Return every non-missing cell of ``filtered_df`` as a string, in row-major order.\n",
    "\n",
    "    Every cell is treated as a review, whatever column it sits in.\n",
    "    \"\"\"\n",
    "    return [str(cell) for cell in filtered_df.to_numpy().flatten() if pd.notna(cell)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rating, summary, keywords and sentiment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _classify_rating(text):\n",
    "    \"\"\"Return the integer parsed from the classifier's ``LABEL_<n>`` label for ``text``.\"\"\"\n",
    "    # truncation=True stops over-long reviews from exceeding the model's input limit.\n",
    "    prediction = classifier(text, truncation=True)[0]\n",
    "    return int(prediction['label'].split('_')[1])\n",
    "\n",
    "\n",
    "def _sentiment_from_rating(rating):\n",
    "    \"\"\"Map a rating to a sentiment: 4-5 is Positive, 1-2 is Negative, anything else Neutral.\"\"\"\n",
    "    if rating in (4, 5):\n",
    "        return \"Positive\"\n",
    "    if rating in (1, 2):\n",
    "        return \"Negative\"\n",
    "    return \"Neutral\"\n",
    "\n",
    "\n",
    "def _swap_pronouns(text, subject, possessive, obj):\n",
    "    \"\"\"Replace the whole words 'I', 'my' and 'me' in ``text`` with third-person forms.\n",
    "\n",
    "    Word boundaries matter: a plain ``str.replace`` also rewrites the letters inside\n",
    "    other words (for example 'time' would become 'tithem').\n",
    "    \"\"\"\n",
    "    replacements = {\"I\": subject, \"my\": possessive, \"me\": obj}\n",
    "    return re.sub(r\"\\b(I|my|me)\\b\", lambda match: replacements[match.group(1)], text)\n",
    "\n",
    "\n",
    "def _generate_text(seq2seq_tokenizer, seq2seq_model, text, min_length, max_length):\n",
    "    \"\"\"Encode ``text`` (truncated to 1024 tokens), run 2-beam generation, decode the first result.\"\"\"\n",
    "    inputs = seq2seq_tokenizer([text], max_length=1024, truncation=True, return_tensors=\"pt\")\n",
    "    output_ids = seq2seq_model.generate(\n",
    "        inputs[\"input_ids\"], num_beams=2, min_length=min_length, max_length=max_length\n",
    "    )\n",
    "    return seq2seq_tokenizer.batch_decode(\n",
    "        output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False\n",
    "    )[0]\n",
    "\n",
    "\n",
    "def _summarize(text):\n",
    "    \"\"\"Summarize ``text`` with the summarization model (10 to 50 generated tokens).\"\"\"\n",
    "    return _generate_text(tokenizer, model, text, min_length=10, max_length=50)\n",
    "\n",
    "\n",
    "def _extract_keywords(text):\n",
    "    \"\"\"Generate keywords for ``text`` with the keyword model (up to 100 generated tokens).\"\"\"\n",
    "    return _generate_text(tokenizer_keywords, model_keywords, text, min_length=0, max_length=100)\n",
    "\n",
    "\n",
    "def calculate_rating(filtered_df):\n",
    "    \"\"\"Rate every non-missing cell of ``filtered_df``.\n",
    "\n",
    "    Returns:\n",
    "        ``(overall, ratings)``: ``ratings`` holds one integer per cell and ``overall`` is\n",
    "        their mean rounded to two decimals (NaN when there is nothing to rate).\n",
    "    \"\"\"\n",
    "    ratings = [_classify_rating(text) for text in _review_texts(filtered_df)]\n",
    "    if not ratings:\n",
    "        return float(\"nan\"), ratings\n",
    "    return round(float(mean(ratings)), 2), ratings\n",
    "\n",
    "\n",
    "def calculate_results(file, keywords):\n",
    "    \"\"\"Analyze all reviews left after filtering the workbook by ``keywords``.\n",
    "\n",
    "    Returns:\n",
    "        ``(overall_rating, summary, extracted_keywords, overall_sentiment, ratings, sentiments)``.\n",
    "        ``ratings`` and ``sentiments`` hold one entry per review.\n",
    "\n",
    "    Raises:\n",
    "        gr.Error: If no file was supplied or no review survives the filter.\n",
    "    \"\"\"\n",
    "    filtered_df = filter_xl(file, keywords)\n",
    "    reviews = _review_texts(filtered_df)\n",
    "    if not reviews:\n",
    "        raise gr.Error(\"No reviews to analyze. Check the file or relax the keyword filter.\")\n",
    "\n",
    "    overall_rating, ratings = calculate_rating(filtered_df)\n",
    "\n",
    "    # Summarize and extract keywords from all filtered reviews at once.\n",
    "    text = \" \".join(reviews)\n",
    "    summary = _swap_pronouns(_summarize(text), \"They\", \"their\", \"them\")\n",
    "    extracted_keywords = _extract_keywords(text)\n",
    "\n",
    "    # Sentiment follows from the rating, so the classifier runs only once per review.\n",
    "    sentiments = [_sentiment_from_rating(rating) for rating in ratings]\n",
    "    positives = sentiments.count(\"Positive\")\n",
    "    negatives = sentiments.count(\"Negative\")\n",
    "    if positives > negatives:\n",
    "        overall_sentiment = \"Positive\"\n",
    "    elif negatives > positives:\n",
    "        overall_sentiment = \"Negative\"\n",
    "    else:\n",
    "        overall_sentiment = \"Neutral\"\n",
    "\n",
    "    return overall_rating, summary, extracted_keywords, overall_sentiment, ratings, sentiments\n",
    "\n",
    "\n",
    "def analyze_review(review):\n",
    "    \"\"\"Return ``(rating, summary, keywords, sentiment)`` for one review.\n",
    "\n",
    "    Blank input yields the same error message in all four positions.\n",
    "    \"\"\"\n",
    "    if not review or not review.strip():\n",
    "        message = \"Error: No text provided\"\n",
    "        return message, message, message, message\n",
    "\n",
    "    rating = _classify_rating(review)\n",
    "    summary = _swap_pronouns(_summarize(review), \"he/she\", \"his/her\", \"him/her\")\n",
    "    keywords = _extract_keywords(review)\n",
    "    return rating, summary, keywords, _sentiment_from_rating(rating)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plots"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fixed bar order and colour per sentiment, independent of which label is most frequent.\n",
    "SENTIMENT_COLORS = {\"Positive\": \"green\", \"Neutral\": \"blue\", \"Negative\": \"red\"}\n",
    "\n",
    "\n",
    "def plot_ratings(ratings):\n",
    "    \"\"\"Save a histogram of ``ratings`` (bins 1 to 5) and return the image path.\"\"\"\n",
    "    fig, ax = plt.subplots(figsize=(10, 5))\n",
    "    ax.hist(ratings, bins=range(1, 7), edgecolor='black', align='left')\n",
    "    ax.set_xlabel('Rating')\n",
    "    ax.set_ylabel('Frequency')\n",
    "    ax.set_title('Distribution of Ratings')\n",
    "    ax.set_xticks(range(1, 6))\n",
    "    ax.grid(True)\n",
    "    fig.savefig('ratings_distribution.png')\n",
    "    # Close the figure so repeated clicks do not accumulate open figures in the kernel.\n",
    "    plt.close(fig)\n",
    "    return 'ratings_distribution.png'\n",
    "\n",
    "\n",
    "def plot_sentiments(sentiments):\n",
    "    \"\"\"Save a bar chart of sentiment label counts and return the image path.\"\"\"\n",
    "    sentiment_counts = (\n",
    "        pd.Series(sentiments, dtype=\"object\")\n",
    "        .value_counts()\n",
    "        .reindex(list(SENTIMENT_COLORS), fill_value=0)\n",
    "    )\n",
    "    fig, ax = plt.subplots(figsize=(10, 5))\n",
    "    sentiment_counts.plot(kind='bar', color=list(SENTIMENT_COLORS.values()), ax=ax)\n",
    "    ax.set_xlabel('Sentiment')\n",
    "    ax.set_ylabel('Frequency')\n",
    "    ax.set_title('Distribution of Sentiments')\n",
    "    ax.grid(True)\n",
    "    fig.savefig('sentiments_distribution.png')\n",
    "    plt.close(fig)\n",
    "    return 'sentiments_distribution.png'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gradio interface"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _on_display(file, keywords):\n",
    "    \"\"\"Filter the workbook once and return the table together with its row count.\"\"\"\n",
    "    filtered = filter_xl(file, keywords)\n",
    "    return filtered, count_rows(filtered)\n",
    "\n",
    "\n",
    "def _on_calculate(file, keywords):\n",
    "    \"\"\"Return rating, summary, keywords and sentiment for the four result text boxes.\"\"\"\n",
    "    return calculate_results(file, keywords)[:4]\n",
    "\n",
    "\n",
    "def _on_calculate_graphs(file, keywords):\n",
    "    \"\"\"Run the analysis once and return the four text results plus both chart image paths.\"\"\"\n",
    "    rating, summary_text, keyword_text, sentiment, ratings, sentiments = calculate_results(file, keywords)\n",
    "    return rating, summary_text, keyword_text, sentiment, plot_ratings(ratings), plot_sentiments(sentiments)\n",
    "\n",
    "\n",
    "with gr.Blocks(theme=theme) as demo:\n",
    "    gr.Markdown(\"# Feedback and Auditing Survey AI Analyzer\")\n",
    "    with gr.Tabs():\n",
    "        with gr.TabItem(\"Upload and Filter\"):\n",
    "            with gr.Row():\n",
    "                with gr.Column(scale=1):\n",
    "                    # To restrict uploads, pass file_types=[\".xlsx\", \".xlsm\", \".xltx\", \".xltm\"].\n",
    "                    excel_file = gr.File(label=\"Upload Excel File\")\n",
    "                    keywords_input = gr.Textbox(label=\"Filter by Keywords (comma-separated)\")\n",
    "                    display_button = gr.Button(\"Display and Filter Excel Data\")\n",
    "                    clear_button_upload = gr.Button(\"Clear\")\n",
    "                    row_count = gr.Textbox(label=\"Number of Rows\", interactive=False)\n",
    "                with gr.Column(scale=3):\n",
    "                    filtered_data = gr.Dataframe(label=\"Filtered Excel Contents\")\n",
    "\n",
    "        with gr.TabItem(\"Calculate Results\"):\n",
    "            with gr.Row():\n",
    "                with gr.Column():\n",
    "                    overall_rating = gr.Textbox(label=\"Overall Rating\")\n",
    "                    summary = gr.Textbox(label=\"Summary\")\n",
    "                    keywords_output = gr.Textbox(label=\"Keywords\")\n",
    "                    overall_sentiment = gr.Textbox(label=\"Overall Sentiment\")\n",
    "                    calculate_button = gr.Button(\"Calculate Results\")\n",
    "                with gr.Column():\n",
    "                    ratings_graph = gr.Image(label=\"Ratings Distribution\")\n",
    "                    sentiments_graph = gr.Image(label=\"Sentiments Distribution\")\n",
    "                    calculate_graph_button = gr.Button(\"Calculate Graph Results\")\n",
    "\n",
    "        with gr.TabItem(\"Testing Area / Write a Review\"):\n",
    "            with gr.Row():\n",
    "                with gr.Column(scale=2):\n",
    "                    review_input = gr.Textbox(label=\"Write your review here\")\n",
    "                    analyze_button = gr.Button(\"Analyze Review\")\n",
    "                    clear_button_review = gr.Button(\"Clear\")\n",
    "                with gr.Column(scale=2):\n",
    "                    review_rating = gr.Textbox(label=\"Rating\")\n",
    "                    review_summary = gr.Textbox(label=\"Summary\")\n",
    "                    review_keywords = gr.Textbox(label=\"Keywords\")\n",
    "                    review_sentiment = gr.Textbox(label=\"Sentiment\")\n",
    "\n",
    "    display_button.click(\n",
    "        _on_display,\n",
    "        inputs=[excel_file, keywords_input],\n",
    "        outputs=[filtered_data, row_count],\n",
    "    )\n",
    "    calculate_graph_button.click(\n",
    "        _on_calculate_graphs,\n",
    "        inputs=[excel_file, keywords_input],\n",
    "        outputs=[overall_rating, summary, keywords_output, overall_sentiment, ratings_graph, sentiments_graph],\n",
    "    )\n",
    "    calculate_button.click(\n",
    "        _on_calculate,\n",
    "        inputs=[excel_file, keywords_input],\n",
    "        outputs=[overall_rating, summary, keywords_output, overall_sentiment],\n",
    "    )\n",
    "    analyze_button.click(\n",
    "        analyze_review,\n",
    "        inputs=review_input,\n",
    "        outputs=[review_rating, review_summary, review_keywords, review_sentiment],\n",
    "    )\n",
    "    clear_button_upload.click(lambda: \"\", outputs=[keywords_input])\n",
    "    clear_button_review.click(\n",
    "        lambda: (\"\", \"\", \"\", \"\", \"\"),\n",
    "        outputs=[review_input, review_rating, review_summary, review_keywords, review_sentiment],\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Launch\n",
    "\n",
    "`share=True` also publishes the app on a temporary public `gradio.live` URL. Set it to `False` to serve on the local URL only."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "demo.launch(share=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "SolutionsInPR",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}