iukhan committed on
Commit
633a65b
·
verified ·
1 Parent(s): 1f91055

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -117
app.py CHANGED
@@ -1,125 +1,29 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- from wordcloud import WordCloud, STOPWORDS
5
- import matplotlib.pyplot as plt
6
  import PyPDF2
7
  from docx import Document
8
- import plotly.express as px
9
- import base64
10
- from io import BytesIO
11
-
12
- # Functions for file reading
13
- def read_txt(file):
14
- return file.getvalue().decode("utf-8")
15
-
16
- def read_docx(file):
17
- doc = Document(file)
18
- return " ".join([para.text for para in doc.paragraphs])
19
-
20
- def read_pdf(file):
21
- pdf = PyPDF2.PdfReader(file)
22
- return " ".join([page.extract_text() for page in pdf.pages])
23
-
24
- # Function to filter out stopwords
25
- def filter_stopwords(text, additional_stopwords=[]):
26
- words = text.split()
27
- all_stopwords = STOPWORDS.union(set(additional_stopwords))
28
- filtered_words = [word for word in words if word.lower() not in all_stopwords]
29
- return " ".join(filtered_words)
30
-
31
- # Function to create download link for plot
32
- def get_image_download_link(buffered, format_):
33
- image_base64 = base64.b64encode(buffered.getvalue()).decode()
34
- return f'<a href="data:image/{format_};base64,{image_base64}" download="wordcloud.{format_}">Download Plot as {format_}</a>'
35
-
36
- # Function to generate a download link for a DataFrame
37
- def get_table_download_link(df, filename, file_label):
38
- csv = df.to_csv(index=False)
39
- b64 = base64.b64encode(csv.encode()).decode()
40
- return f'<a href="data:file/csv;base64,{b64}" download="{filename}">{file_label}</a>'
41
-
42
- # Streamlit code
43
- st.title("Word Cloud Generator")
44
- st.subheader("📁 Upload a pdf, docx or text file to generate a word cloud")
45
-
46
- uploaded_file = st.file_uploader("Choose a file", type=["txt", "pdf", "docx"])
47
- st.set_option('deprecation.showPyplotGlobalUse', False)
48
-
49
- if uploaded_file:
50
- file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type, "FileSize": uploaded_file.size}
51
- st.write(file_details)
52
-
53
- # Check the file type and read the file
54
- if uploaded_file.type == "text/plain":
55
- text = read_txt(uploaded_file)
56
- elif uploaded_file.type == "application/pdf":
57
- text = read_pdf(uploaded_file)
58
- elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
59
- text = read_docx(uploaded_file)
60
- else:
61
- st.error("File type not supported. Please upload a txt, pdf or docx file.")
62
- st.stop()
63
-
64
- # Generate word count table
65
- words = text.split()
66
- word_count = pd.DataFrame({'Word': words}).groupby('Word').size().reset_index(name='Count').sort_values('Count', ascending=False)
67
-
68
- # Sidebar: Checkbox and Multiselect box for stopwords
69
- use_standard_stopwords = st.sidebar.checkbox("Use standard stopwords?", True)
70
- top_words = word_count['Word'].head(50).tolist()
71
- additional_stopwords = st.sidebar.multiselect("Additional stopwords:", sorted(top_words))
72
-
73
- if use_standard_stopwords:
74
- all_stopwords = STOPWORDS.union(set(additional_stopwords))
75
- else:
76
- all_stopwords = set(additional_stopwords)
77
-
78
- text = filter_stopwords(text, all_stopwords)
79
-
80
- if text:
81
- # Word Cloud dimensions
82
- width = st.sidebar.slider("Select Word Cloud Width", 400, 2000, 1200, 50)
83
- height = st.sidebar.slider("Select Word Cloud Height", 200, 2000, 800, 50)
84
-
85
- # Generate wordcloud
86
- st.subheader("Generated Word Cloud")
87
- fig, ax = plt.subplots(figsize=(width/100, height/100)) # Convert pixels to inches for figsize
88
- wordcloud_img = WordCloud(width=width, height=height, background_color='white', max_words=200, contour_width=3, contour_color='steelblue').generate(text)
89
- ax.imshow(wordcloud_img, interpolation='bilinear')
90
- ax.axis('off')
91
-
92
- # Save plot functionality
93
- format_ = st.selectbox("Select file format to save the plot", ["png", "jpeg", "svg", "pdf"])
94
- resolution = st.slider("Select Resolution", 100, 500, 300, 50)
95
- # Generate word count table
96
- st.subheader("Word Count Table")
97
- words = text.split()
98
- word_count = pd.DataFrame({'Word': words}).groupby('Word').size().reset_index(name='Count').sort_values('Count', ascending=False)
99
- st.write(word_count)
100
- st.pyplot(fig)
101
- if st.button(f"Save as {format_}"):
102
- buffered = BytesIO()
103
- plt.savefig(buffered, format=format_, dpi=resolution)
104
- st.markdown(get_image_download_link(buffered, format_), unsafe_allow_html=True)
105
-
106
 
107
- # Word count table at the end
108
- st.sidebar.markdown("---")
109
- st.sidebar.subheader("Learn Advanced AI| Hope to Skill with Irfan Malik")
110
- # add a youtube video
111
- st.sidebar.video("https://www.youtube.com/watch?v=vCKF1vYVNTs&list=PLxf3-FrL8GzRALeq_9BtdQclN6SF4bTCG")
112
- st.sidebar.markdown("---")
113
- # add author name and info
114
- st.sidebar.markdown("Created by: [Virtual Student](https://github.com/dashboard)")
115
- st.sidebar.markdown("website:(https://xevenskills.com)")
116
 
 
117
 
 
 
 
118
 
 
 
 
 
119
 
120
- st.subheader("Word Count Table")
121
- st.write(word_count)
122
- # Provide download link for table
123
- if st.button('Download Word Count Table as CSV'):
124
- st.markdown(get_table_download_link(word_count, "word_count.csv", "Click Here to Download"), unsafe_allow_html=True)
125
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from io import BytesIO
3
+ from PIL import Image
 
 
4
  import PyPDF2
5
  from docx import Document
6
+ from wordcloud import WordCloud, STOPWORDS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ app = Flask(__name__)
 
 
 
 
 
 
 
 
9
 
10
+ ALLOWED_EXTENSIONS = {'txt', 'pdf', 'docx'}
11
 
12
+ def allowed_file(filename):
13
+ return '.' in filename and \
14
+ filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
15
 
16
+ @app.route('/api/generate-wordcloud', methods=['POST'])
17
+ def generate_wordcloud():
18
+ if 'file' not in request.files:
19
+ return jsonify({'error': 'Missing file upload'}), 400
20
 
21
+ file = request.files['file']
22
+ if file.filename == '':
23
+ return jsonify({'error': 'No selected file'}), 400
 
 
24
 
25
+ if file and allowed_file(file.filename):
26
+ text = ''
27
+ if file.filename.endswith('.txt'):
28
+ text = file.read().decode('utf-8')
29
+ elif file.filename.