Commit d6d5bda · committed by HuuHuy227
Parent(s): 7b9f840
new-modified

Files changed:
- Dockerfile +1 -4
- app.py +177 -227
- requirements.txt +5 -5
- utils.py +0 -133
Dockerfile
CHANGED
@@ -6,15 +6,12 @@ WORKDIR /app
 
 # Install system dependencies for cairosvg
 RUN apt-get update && apt-get install -y \
+    graphviz \
     build-essential \
     python3-dev \
     python3-pip \
     python3-setuptools \
-    libcairo2-dev \
     pkg-config \
-    libcairo2 \
-    libcairo-gobject2 \
-    python3-cairo \
     libpango1.0-dev \
     shared-mime-info \
     mime-support \
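With the cairo stack dropped in favor of Graphviz, the `graphviz` Python package now depends on the system `dot` binary being on PATH at runtime. A minimal smoke test of that assumption inside the built image might look like the sketch below; this is hypothetical example code, not part of the commit:

# Hypothetical smoke test: confirm the Python graphviz bindings can
# reach the system 'dot' binary installed by the Dockerfile above.
import graphviz

g = graphviz.Digraph(comment="smoke test")
g.node("a", "hello")
g.node("b", "world")
g.edge("a", "b", "dep")
# pipe() shells out to 'dot'; it raises graphviz.ExecutableNotFound
# if the apt-installed binary is missing from PATH.
svg = g.pipe(format="svg")
print(svg[:40])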
app.py
CHANGED
@@ -1,246 +1,196 @@
 import streamlit as st
 import spacy
-
+import graphviz
 import pandas as pd
-from collections import Counter
-import plotly.express as px
-from utils import analyze_text
-from utils import svg_to_png
 import base64
+import shutil
+import subprocess
 
-# …
-…
-1. Enter your text in the input box
-2. Click "Analyze Text" to see:
-   - Sentence structure visualization
-   - Detailed token analysis
-   - Additional analysis in expandable sections
-3. Use mouse wheel or buttons to zoom the visualization
-4. Click and drag to pan around
-""")
-
-if analyze_button:
-    if text_input:
-        tokens, entities, noun_chunks, stats, doc = analyze_text(nlp, text_input)
-
-        # 1. Dependency Parse with improved visualization
-        st.header("Sentence Structure Analysis")
-
-        # Generate sentence visualizations
-        sentences = list(doc.sents)
-        sentence_htmls = []
-        for sent in sentences:
-            sent_html = displacy.render(sent, style="dep", options={
-                "distance": 120,
-                "arrow_stroke": 2,
-                "arrow_width": 8,
-                "font": "Arial",
-                "bg": "#ffffff",
-            })
-…
-        png_b64 = base64.b64encode(png_bytes).decode()
-…
-        transform-origin: 0 0;
-        transition: transform 0.1s;
-    }
-    .download-btn {
-        position: absolute;
-        right: 10px;
-        top: 10px;
-        background: rgba(255, 255, 255, 0.8);
-        border: 1px solid #ddd;
-        border-radius: 4px;
-        padding: 5px 10px;
-        cursor: pointer;
-    }
-    .download-btn:hover {
-        background: white;
-    }
-    </style>
-    """, unsafe_allow_html=True)
-…
-    // Zoom functionality
-    container.addEventListener('wheel', (e) => {{
-        e.preventDefault();
-        const rect = container.getBoundingClientRect();
-        const mouseX = e.clientX - rect.left;
-        const mouseY = e.clientY - rect.top;
-
-        const delta = e.deltaY * -0.01;
-        const newScale = Math.max(1, Math.min(scale + delta, 4));
-        const scaleChange = newScale / scale;
-
-        translateX = mouseX - (mouseX - translateX) * scaleChange;
-        translateY = mouseY - (mouseY - translateY) * scaleChange;
-
-        scale = newScale;
-        updateTransform();
-    }});
-
-    // Pan functionality
-    container.addEventListener('mousedown', (e) => {{
-        isPanning = true;
-        startX = e.clientX - translateX;
-        startY = e.clientY - translateY;
-        container.style.cursor = 'grabbing';
-    }});
-
-    container.addEventListener('mousemove', (e) => {{
-        if (!isPanning) return;
-        translateX = e.clientX - startX;
-        translateY = e.clientY - startY;
-        updateTransform();
-    }});
-
-    container.addEventListener('mouseup', () => {{
-        isPanning = false;
-        container.style.cursor = 'grab';
-    }});
-
-    container.addEventListener('mouseleave', () => {{
-        isPanning = false;
-        container.style.cursor = 'grab';
-    }});
-
-    function updateTransform() {{
-        img.style.transform = `translate(${{translateX}}px, ${{translateY}}px) scale(${{scale}})`;
-    }}
-
-    // Initialize
-    container.style.cursor = 'grab';
-    container.style.height = '500px';
-    </script>
-    """
-…
-        labels={'x': 'Entity Type', 'y': 'Count'}
-    )
-    st.plotly_chart(fig)
-…
-    st.info("No noun chunks found in the text.")
-
-    with st.expander("Text Statistics"):
-        col1, col2, col3 = st.columns(3)
-        with col1:
-            st.metric("Word Count", stats['Word Count'])
-        with col2:
-            st.metric("Sentence Count", stats['Sentence Count'])
-        with col3:
-            st.metric("Unique Words", stats['Unique Words'])
-…
+# Load English language model for spaCy
+nlp = spacy.load('en_core_web_md')
+
+def check_graphviz_installation():
+    """
+    Check if Graphviz is installed and accessible
+    """
+    if shutil.which('dot') is None:
+        return False
+    try:
+        subprocess.run(['dot', '-V'], capture_output=True, check=True)
+        return True
+    except (subprocess.SubprocessError, OSError):
+        return False
+
+def identify_clauses(doc):
+    """
+    Identify clauses in the sentence using spaCy, correctly separating dependent and independent clauses
+    """
+    clauses = []
+
+    # First identify all subordinate clauses and their spans
+    subordinate_spans = []
+    for token in doc:
+        if token.dep_ in ["ccomp", "xcomp", "advcl", "relcl"]:
+            span = doc[token.left_edge.i:token.right_edge.i + 1]
+            subordinate_spans.append({
+                "span": span,
+                "type": {
+                    "ccomp": "Complement Clause",
+                    "xcomp": "Open Complement Clause",
+                    "advcl": "Adverbial Clause",
+                    "relcl": "Adjective Clause"
+                }[token.dep_]
+            })
+
+    # Find the root and construct the main clause by excluding subordinate spans
+    root = None
+    for token in doc:
+        if token.dep_ == "ROOT":
+            root = token
+            break
+
+    if root:
+        # Get all tokens in the root's subtree
+        main_clause_tokens = set(token for token in root.subtree)
+
+        # Remove tokens that are part of subordinate clauses
+        for sub_clause in subordinate_spans:
+            for token in sub_clause["span"]:
+                if token in main_clause_tokens:
+                    main_clause_tokens.remove(token)
+
+        # Construct the main clause text from remaining tokens
+        main_clause_text = " ".join(sorted([token.text for token in main_clause_tokens],
+                                           key=lambda x: [t.i for t in doc if t.text == x][0]))
+        main_clause_text = main_clause_text.strip().replace(",","").replace(".","")
+        clauses.append({"Type": "Independent Clause", "Text": main_clause_text})
+
+        # Add the subordinate clauses
+        for sub_clause in subordinate_spans:
+            clauses.append({
+                "Type": sub_clause["type"],
+                "Text": sub_clause["span"].text
+            })
+
+    return clauses
+
+def analyze_clause_functions(doc):
+    """
+    Analyze the function of each clause
+    """
+    functions = []
+
+    for token in doc:
+        if token.dep_ == "ROOT":
+            functions.append({"Type": "Independent Clause", "Function": "Express the primary action or state"})
+        elif token.dep_ == "ccomp":
+            functions.append({"Type": "Complement Clause", "Function": "Acts as object of the main verb"})
+        elif token.dep_ == "xcomp":
+            functions.append({"Type": "Open Complement Clause", "Function": "Predicate complement without its own subject"})
+        elif token.dep_ == "advcl":
+            functions.append({"Type": "Adverbial Clause", "Function": "Modifies the verb like an adverb"})
+        elif token.dep_ == "relcl":
+            functions.append({"Type": "Adjective Clause", "Function": "Modifies a noun like an adjective"})
+
+    return functions
+
+def create_dependency_graph(doc):
+    """
+    Create a graphviz visualization of the dependency tree
+    """
+    if not check_graphviz_installation():
+        return None
+
+    dot = graphviz.Digraph(comment='Dependency Tree')
+
+    # Add nodes
+    for token in doc:
+        dot.node(str(token.i), f"{token.text}\n({token.pos_})")
+
+    # Add edges
+    for token in doc:
+        if token.head is not token:  # Skip root
+            dot.edge(str(token.head.i), str(token.i), token.dep_)
+
+    return dot
+
+def get_graph_download_link(dot):
+    """
+    Generate a download link for the graph image
+    """
+    try:
+        # Create PDF in memory
+        pdf = dot.pipe(format='pdf')
+
+        # Encode to base64
+        b64 = base64.b64encode(pdf).decode()
+
+        href = f'<a href="data:application/pdf;base64,{b64}" download="syntax_tree.pdf">Download Syntax Tree (PDF)</a>'
+        return href
+    except Exception as e:
+        return f"Error generating download link: {str(e)}"
+
+def main():
+    # Set page to wide mode for better visualization
+    st.set_page_config(layout="wide")
+    st.markdown("<h1 style='text-align: center; color: white;'>English Clause Analyzer</h1>", unsafe_allow_html=True)
+    st.write("Enter an English sentence to analyze its clauses, their functions, and syntax tree.")
+
+    # Input text
+    text = st.text_area("Enter your sentence:", "When I arrived at the station, the train had already left.", height=100)
+
+    if st.button("Analyze"):
+        if text:
+            # Process the text
+            doc = nlp(text)
+
+            # Create two columns for layout
+            col1, col2 = st.columns(2)
+
+            with col1:
+                # Identify clauses
+                clauses = identify_clauses(doc)
+                st.subheader(f"Clauses Analysis")
+
+                # Convert clauses to DataFrame for better presentation
+                df_clauses = pd.DataFrame(clauses)
+                st.table(df_clauses.style.set_properties(**{
+                    'background-color': 'rgba(0,0,0,0.1)',
+                    'color': 'white'
+                }))
+
+                # Display clause functions
+                functions = analyze_clause_functions(doc)
+                st.subheader("Clause Functions")
+                df_functions = pd.DataFrame(functions)
+                st.table(df_functions.style.set_properties(**{
+                    'background-color': 'rgba(0,0,0,0.1)',
+                    'color': 'white'
+                }))
+
+            with col2:
+                # Display dependency visualization
+                st.subheader("Syntax Tree Visualization")
+                if not check_graphviz_installation():
+                    st.error("Graphviz is not installed. Please install it using:")
+                    st.code("sudo apt-get install graphviz")
+                    st.markdown("After installation, restart the application.")
+                else:
+                    dot = create_dependency_graph(doc)
+                    st.graphviz_chart(dot)
+
+                    # Add download button for the graph
+                    st.markdown(get_graph_download_link(dot), unsafe_allow_html=True)
+
+            # Display part-of-speech tags in a table
+            st.subheader("Part-of-Speech Analysis")
+            pos_data = [{"Word": token.text, "Part of Speech": token.pos_,
+                         "Description": spacy.explain(token.pos_)} for token in doc]
+            df_pos = pd.DataFrame(pos_data)
+            st.table(df_pos.style.set_properties(**{
+                'background-color': 'rgba(0,0,0,0.1)',
+                'color': 'white'
+            }))
+
+if __name__ == "__main__":
+    main()
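For reference, the new clause helpers can be exercised outside Streamlit. A minimal sketch, not part of the commit, assuming en_core_web_md is downloaded and app.py is importable; the printed output is approximate:

# Minimal sketch: drive the new helpers directly.
# Assumes: python -m spacy download en_core_web_md
# Note: importing app also loads the spaCy model once at module level.
import spacy
from app import identify_clauses, analyze_clause_functions

nlp = spacy.load("en_core_web_md")
doc = nlp("When I arrived at the station, the train had already left.")

for clause in identify_clauses(doc):
    print(clause["Type"], "->", clause["Text"])
# Roughly: Independent Clause -> the train had already left
#          Adverbial Clause   -> When I arrived at the station

for fn in analyze_clause_functions(doc):
    print(fn["Type"], "->", fn["Function"])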
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
-streamlit
-…
-…
-…
-…
+streamlit
+nltk
+spacy
+matplotlib
+graphviz
utils.py
DELETED
@@ -1,133 +0,0 @@
-import io
-from cairosvg import svg2png
-from PIL import Image
-# import base64
-
-def get_entity_explanation(label):
-    """Return explanation for named entity labels"""
-    explanations = {
-        'PERSON': 'People, including fictional',
-        'NORP': 'Nationalities, religious or political groups',
-        'FAC': 'Buildings, airports, highways, bridges, etc.',
-        'ORG': 'Companies, agencies, institutions, etc.',
-        'GPE': 'Countries, cities, states',
-        'LOC': 'Non-GPE locations, mountain ranges, water bodies',
-        'PRODUCT': 'Objects, vehicles, foods, etc.',
-        'EVENT': 'Named hurricanes, battles, wars, sports events, etc.',
-        'WORK_OF_ART': 'Titles of books, songs, etc.',
-        'DATE': 'Absolute or relative dates or periods',
-        'TIME': 'Times smaller than a day',
-        'MONEY': 'Monetary values, including unit',
-        'QUANTITY': 'Measurements, as of weight or distance'
-    }
-    return explanations.get(label, 'Other type of entity')
-
-def analyze_text(nlp, text):
-    doc = nlp(text)
-
-    # Basic tokenization and POS analysis
-    tokens = [{
-        'Text': token.text,
-        'Lemma': token.lemma_,
-        'POS': token.pos_,
-        'Tag': token.tag_,
-        'Dependency': token.dep_,
-        'Shape': token.shape_,
-        'Is Alpha': token.is_alpha,
-        'Is Stop': token.is_stop
-    } for token in doc]
-
-    # Named Entity Recognition
-    entities = [{
-        'Text': ent.text,
-        'Label': ent.label_,
-        'Explanation': get_entity_explanation(ent.label_),
-        'Start': ent.start_char,
-        'End': ent.end_char
-    } for ent in doc.ents]
-
-    # Noun Chunks (phrases)
-    noun_chunks = [{
-        'Text': chunk.text,
-        'Root Text': chunk.root.text,
-        'Root Dep': chunk.root.dep_,
-        'Root Head Text': chunk.root.head.text
-    } for chunk in doc.noun_chunks]
-
-    # Text Statistics
-    stats = {
-        'Word Count': len([token for token in doc if not token.is_punct]),
-        'Sentence Count': len(list(doc.sents)),
-        'Average Words per Sentence': round(len([token for token in doc if not token.is_punct]) / len(list(doc.sents)), 2),
-        'Unique Words': len(set([token.text.lower() for token in doc if token.is_alpha])),
-        'Stop Words %': round(len([token for token in doc if token.is_stop]) / len(doc) * 100, 2)
-    }
-
-    return tokens, entities, noun_chunks, stats, doc
-
-def svg_to_png(svg_content, background_color='white'):
-    """Convert SVG to PNG with specified background color"""
-    # Split multiple SVGs if present
-    svg_parts = svg_content.split('<br><br>')
-    images = []
-
-    for svg in svg_parts:
-        # Add SVG namespace if missing
-        if not 'xmlns="http://www.w3.org/2000/svg"' in svg:
-            svg = svg.replace('<svg', '<svg xmlns="http://www.w3.org/2000/svg"')
-
-        try:
-            # Convert SVG to PNG bytes
-            png_bytes = svg2png(bytestring=svg.encode('utf-8'),
-                                background_color=background_color,
-                                scale=1)
-
-            # Create PIL Image from PNG bytes
-            img = Image.open(io.BytesIO(png_bytes))
-
-            # Convert RGBA to RGB with white background
-            if img.mode == 'RGBA':
-                background = Image.new('RGB', img.size, background_color)
-                background.paste(img, mask=img.split()[3])  # Use alpha channel as mask
-                img = background
-
-            # Add some padding
-            padding = 20  # pixels
-            img_with_padding = Image.new('RGB',
-                                         (img.width, img.height + padding * 2),
-                                         background_color)
-            img_with_padding.paste(img, (0, padding))
-            images.append(img_with_padding)
-
-        except Exception as e:
-            st.error(f"Error converting SVG to PNG: {str(e)}")
-            continue
-
-    if not images:
-        return None
-
-    # Combine images vertically if there are multiple
-    if len(images) > 1:
-        # Calculate total height and max width
-        total_height = sum(img.height for img in images)
-        max_width = max(img.width for img in images)
-
-        # Create new image to hold all sentences
-        combined = Image.new('RGB', (max_width, total_height), background_color)
-
-        # Paste each image
-        y_offset = 0
-        for img in images:
-            # Center image horizontally
-            x_offset = (max_width - img.width) // 2
-            combined.paste(img, (x_offset, y_offset))
-            y_offset += img.height
-    else:
-        combined = images[0]
-
-    # Convert to bytes for Streamlit
-    img_byte_arr = io.BytesIO()
-    combined.save(img_byte_arr, format='PNG')
-    img_byte_arr.seek(0)
-
-    return img_byte_arr.getvalue()