Spaces:
Running
Running
Update to POC Delux
Browse files
app.py
CHANGED
@@ -2,9 +2,12 @@ import streamlit as st
|
|
2 |
from dotenv import load_dotenv
|
3 |
import requests
|
4 |
from bs4 import BeautifulSoup
|
|
|
5 |
import openai
|
6 |
import os
|
7 |
import hmac
|
|
|
|
|
8 |
|
9 |
load_dotenv()
|
10 |
|
@@ -33,7 +36,7 @@ def create_article(length_option, articles, params):
|
|
33 |
messages=[
|
34 |
{
|
35 |
"role": "system",
|
36 |
-
"content": f"You are a professional journalist whose task is to write your own article based on one or more articles. This article should combine the content of the original articles, but have its own writing style, which is as follows: {writing_style} The length of your article should be {length} sentences long.",
|
37 |
},
|
38 |
{
|
39 |
"role": "system",
|
@@ -53,8 +56,58 @@ def create_article(length_option, articles, params):
|
|
53 |
except Exception as e:
|
54 |
print(f"Fehler beim erstellen des artikels: {str(e)}")
|
55 |
st.error(f"Something went wrong: {str(e)}", icon="🚨")
|
56 |
-
|
57 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
# Webseite herunterladen
|
59 |
response = requests.get(url)
|
60 |
|
@@ -63,18 +116,34 @@ def extract_article(url):
|
|
63 |
# HTML-Inhalt parsen
|
64 |
soup = BeautifulSoup(response.text, 'html.parser')
|
65 |
|
66 |
-
#
|
67 |
-
|
68 |
-
|
69 |
-
# Textinhalt der <p>-Tags zusammenführen
|
70 |
-
text_content = '\n'.join([p.get_text() for p in paragraphs])
|
71 |
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
else:
|
74 |
# Falls die Anfrage nicht erfolgreich war, eine Fehlermeldung ausgeben
|
75 |
print(f"Fehler: {response.status_code}")
|
76 |
return None
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
def extract_article_links(**kwargs):
|
79 |
# print(len(kwargs["links"]))
|
80 |
with st.spinner("Extrahiere..."):
|
@@ -82,12 +151,48 @@ def extract_article_links(**kwargs):
|
|
82 |
for link in kwargs["links"]:
|
83 |
results.append(extract_article(link))
|
84 |
st.session_state["extracted_articles"] = results
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
def finalize_articles():
    """Copy the edited article texts from their widgets into ``final_articles``.

    One ``final_article_<n>`` session-state entry (numbered from 1) is read
    for every entry in ``extracted_articles``; the collected values are
    stored, in order, under ``st.session_state["final_articles"]``.
    """
    state = st.session_state
    article_count = len(state["extracted_articles"])
    state["final_articles"] = [
        state["final_article_" + str(position)]
        for position in range(1, article_count + 1)
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
if "extracted_articles" not in st.session_state:
|
93 |
st.session_state["extracted_articles"] = []
|
@@ -95,6 +200,18 @@ if "article_links" not in st.session_state:
|
|
95 |
st.session_state["article_links"] = []
|
96 |
if "final_articles" not in st.session_state:
|
97 |
st.session_state["final_articles"] = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
def check_password():
|
100 |
"""Returns `True` if the user had the correct password."""
|
@@ -125,14 +242,32 @@ if not check_password():
|
|
125 |
|
126 |
col1, col2 = st.columns([2, 1])
|
127 |
|
128 |
-
col1.title("
|
129 |
col2.image("heute_logo.png")
|
130 |
|
131 |
-
|
132 |
-
|
133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
if st.session_state["link_input_"+str(len(st.session_state["article_links"])+1)]:
|
137 |
st.session_state["article_links"].append(st.session_state["link_input_"+str(len(st.session_state["article_links"])+1)])
|
138 |
st.rerun()
|
@@ -144,28 +279,39 @@ if len(st.session_state["final_articles"]) < 1:
|
|
144 |
st.button("Extrahiere Artikel",on_click=extract_article_links,kwargs={"links":st.session_state["article_links"]})
|
145 |
except Exception as e:
|
146 |
print(f"Fehler beim extrahieren der artikel: {str(e)}")
|
147 |
-
st.error(f"
|
148 |
-
|
149 |
-
st.write("Hier
|
150 |
for i,article in enumerate(st.session_state["extracted_articles"]):
|
151 |
with st.expander(f"Artikel {i+1}"):
|
152 |
if article:
|
153 |
-
st.text_area("
|
154 |
else:
|
155 |
-
st.info("
|
156 |
-
st.text_area("
|
157 |
st.button("Artikel finalisieren",on_click=finalize_articles)
|
|
|
|
|
|
|
|
|
|
|
158 |
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
st.
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from dotenv import load_dotenv
|
3 |
import requests
|
4 |
from bs4 import BeautifulSoup
|
5 |
+
from bs4 import NavigableString
|
6 |
import openai
|
7 |
import os
|
8 |
import hmac
|
9 |
+
from st_copy_to_clipboard import st_copy_to_clipboard
|
10 |
+
|
11 |
|
12 |
load_dotenv()
|
13 |
|
|
|
36 |
messages=[
|
37 |
{
|
38 |
"role": "system",
|
39 |
+
"content": f"You are a professional journalist whose task is to write your own article based on one or more articles. This article should combine the content of the original articles, but have its own writing style, which is as follows: {writing_style} Do not use unusual phrases or neologisms from the original articles. The length of your article should be {length} sentences long.",
|
40 |
},
|
41 |
{
|
42 |
"role": "system",
|
|
|
56 |
except Exception as e:
|
57 |
print(f"Fehler beim erstellen des artikels: {str(e)}")
|
58 |
st.error(f"Something went wrong: {str(e)}", icon="🚨")
|
59 |
+
|
60 |
+
def create_headline(article):
    """Request a short German headline for ``article`` from the chat model.

    The OpenAI client settings (key, base, type, version) and the writing
    style are read from environment variables on every call.

    Returns the content of the first completion choice. If anything raises
    while building or sending the request, the error is printed, shown with
    ``st.error``, and the function falls through (implicitly returning
    ``None``).
    """
    env = os.environ
    openai.api_key = env.get("OPEN_API_KEY")
    openai.api_base = env.get("OPEN_API_BASE")
    openai.api_type = env.get("OPEN_API_TYPE")
    openai.api_version = env.get("OPEN_API_VERSION")

    writing_style = env.get("WRITING_STYLE")

    try:
        # Every prompt part is sent with the "system" role, in this order.
        system_prompts = [
            f"You are a professional journalist and have the task of generating a headline for an article you have written. I will give you the writing style that was used to create the article as info. Writing style: {writing_style} The headline should be as short as possible, but still capture the essence of the article. It should be a maximum of 10 words long",
            f"Source article: {article}",
            "Schreibe die Headline immer in deutscher Sprache.",
        ]
        completion = openai.ChatCompletion.create(
            engine="gpt-4-1106",
            temperature=0.4,
            messages=[
                {"role": "system", "content": prompt} for prompt in system_prompts
            ],
        )
        first_choice = completion["choices"][0]
        return first_choice["message"]["content"]
    except Exception as e:
        print(f"Fehler beim erstellen der headline: {str(e)}")
        st.error(f"Something went wrong: {str(e)}", icon="🚨")
|
91 |
+
|
92 |
+
def extract_text_from_element(element):
    """Collect the text of the <p>, <ul> and <ol> tags found under ``element``.

    The tree is walked depth-first. When a <p>, <ul> or <ol> tag is reached,
    its complete text (``get_text()``, which already includes all of its
    descendants) is emitted followed by a newline, and the walk does not
    descend into that tag.

    Args:
        element: A parsed node; either a tag exposing ``name``, ``get_text()``
            and ``children``, or a ``NavigableString`` text node.

    Returns:
        The collected text as one string, one matched tag per line; an empty
        string if nothing matched.
    """
    if element.name in ('p', 'ul', 'ol'):
        # Stop here: recursing further would emit nested <p>/<ul>/<ol> tags
        # (e.g. <ul><li><p>..</p></li></ul>) a second time, because their
        # text is already part of this tag's get_text().
        return element.get_text() + '\n'

    # Text nodes have no children to visit.
    if isinstance(element, NavigableString):
        return ""

    # Join the children's results once instead of growing a string with "+=".
    return "".join(extract_text_from_element(child) for child in element.children)
|
109 |
+
|
110 |
+
def extract_article(url):
|
111 |
# Webseite herunterladen
|
112 |
response = requests.get(url)
|
113 |
|
|
|
116 |
# HTML-Inhalt parsen
|
117 |
soup = BeautifulSoup(response.text, 'html.parser')
|
118 |
|
119 |
+
# Finden Sie das <article>-Tag (nehmen Sie an, dass es eins gibt)
|
120 |
+
article_tag = soup.find('article')
|
|
|
|
|
|
|
121 |
|
122 |
+
if article_tag:
|
123 |
+
# Starte die Rekursion für jedes Child-Element des <article>-Tags
|
124 |
+
extracted_text = extract_text_from_element(article_tag)
|
125 |
+
stripped_text = filter_empty_lines(extracted_text)
|
126 |
+
return stripped_text
|
127 |
+
else:
|
128 |
+
print("Kein <article>-Tag gefunden.")
|
129 |
+
return None
|
130 |
else:
|
131 |
# Falls die Anfrage nicht erfolgreich war, eine Fehlermeldung ausgeben
|
132 |
print(f"Fehler: {response.status_code}")
|
133 |
return None
|
134 |
|
135 |
+
def filter_empty_lines(text):
    """Return ``text`` with all empty and whitespace-only lines removed.

    The text is split on ``'\\n'``. Lines that are kept are left untouched
    (their own leading/trailing whitespace is preserved) and are re-joined
    with single newlines, so the result never starts or ends with a newline.

    Args:
        text: The string to clean up.

    Returns:
        The remaining lines joined by ``'\\n'``; ``""`` if no line has
        visible content.
    """
    return '\n'.join(line for line in text.split('\n') if line.strip())
|
146 |
+
|
147 |
def extract_article_links(**kwargs):
|
148 |
# print(len(kwargs["links"]))
|
149 |
with st.spinner("Extrahiere..."):
|
|
|
151 |
for link in kwargs["links"]:
|
152 |
results.append(extract_article(link))
|
153 |
st.session_state["extracted_articles"] = results
|
154 |
+
if st.session_state["process_step"] < 1:
|
155 |
+
st.session_state["process_step"] += 1
|
156 |
+
st.session_state["selected_page"] = 1
|
157 |
+
|
158 |
+
def extract_article_links_for_heading(**kwargs):
    """Button callback: run the article extraction for the headline-only flow.

    Expects the keyword argument ``link``. The visible call site passes the
    URL wrapped in a one-element list, so a list/tuple is unwrapped to its
    first entry before ``extract_article`` is called with it. A plain string
    is passed through unchanged.

    The extracted text is currently not stored or returned.
    NOTE(review): this looks like an unfinished feature; confirm what should
    happen with the result.
    """
    link = kwargs["link"]
    if isinstance(link, (list, tuple)):
        # extract_article hands its argument straight to requests.get, which
        # needs a single URL rather than a container.
        link = link[0] if link else None
    if not link:
        return
    extract_article(link)
|
160 |
+
|
161 |
def finalize_articles():
    """Button callback: store the edited articles and move on one page.

    One ``final_article_<n>`` session-state entry (numbered from 1) is read
    for every entry in ``extracted_articles`` and the values are stored, in
    order, under ``final_articles``. ``process_step`` is raised by one while
    it is still below 2, and ``selected_page`` is advanced by one.
    """
    state = st.session_state
    article_count = len(state["extracted_articles"])
    state["final_articles"] = [
        state["final_article_" + str(position)]
        for position in range(1, article_count + 1)
    ]

    # Unlock the next step only the first time this point is reached.
    if state["process_step"] < 2:
        state["process_step"] += 1
    state["selected_page"] += 1
|
169 |
+
|
170 |
+
def increase_page():
    """Navigation callback: move to the next page if it is already unlocked.

    A page is unlocked once ``process_step`` has reached its index, so the
    selection may only advance while it is strictly below ``process_step``.
    The previous ``<=`` comparison allowed a step onto ``process_step + 1``,
    i.e. a locked page and, at the last step, an index past the end of the
    page-title list.
    """
    state = st.session_state
    if state["selected_page"] < state["process_step"]:
        state["selected_page"] += 1
|
173 |
+
|
174 |
+
def decrease_page():
    """Navigation callback: move back one page, never below page 0."""
    state = st.session_state
    if state["selected_page"] <= 0:
        # Already on the first page; nothing to do.
        return
    state["selected_page"] -= 1
|
177 |
+
|
178 |
+
def on_click_handler_generate_article(**kwargs):
    """Button callback: generate the article and its headline, then show them.

    Expects the keyword arguments ``length_option``, ``final_articles`` and
    ``add_info``, which are forwarded to ``create_article``. On success the
    article and headline are stored under ``generated_article`` and
    ``generated_headline``, ``process_step`` is raised by one while it is
    below 3, and ``selected_page`` is advanced by one.

    If no article could be generated, nothing is stored and the page is not
    changed; ``create_article`` has already reported the error to the user.
    """
    with st.spinner("Generiere Artikel..."):
        created_article = create_article(
            kwargs["length_option"], kwargs["final_articles"], kwargs["add_info"]
        )
        if not created_article:
            # Do not advance to the output page with an empty result: that
            # page concatenates headline and article as strings.
            return

        headline = create_headline(created_article)
        st.session_state["generated_article"] = created_article
        # Headline generation can fail on its own; keep a string so the
        # article can still be displayed and copied.
        st.session_state["generated_headline"] = headline or ""

    if st.session_state["process_step"] < 3:
        st.session_state["process_step"] += 1
    st.session_state["selected_page"] += 1
|
187 |
+
|
188 |
+
def reset_session_state():
    """Button callback: return the workflow to its initial, empty state.

    Clears the collected links, the extracted and finalized articles and the
    generated output, and sends the user back to the first page with no
    further steps unlocked. The generated headline is cleared together with
    the generated article so no stale headline survives a reset.
    """
    state = st.session_state
    state["extracted_articles"] = []
    state["article_links"] = []
    state["final_articles"] = []
    state["process_step"] = 0
    state["selected_page"] = 0
    state["generated_article"] = ""
    state["generated_headline"] = ""
|
195 |
+
|
196 |
|
197 |
if "extracted_articles" not in st.session_state:
|
198 |
st.session_state["extracted_articles"] = []
|
|
|
200 |
st.session_state["article_links"] = []
|
201 |
# Seed the remaining session-state entries on the first run of a session;
# values that already exist are left untouched.
for _state_key, _initial_value in (
    ("final_articles", []),
    ("process_step", 0),
    ("selected_page", 0),
    ("generated_article", ""),
    ("function_state", True),
    ("generated_headline", ""),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Page titles, indexed by the selected page number.
PROCESS_STEPS = [
    "Artikel Extraktion",
    "Artikel Finalisierung",
    "Artikel Generierung",
    "Artikel Ausgabe",
]
|
215 |
|
216 |
def check_password():
|
217 |
"""Returns `True` if the user had the correct password."""
|
|
|
242 |
|
243 |
col1, col2 = st.columns([2, 1])
|
244 |
|
245 |
+
col1.title("TINA")
|
246 |
col2.image("heute_logo.png")
|
247 |
|
248 |
+
# with st.sidebar:
|
249 |
+
# st.title("Funktions Auswahl")
|
250 |
+
# st.write("Hier kannst Du zwischen der Artikel generierung und Überschrift generierung wählen.")
|
251 |
+
# st.button("Artikel Generierung", key="article_gen_btn", use_container_width=True, on_click=lambda: st.session_state.update({"function_state": True}))
|
252 |
+
# st.button("Überschrift Generierung", key="headline_gen_btn", use_container_width=True, on_click=lambda: st.session_state.update({"function_state": False}))
|
253 |
+
|
254 |
+
if st.session_state["function_state"]:
|
255 |
+
tab_col1, tab_col2, tab_col3, tab_col4 = st.columns([1, 1, 1, 1])
|
256 |
+
|
257 |
+
tab_col1.button("Artikel Extraktion", key="tab1", use_container_width=True, on_click=lambda: st.session_state.update({"selected_page": 0}), disabled=st.session_state["selected_page"] == 0)
|
258 |
+
tab_col2.button("Artikel Finalisierung", key="tab2", use_container_width=True, on_click=lambda: st.session_state.update({"selected_page": 1}), disabled=st.session_state["process_step"] < 1 or st.session_state["selected_page"] == 1)
|
259 |
+
tab_col3.button("Artikel Generierung", key="tab3", use_container_width=True, on_click=lambda: st.session_state.update({"selected_page": 2}), disabled=st.session_state["process_step"] < 2 or st.session_state["selected_page"] == 2)
|
260 |
+
tab_col4.button("Artikel Ausgabe", key="tab4", use_container_width=True, on_click=lambda: st.session_state.update({"selected_page": 3}), disabled=st.session_state["process_step"] < 3 or st.session_state["selected_page"] == 3)
|
261 |
|
262 |
+
nav_col1, nav_col2, nav_col3 = st.columns([1, 4, 1])
|
263 |
+
|
264 |
+
nav_col1.button("◀️", key="nav1", use_container_width=True, on_click=decrease_page, disabled=st.session_state["selected_page"] == 0)
|
265 |
+
nav_col2.markdown(f"<div style='text-align: center;'>{PROCESS_STEPS[st.session_state['selected_page']]}</div>", unsafe_allow_html=True)
|
266 |
+
nav_col3.button("▶️", key="nav2", use_container_width=True, on_click=increase_page, disabled=st.session_state["selected_page"] == st.session_state["process_step"])
|
267 |
+
|
268 |
+
if(st.session_state["selected_page"] == 0):
|
269 |
+
st.write("Bitte gebe die Links der Artikel ein, welche Du extrahiert haben möchtest.")
|
270 |
+
st.text_input("Gebe den "+str(len(st.session_state["article_links"])+1)+". Link ein:",key="link_input_"+str(len(st.session_state["article_links"])+1))
|
271 |
if st.session_state["link_input_"+str(len(st.session_state["article_links"])+1)]:
|
272 |
st.session_state["article_links"].append(st.session_state["link_input_"+str(len(st.session_state["article_links"])+1)])
|
273 |
st.rerun()
|
|
|
279 |
st.button("Extrahiere Artikel",on_click=extract_article_links,kwargs={"links":st.session_state["article_links"]})
|
280 |
except Exception as e:
|
281 |
print(f"Fehler beim extrahieren der artikel: {str(e)}")
|
282 |
+
st.error(f"Du hast einen oder mehrere Links nicht in dem korrekten Format angegeben. Bitte Lade die Seite neu und benutze korrekte Links: {str(e)}", icon="🚨")
|
283 |
+
elif(st.session_state["selected_page"] == 1):
|
284 |
+
st.write("Hier kannst Du die extrahierten Artikel ansehen und bei Bedarf anpassen.")
|
285 |
for i,article in enumerate(st.session_state["extracted_articles"]):
|
286 |
with st.expander(f"Artikel {i+1}"):
|
287 |
if article:
|
288 |
+
st.text_area("Editiere die Artikel, falls nötig:", value=article, key="final_article_"+str(i+1), height=500)
|
289 |
else:
|
290 |
+
st.info("Die Webseite des Artikels blockiert das automatische extrahieren von Artikeln. Wenn Du den Artikel dennoch benutzen möchtest, dann kannst Du diesen kopieren und einfügen.", icon="ℹ️")
|
291 |
+
st.text_area("Füge den Artikel ein, falls nötig:", value=article, key="final_article_"+str(i+1), height=500)
|
292 |
st.button("Artikel finalisieren",on_click=finalize_articles)
|
293 |
+
elif(st.session_state["selected_page"] == 2):
|
294 |
+
for i in range(len(st.session_state["final_articles"])):
|
295 |
+
if st.session_state["final_articles"][i]:
|
296 |
+
with st.expander("Artikel "+ str(i+1)):
|
297 |
+
st.write(st.session_state["final_articles"][i])
|
298 |
|
299 |
+
if len(st.session_state["final_articles"]) > 0:
|
300 |
+
st.write("Benutzte Artikel:")
|
301 |
+
for i,link in enumerate(st.session_state["article_links"]):
|
302 |
+
st.write(f"Link {i+1}: {link}")
|
303 |
+
st.text_area("Füge weitere Informationen für den Prompt hinzu, falls nötig:",key="add_info")
|
304 |
+
st.write("Artikellänge")
|
305 |
+
st.radio("Optionen",["Kurz", "Mittel", "Lang"], key="length_option")
|
306 |
+
st.button("Artikel generieren", key="article_btn", on_click=on_click_handler_generate_article, kwargs={"length_option":st.session_state["length_option"],"final_articles":st.session_state["final_articles"],"add_info":st.session_state["add_info"]})
|
307 |
+
elif(st.session_state["selected_page"] == 3):
|
308 |
+
st.write(st.session_state["generated_headline"])
|
309 |
+
st.write(st.session_state["generated_article"])
|
310 |
+
st.write("Kopieren Sie den Artikel: ")
|
311 |
+
st_copy_to_clipboard(st.session_state["generated_headline"]+"\n"+st.session_state["generated_article"])
|
312 |
+
st.button("Neuen Artikel generieren", key="reset_btn", on_click=reset_session_state)
|
313 |
+
else:
|
314 |
+
st.write("Bitte gebe den Link ein für welchen du eine Überschrift generiert haben möchtest.")
|
315 |
+
st.text_input("Gebe den Link ein:",key="headline_link_input")
|
316 |
+
if st.session_state["headline_link_input"]:
|
317 |
+
st.button("Generiere Überschrift",on_click=extract_article_links_for_heading,kwargs={"link":[st.session_state["headline_link_input"]]})
|