mbosse99 committed on
Commit
8f94101
·
verified ·
1 Parent(s): a1e299f

Update to POC Delux

Browse files
Files changed (1) hide show
  1. app.py +180 -34
app.py CHANGED
@@ -2,9 +2,12 @@ import streamlit as st
2
  from dotenv import load_dotenv
3
  import requests
4
  from bs4 import BeautifulSoup
 
5
  import openai
6
  import os
7
  import hmac
 
 
8
 
9
  load_dotenv()
10
 
@@ -33,7 +36,7 @@ def create_article(length_option, articles, params):
33
  messages=[
34
  {
35
  "role": "system",
36
- "content": f"You are a professional journalist whose task is to write your own article based on one or more articles. This article should combine the content of the original articles, but have its own writing style, which is as follows: {writing_style} The length of your article should be {length} sentences long.",
37
  },
38
  {
39
  "role": "system",
@@ -53,8 +56,58 @@ def create_article(length_option, articles, params):
53
  except Exception as e:
54
  print(f"Fehler beim erstellen des artikels: {str(e)}")
55
  st.error(f"Something went wrong: {str(e)}", icon="🚨")
56
-
57
- def extract_article(url):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  # Webseite herunterladen
59
  response = requests.get(url)
60
 
@@ -63,18 +116,34 @@ def extract_article(url):
63
  # HTML-Inhalt parsen
64
  soup = BeautifulSoup(response.text, 'html.parser')
65
 
66
- # Alle <p>-Tags finden und den Textinhalt extrahieren
67
- paragraphs = soup.find_all('p')
68
-
69
- # Textinhalt der <p>-Tags zusammenführen
70
- text_content = '\n'.join([p.get_text() for p in paragraphs])
71
 
72
- return text_content
 
 
 
 
 
 
 
73
  else:
74
  # Falls die Anfrage nicht erfolgreich war, eine Fehlermeldung ausgeben
75
  print(f"Fehler: {response.status_code}")
76
  return None
77
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def extract_article_links(**kwargs):
79
  # print(len(kwargs["links"]))
80
  with st.spinner("Extrahiere..."):
@@ -82,12 +151,48 @@ def extract_article_links(**kwargs):
82
  for link in kwargs["links"]:
83
  results.append(extract_article(link))
84
  st.session_state["extracted_articles"] = results
85
-
 
 
 
 
 
 
86
  def finalize_articles():
87
  final_articles = []
88
  for i in range(len(st.session_state["extracted_articles"])):
89
  final_articles.append(st.session_state["final_article_"+str(i+1)])
90
  st.session_state["final_articles"] = final_articles
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  if "extracted_articles" not in st.session_state:
93
  st.session_state["extracted_articles"] = []
@@ -95,6 +200,18 @@ if "article_links" not in st.session_state:
95
  st.session_state["article_links"] = []
96
  if "final_articles" not in st.session_state:
97
  st.session_state["final_articles"] = []
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  def check_password():
100
  """Returns `True` if the user had the correct password."""
@@ -125,14 +242,32 @@ if not check_password():
125
 
126
  col1, col2 = st.columns([2, 1])
127
 
128
- col1.title("AI Journalist")
129
  col2.image("heute_logo.png")
130
 
131
- if len(st.session_state["final_articles"]) < 1:
132
- if len(st.session_state["extracted_articles"]) < 1:
133
- st.write("Bitte geben Sie die Links der Artikel ein, welche sie extrahieren möchten.")
 
 
 
 
 
 
 
 
 
 
134
 
135
- st.text_input("Geben Sie den "+str(len(st.session_state["article_links"])+1)+". Link ein:",key="link_input_"+str(len(st.session_state["article_links"])+1))
 
 
 
 
 
 
 
 
136
  if st.session_state["link_input_"+str(len(st.session_state["article_links"])+1)]:
137
  st.session_state["article_links"].append(st.session_state["link_input_"+str(len(st.session_state["article_links"])+1)])
138
  st.rerun()
@@ -144,28 +279,39 @@ if len(st.session_state["final_articles"]) < 1:
144
  st.button("Extrahiere Artikel",on_click=extract_article_links,kwargs={"links":st.session_state["article_links"]})
145
  except Exception as e:
146
  print(f"Fehler beim extrahieren der artikel: {str(e)}")
147
- st.error(f"Sie haben einen oder mehrere Links nicht in dem korrekten Format angegeben. Bitte Laden Sie die Seite neu und benutzen Sie korrekte Links: {str(e)}", icon="🚨")
148
- else:
149
- st.write("Hier können Sie sich die extrahierten Artikel ansehen und bei Bedarf anpassen.")
150
  for i,article in enumerate(st.session_state["extracted_articles"]):
151
  with st.expander(f"Artikel {i+1}"):
152
  if article:
153
- st.text_area("Edit the article if needed:", value=article, key="final_article_"+str(i+1), height=500)
154
  else:
155
- st.info("The website on which the article is published blocks the automatic extraction of its content. If you still want to use the article, you will have to insert the text manually.", icon="ℹ️")
156
- st.text_area("Paste the article if needed:", value=article, key="final_article_"+str(i+1), height=500)
157
  st.button("Artikel finalisieren",on_click=finalize_articles)
 
 
 
 
 
158
 
159
- for i in range(len(st.session_state["final_articles"])):
160
- if st.session_state["final_articles"][i]:
161
- with st.expander("Artikel "+ str(i+1)):
162
- st.write(st.session_state["final_articles"][i])
163
-
164
- if len(st.session_state["final_articles"]) > 0:
165
- st.text_area("Fügen Sie weitere Informationen für den Prompt hinzu, falls nötig:",key="add_info")
166
- st.write("Artikellänge")
167
- st.radio("Optionen",["Kurz", "Mittel", "Lang"], key="length_option")
168
- if st.button("Artikel generieren", key="article_btn"):
169
- with st.spinner("Generiere Artikel..."):
170
- created_article = create_article(st.session_state["length_option"],st.session_state["final_articles"],st.session_state["add_info"])
171
- st.write(created_article)
 
 
 
 
 
 
 
2
  from dotenv import load_dotenv
3
  import requests
4
  from bs4 import BeautifulSoup
5
+ from bs4 import NavigableString
6
  import openai
7
  import os
8
  import hmac
9
+ from st_copy_to_clipboard import st_copy_to_clipboard
10
+
11
 
12
  load_dotenv()
13
 
 
36
  messages=[
37
  {
38
  "role": "system",
39
+ "content": f"You are a professional journalist whose task is to write your own article based on one or more articles. This article should combine the content of the original articles, but have its own writing style, which is as follows: {writing_style} Do not use unusual phrases or neologisms from the original articles. The length of your article should be {length} sentences long.",
40
  },
41
  {
42
  "role": "system",
 
56
  except Exception as e:
57
  print(f"Fehler beim erstellen des artikels: {str(e)}")
58
  st.error(f"Something went wrong: {str(e)}", icon="🚨")
59
+
60
def create_headline(article):
    """Ask the chat model for a short headline for ``article``.

    The ``openai`` module is configured from the ``OPEN_API_*`` environment
    variables on every call. Three system messages are sent: the headline
    instructions (which embed the ``WRITING_STYLE`` environment value), the
    article text, and an instruction to always write the headline in German.

    Args:
        article: Text of the article; interpolated into the prompt.

    Returns:
        The message content of the first choice in the response. If anything
        raises, the error is printed and shown through ``st.error`` and
        ``None`` is returned.
    """
    # Map each openai module setting to the environment variable feeding it.
    for setting, variable in (
        ("api_key", "OPEN_API_KEY"),
        ("api_base", "OPEN_API_BASE"),
        ("api_type", "OPEN_API_TYPE"),
        ("api_version", "OPEN_API_VERSION"),
    ):
        setattr(openai, setting, os.environ.get(variable))

    writing_style = os.environ.get("WRITING_STYLE")

    try:
        prompt = [
            {
                "role": "system",
                "content": f"You are a professional journalist and have the task of generating a headline for an article you have written. I will give you the writing style that was used to create the article as info. Writing style: {writing_style} The headline should be as short as possible, but still capture the essence of the article. It should be a maximum of 10 words long",
            },
            {
                "role": "system",
                "content": f"Source article: {article}"
            },
            {
                "role": "system",
                "content": "Schreibe die Headline immer in deutscher Sprache."
            },
        ]
        response = openai.ChatCompletion.create(
            engine="gpt-4-1106",
            temperature=0.4,
            messages=prompt,
        )
        return response["choices"][0]["message"]["content"]
    except Exception as error:
        # Report on the console and in the UI; the caller receives None.
        print(f"Fehler beim erstellen der headline: {str(error)}")
        st.error(f"Something went wrong: {str(error)}", icon="🚨")
        return None
91
+
92
def extract_text_from_element(element):
    """Collect the text of the <p>, <ul> and <ol> elements below ``element``.

    The tree is walked depth-first. As soon as a <p>, <ul> or <ol> is reached,
    its complete text is taken and the walk does not descend any further into
    it, so paragraphs or lists nested inside such an element are not emitted
    a second time.

    Args:
        element: A parsed BeautifulSoup node (a tag or a text node).

    Returns:
        The collected text, one newline-terminated entry per matched element,
        or an empty string when nothing matched.
    """
    # Text nodes have no children; their text is only picked up through an
    # enclosing <p>/<ul>/<ol>.
    if isinstance(element, NavigableString):
        return ""

    if element.name in ('p', 'ul', 'ol'):
        # get_text() already contains the text of all descendants, so
        # recursing here would duplicate nested content.
        return element.get_text() + '\n'

    # Any other tag is just a container: gather the text from its children.
    return "".join(extract_text_from_element(child) for child in element.children)
109
+
110
+ def extract_article(url):
111
  # Webseite herunterladen
112
  response = requests.get(url)
113
 
 
116
  # HTML-Inhalt parsen
117
  soup = BeautifulSoup(response.text, 'html.parser')
118
 
119
+ # Finden Sie das <article>-Tag (nehmen Sie an, dass es eins gibt)
120
+ article_tag = soup.find('article')
 
 
 
121
 
122
+ if article_tag:
123
+ # Starte die Rekursion für jedes Child-Element des <article>-Tags
124
+ extracted_text = extract_text_from_element(article_tag)
125
+ stripped_text = filter_empty_lines(extracted_text)
126
+ return stripped_text
127
+ else:
128
+ print("Kein <article>-Tag gefunden.")
129
+ return None
130
  else:
131
  # Falls die Anfrage nicht erfolgreich war, eine Fehlermeldung ausgeben
132
  print(f"Fehler: {response.status_code}")
133
  return None
134
 
135
def filter_empty_lines(text):
    """Return ``text`` without its empty or whitespace-only lines.

    The text is split on ``'\\n'``; lines that contain anything besides
    whitespace are kept unchanged (including their own leading and trailing
    whitespace) and re-joined with ``'\\n'``.

    Args:
        text: The string to clean up.

    Returns:
        The remaining lines joined by newlines; an empty string if no line
        has visible content.
    """
    kept_rows = [row for row in text.split('\n') if row.strip()]
    return '\n'.join(kept_rows)
146
+
147
  def extract_article_links(**kwargs):
148
  # print(len(kwargs["links"]))
149
  with st.spinner("Extrahiere..."):
 
151
  for link in kwargs["links"]:
152
  results.append(extract_article(link))
153
  st.session_state["extracted_articles"] = results
154
+ if st.session_state["process_step"] < 1:
155
+ st.session_state["process_step"] += 1
156
+ st.session_state["selected_page"] = 1
157
+
158
def extract_article_links_for_heading(**kwargs):
    """Button callback: extract one article and generate a headline for it.

    Args:
        **kwargs: Must contain ``link`` - either the article URL itself or a
            list/tuple whose first item is the URL (the "Generiere
            Überschrift" button passes a one-element list).

    Side effects:
        Stores the generated headline in
        ``st.session_state["generated_headline"]``. Nothing is stored when no
        link was given or the article could not be extracted.
    """
    link = kwargs["link"]

    # Unwrap the list form used by the button; passing the list itself on
    # would not be a usable URL for the download.
    if isinstance(link, (list, tuple)):
        if not link:
            return
        link = link[0]

    article = extract_article(link)
    if not article:
        # extract_article returns None when the download or the <article>
        # lookup fails; there is nothing to write a headline for.
        return

    # NOTE(review): storing the result under "generated_headline" is an
    # assumption - the headline page does not render any result yet; confirm
    # where it should be displayed.
    st.session_state["generated_headline"] = create_headline(article) or ""
160
+
161
def finalize_articles():
    """Button callback: freeze the edited article texts and move on.

    For every extracted article the current content of its text area
    (session keys ``final_article_1`` .. ``final_article_N``) is copied into
    ``st.session_state["final_articles"]``. The workflow is then unlocked up
    to step 2 and the next page is selected.
    """
    state = st.session_state
    article_count = len(state["extracted_articles"])

    # Widget keys are 1-based, matching the "Artikel N" expanders.
    state["final_articles"] = [
        state["final_article_" + str(number)]
        for number in range(1, article_count + 1)
    ]

    if state["process_step"] < 2:
        state["process_step"] += 1
    # NOTE(review): the page advance is treated as unconditional (not part of
    # the guard above) - confirm against the intended nesting.
    state["selected_page"] += 1
169
+
170
def increase_page():
    """Button callback: go forward one page, but never past the unlocked step.

    ``process_step`` is the highest page the user has unlocked so far. The
    forward button is disabled once ``selected_page`` equals it, so the
    callback uses the same strict bound; advancing at equality would select a
    page that is not unlocked yet (and, at the last step, an index beyond the
    list of page titles).
    """
    state = st.session_state
    if state["selected_page"] < state["process_step"]:
        state["selected_page"] += 1
173
+
174
def decrease_page():
    """Button callback: go back one page unless page 0 is already selected."""
    current_page = st.session_state["selected_page"]
    if current_page <= 0:
        return
    st.session_state["selected_page"] = current_page - 1
177
+
178
def on_click_handler_generate_article(**kwargs):
    """Button callback: generate the article and its headline, then advance.

    Args:
        **kwargs: Must contain ``length_option``, ``final_articles`` and
            ``add_info``; they are passed to ``create_article`` in that order.

    Side effects:
        On success stores the article in
        ``st.session_state["generated_article"]`` and the headline in
        ``st.session_state["generated_headline"]``, unlocks the workflow up to
        step 3 and selects the next page. If no article was produced, the
        session state is left untouched and the user stays on the current
        page.
    """
    with st.spinner("Generiere Artikel..."):
        created_article = create_article(
            kwargs["length_option"],
            kwargs["final_articles"],
            kwargs["add_info"],
        )
        if not created_article:
            # create_article reports its own errors and then yields nothing.
            # Do not spend a headline request on it or open the output page,
            # which concatenates headline and article as strings.
            return
        headline = create_headline(created_article)

    st.session_state["generated_article"] = created_article
    # create_headline returns None on failure; keep the stored value a string
    # so the output page can still show and copy the article.
    st.session_state["generated_headline"] = headline or ""

    if st.session_state["process_step"] < 3:
        st.session_state["process_step"] += 1
    # NOTE(review): the page advance is treated as unconditional (not part of
    # the guard above) - confirm against the intended nesting.
    st.session_state["selected_page"] += 1
187
+
188
def reset_session_state():
    """Button callback: return the workflow to its initial, empty state.

    Clears the collected links, the extracted and finalized articles, the
    generated article and its headline, and moves both the unlocked step and
    the selected page back to 0.
    """
    st.session_state["extracted_articles"] = []
    st.session_state["article_links"] = []
    st.session_state["final_articles"] = []
    st.session_state["process_step"] = 0
    st.session_state["selected_page"] = 0
    st.session_state["generated_article"] = ""
    # Reset the headline together with the article so that no stale headline
    # from the previous run survives.
    st.session_state["generated_headline"] = ""
195
+
196
 
197
  if "extracted_articles" not in st.session_state:
198
  st.session_state["extracted_articles"] = []
 
200
  st.session_state["article_links"] = []
201
  if "final_articles" not in st.session_state:
202
  st.session_state["final_articles"] = []
203
+ if "process_step" not in st.session_state:
204
+ st.session_state["process_step"] = 0
205
+ if "selected_page" not in st.session_state:
206
+ st.session_state["selected_page"] = 0
207
+ if "generated_article" not in st.session_state:
208
+ st.session_state["generated_article"] = ""
209
+ if "function_state" not in st.session_state:
210
+ st.session_state["function_state"] = True
211
+ if "generated_headline" not in st.session_state:
212
+ st.session_state["generated_headline"] = ""
213
+
214
+ PROCESS_STEPS = ["Artikel Extraktion","Artikel Finalisierung","Artikel Generierung","Artikel Ausgabe"]
215
 
216
  def check_password():
217
  """Returns `True` if the user had the correct password."""
 
242
 
243
  col1, col2 = st.columns([2, 1])
244
 
245
+ col1.title("TINA")
246
  col2.image("heute_logo.png")
247
 
248
+ # with st.sidebar:
249
+ # st.title("Funktions Auswahl")
250
+ # st.write("Hier kannst Du zwischen der Artikel generierung und Überschrift generierung wählen.")
251
+ # st.button("Artikel Generierung", key="article_gen_btn", use_container_width=True, on_click=lambda: st.session_state.update({"function_state": True}))
252
+ # st.button("Überschrift Generierung", key="headline_gen_btn", use_container_width=True, on_click=lambda: st.session_state.update({"function_state": False}))
253
+
254
+ if st.session_state["function_state"]:
255
+ tab_col1, tab_col2, tab_col3, tab_col4 = st.columns([1, 1, 1, 1])
256
+
257
+ tab_col1.button("Artikel Extraktion", key="tab1", use_container_width=True, on_click=lambda: st.session_state.update({"selected_page": 0}), disabled=st.session_state["selected_page"] == 0)
258
+ tab_col2.button("Artikel Finalisierung", key="tab2", use_container_width=True, on_click=lambda: st.session_state.update({"selected_page": 1}), disabled=st.session_state["process_step"] < 1 or st.session_state["selected_page"] == 1)
259
+ tab_col3.button("Artikel Generierung", key="tab3", use_container_width=True, on_click=lambda: st.session_state.update({"selected_page": 2}), disabled=st.session_state["process_step"] < 2 or st.session_state["selected_page"] == 2)
260
+ tab_col4.button("Artikel Ausgabe", key="tab4", use_container_width=True, on_click=lambda: st.session_state.update({"selected_page": 3}), disabled=st.session_state["process_step"] < 3 or st.session_state["selected_page"] == 3)
261
 
262
+ nav_col1, nav_col2, nav_col3 = st.columns([1, 4, 1])
263
+
264
+ nav_col1.button("◀️", key="nav1", use_container_width=True, on_click=decrease_page, disabled=st.session_state["selected_page"] == 0)
265
+ nav_col2.markdown(f"<div style='text-align: center;'>{PROCESS_STEPS[st.session_state['selected_page']]}</div>", unsafe_allow_html=True)
266
+ nav_col3.button("▶️", key="nav2", use_container_width=True, on_click=increase_page, disabled=st.session_state["selected_page"] == st.session_state["process_step"])
267
+
268
+ if(st.session_state["selected_page"] == 0):
269
+ st.write("Bitte gebe die Links der Artikel ein, welche Du extrahiert haben möchtest.")
270
+ st.text_input("Gebe den "+str(len(st.session_state["article_links"])+1)+". Link ein:",key="link_input_"+str(len(st.session_state["article_links"])+1))
271
  if st.session_state["link_input_"+str(len(st.session_state["article_links"])+1)]:
272
  st.session_state["article_links"].append(st.session_state["link_input_"+str(len(st.session_state["article_links"])+1)])
273
  st.rerun()
 
279
  st.button("Extrahiere Artikel",on_click=extract_article_links,kwargs={"links":st.session_state["article_links"]})
280
  except Exception as e:
281
  print(f"Fehler beim extrahieren der artikel: {str(e)}")
282
+ st.error(f"Du hast einen oder mehrere Links nicht in dem korrekten Format angegeben. Bitte Lade die Seite neu und benutze korrekte Links: {str(e)}", icon="🚨")
283
+ elif(st.session_state["selected_page"] == 1):
284
+ st.write("Hier kannst Du die extrahierten Artikel ansehen und bei Bedarf anpassen.")
285
  for i,article in enumerate(st.session_state["extracted_articles"]):
286
  with st.expander(f"Artikel {i+1}"):
287
  if article:
288
+ st.text_area("Editiere die Artikel, falls nötig:", value=article, key="final_article_"+str(i+1), height=500)
289
  else:
290
+ st.info("Die Webseite des Artikels blockiert das automatische extrahieren von Artikeln. Wenn Du den Artikel dennoch benutzen möchtest, dann kannst Du diesen kopieren und einfügen.", icon="ℹ️")
291
+ st.text_area("Füge den Artikel ein, falls nötig:", value=article, key="final_article_"+str(i+1), height=500)
292
  st.button("Artikel finalisieren",on_click=finalize_articles)
293
+ elif(st.session_state["selected_page"] == 2):
294
+ for i in range(len(st.session_state["final_articles"])):
295
+ if st.session_state["final_articles"][i]:
296
+ with st.expander("Artikel "+ str(i+1)):
297
+ st.write(st.session_state["final_articles"][i])
298
 
299
+ if len(st.session_state["final_articles"]) > 0:
300
+ st.write("Benutzte Artikel:")
301
+ for i,link in enumerate(st.session_state["article_links"]):
302
+ st.write(f"Link {i+1}: {link}")
303
+ st.text_area("Füge weitere Informationen für den Prompt hinzu, falls nötig:",key="add_info")
304
+ st.write("Artikellänge")
305
+ st.radio("Optionen",["Kurz", "Mittel", "Lang"], key="length_option")
306
+ st.button("Artikel generieren", key="article_btn", on_click=on_click_handler_generate_article, kwargs={"length_option":st.session_state["length_option"],"final_articles":st.session_state["final_articles"],"add_info":st.session_state["add_info"]})
307
+ elif(st.session_state["selected_page"] == 3):
308
+ st.write(st.session_state["generated_headline"])
309
+ st.write(st.session_state["generated_article"])
310
+ st.write("Kopieren Sie den Artikel: ")
311
+ st_copy_to_clipboard(st.session_state["generated_headline"]+"\n"+st.session_state["generated_article"])
312
+ st.button("Neuen Artikel generieren", key="reset_btn", on_click=reset_session_state)
313
+ else:
314
+ st.write("Bitte gebe den Link ein für welchen du eine Überschrift generiert haben möchtest.")
315
+ st.text_input("Gebe den Link ein:",key="headline_link_input")
316
+ if st.session_state["headline_link_input"]:
317
+ st.button("Generiere Überschrift",on_click=extract_article_links_for_heading,kwargs={"link":[st.session_state["headline_link_input"]]})