# Add1E's picture
# Update app.py
# 66edcd9 verified
import streamlit as st
import requests
from bs4 import BeautifulSoup
import csv
import os
import pandas as pd
import random
from huggingface_hub import Repository, HfApi, HfFolder
import openai
# Hugging Face Hub client (not referenced again in the visible code).
api = HfApi()

# Access tokens are supplied through environment variables.
token = os.environ.get("token")
tokenread = os.environ.get("tokenread")

# Local working copy of the Space repository that the app commits to.
localdir = "HeadlinePrediction"
repo = Repository(
    local_dir=localdir,
    clone_from="https://huggingface.co/spaces/Add1E/HeadlinePrediction",
    token=token,
)
def add_to_csv(var1, var2, var3, var4, var5, var6, var7, filename):
    """Append the seven given values as one row to a CSV file.

    The file is resolved inside the module-level ``localdir`` directory and
    opened in append mode with UTF-8 encoding, so existing rows are kept.
    """
    target_path = os.path.abspath(f'{localdir}/{filename}')
    row = [var1, var2, var3, var4, var5, var6, var7]
    # newline='' leaves line-ending handling to the csv module.
    with open(target_path, mode='a', newline='', encoding="utf-8") as csv_file:
        csv.writer(csv_file).writerow(row)
def scrape_website(url, timeout=10):
    """Download a page and extract article text, headline and summary.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests.get`` may block indefinitely on an unresponsive host.

    Returns:
        On success, a dict with three keys, each holding the text of the
        matching elements joined by single spaces:
        ``'scraped_html'`` (``<p>`` elements with the article-text class),
        ``'heute_überschrift'`` (``<h1>`` elements with the headline class)
        and ``'heute_zsm'`` (``<p>`` elements with the summary class).
        On any failure, the error message as a plain ``str``; callers must
        check the type before indexing the result.
    """
    def joined_text(soup, tag, css_class):
        # Concatenate the text of every matching element.
        return ' '.join(
            element.get_text() for element in soup.find_all(tag, class_=css_class)
        )

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return {
            'scraped_html': joined_text(soup, 'p', "sc-8df4b824-0 iGqRWd"),
            'heute_überschrift': joined_text(soup, 'h1', "sc-8df4b824-0 geJYKG"),
            'heute_zsm': joined_text(soup, 'p', "sc-8df4b824-0 AwWvY"),
        }
    except Exception as e:
        # Best-effort contract: report the problem as text instead of raising.
        return str(e)
def send_to_chatgpt(prompt_file, transcript):
    """Send a prompt and a text to the chat model and return its reply.

    Despite its name, ``prompt_file`` is used directly as the prompt text; it
    is not opened as a file. Both ``prompt_file`` and ``transcript`` are sent
    as messages with the "system" role.

    Returns:
        The content of the first choice in the response, or the error
        message as a string if anything goes wrong.
    """
    try:
        # Connection settings are re-read from the environment on every call.
        openai.api_key = os.getenv("OPENAI_API_KEY")
        openai.api_base = os.getenv("OPENAI_API_BASE")
        openai.api_type = os.getenv("OPENAI_API_TYPE")
        openai.api_version = os.getenv("OPENAI_API_VERSION")

        chat_messages = [
            {"role": "system", "content": prompt_file},
            {"role": "system", "content": transcript},
        ]
        completion = openai.ChatCompletion.create(
            engine="gpt-4-0613",
            messages=chat_messages,
        )
        first_choice = completion.choices[0]
        return first_choice.message["content"]
    except Exception as error:
        return str(error)
st.title("Webseiten-Scraper")

# Prompts are configured through environment variables.
base_prompt = os.getenv("prompt")
prompt2 = os.getenv("prompt2")
prompt3 = os.getenv("prompt3")

# Load the stored results; spaces in column names are replaced by underscores.
csv_name = "results.csv"
df = pd.read_csv(csv_name, encoding='utf-8')
df = df.rename(columns=lambda col: col.replace(' ', '_'))

# Input field for the URL of the page to scrape.
url = st.text_input("URL eingeben", "https://www.beispielwebsite.com")
# "Scrape" button: fetch the page, show the extracted headline and summary,
# and show the model's reply to the base prompt for the article text.
if st.button("Scrape"):
    if url:
        scraped_html = scrape_website(url)
        if not isinstance(scraped_html, dict):
            # scrape_website returns the error message as a string on failure;
            # indexing that string with a key would raise a TypeError.
            st.error(scraped_html)
        else:
            st.code(scraped_html)
            response = send_to_chatgpt(base_prompt, scraped_html['scraped_html'])
            st.write("Heute-Überschrift: ")
            st.code(scraped_html['heute_überschrift'])
            st.write("Heute-Kernaussage: ")
            st.code(scraped_html['heute_zsm'])
            st.write("ChatGPT: ")
            st.code(response)
            # Disabled: split the reply into two headlines and two key
            # statements and append them to the CSV.
            #teile = response.split("Kernaussagen:")
            #ueberschriften_teil, kernaussagen_teil = teile[0], teile[1]
            #ueberschriften = ueberschriften_teil.split("\n")[1:]  # skip the first line
            #ueberschrift_1 = ueberschriften[0].split(" ", 1)[1]  # strip the leading "1."
            #ueberschrift_2 = ueberschriften[1].split(" ", 1)[1]  # strip the leading "2."
            #kernaussagen = kernaussagen_teil.split("\n")[1:]  # skip the first line
            #kernaussage_1 = kernaussagen[0].split(" ", 1)[1]  # strip the leading "1."
            #kernaussage_2 = kernaussagen[1].split(" ", 1)[1]  # strip the leading "2."
            #add_to_csv(
            #    url, scraped_html['heute_überschrift'], ueberschrift_1, ueberschrift_2, scraped_html['heute_zsm'],
            #    kernaussage_1, kernaussage_2, csv_name
            #)
    else:
        st.error("Bitte geben Sie eine gültige URL ein.")
# "What does GPT need?" button: send headline, sub-headline and article text
# together with prompt2 to the model and display its answer.
if st.button("What does GPT need?"):
    if url:
        scraped_html = scrape_website(url)
        if not isinstance(scraped_html, dict):
            # scrape_website returns the error message as a string on failure;
            # indexing that string with a key would raise a TypeError.
            st.error(scraped_html)
        else:
            headline = scraped_html['heute_überschrift']
            sub_headline = scraped_html['heute_zsm']
            article_text = scraped_html['scraped_html']
            scraped_txt = (
                f"Headline: {headline} Sub-headline: {sub_headline}"
                f" news text: {article_text}"
            )
            response = send_to_chatgpt(prompt2, scraped_txt)
            st.write("Heute-Artikel: ")
            st.code(headline)
            st.code(sub_headline)
            st.write(article_text)
            st.write("ChatGPT says it needs: ")
            st.code(response)
            # Disabled: feed the answer back to the model with prompt3.
            #response2 = send_to_chatgpt(prompt3, response)
            #st.write("What ChatGPT wrote with this instructions")
            #st.code(response2)
    else:
        st.error("Bitte geben Sie eine gültige URL ein.")
# "Upload Data" button: stage, commit and push the results CSV that lives in
# the local clone of the Space repository.
if st.sidebar.button("Upload Data"):
    # Use the module-level csv_name; no variable called `filename` exists at
    # this scope, so referencing it would raise a NameError on click.
    repo.git_add(os.path.abspath(f'{localdir}/{csv_name}'))
    repo.git_commit("Add new headlines.csv")
    repo.git_push()
# Sidebar selector: show one randomly drawn row of the results per "Prediction"
# entry. The row positions are drawn anew each time this code runs.
prediction_labels = [
    "Prediction 1",
    "Prediction 2",
    "Prediction 3",
    "Prediction 4",
    "Prediction 5",
]
auswahl = st.sidebar.selectbox(
    "Wählen Sie eine Prediction:",
    ["None", *prediction_labels],
)

# Draw up to five distinct row positions. random.sample is bounded by the
# number of rows, so it neither loops forever when fewer than five rows exist
# nor fails on an empty table.
random_numbers = random.sample(range(len(df)), k=min(5, len(df)))

if auswahl in prediction_labels:
    position = prediction_labels.index(auswahl)
    if position < len(random_numbers):
        st.dataframe(df.iloc[random_numbers[position]])
    else:
        # Fewer rows than prediction slots: nothing to show for this entry.
        st.sidebar.warning("Nicht genügend Daten für diese Prediction.")
# "Show Full Data" button: re-read the CSV from disk (column names are left
# exactly as stored in the file) and display the whole table.
if st.sidebar.button("Show Full Data"):
    df = pd.read_csv(csv_name, encoding='utf-8')
    st.dataframe(df)