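# Streamlit app for the Add1E/HeadlinePrediction Space: scrapes article text from a
# given URL, asks an OpenAI chat model for alternative headlines and key statements,
# and appends the results to results.csv in the Space repository.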
import streamlit as st
import requests
from bs4 import BeautifulSoup
import csv
import os
import pandas as pd
import random
from huggingface_hub import Repository, HfApi, HfFolder
import openai

api = HfApi()
# Hugging Face write/read tokens are read from environment variables
token = os.getenv("token")
tokenread = os.getenv("tokenread")
localdir = "HeadlinePrediction"
repo = Repository(local_dir=localdir, clone_from="https://huggingface.co/spaces/Add1E/HeadlinePrediction", token=token)
def add_to_csv(var1, var2, var3, var4, var5, var6, var7, filename):
    # Open the CSV file in append mode and add the values as a new row
    with open(os.path.abspath(os.path.join(localdir, filename)), 'a', newline='', encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow([var1, var2, var3, var4, var5, var6, var7])
# Scrape the relevant text blocks from the given article URL
def scrape_website(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # The CSS class names below are specific to the target site's markup
        content = {
            'scraped_html': ' '.join(p.get_text() for p in soup.find_all('p', class_="sc-beqWaB jOAegM")),
            'heute_überschrift': ' '.join(p.get_text() for p in soup.find_all('h1', class_="sc-beqWaB iTcspr")),
            'heute_zsm': ' '.join(p.get_text() for p in soup.find_all('p', class_="sc-beqWaB iOdRIJ"))
        }
        return content
    except Exception as e:
        # On failure the error message is returned as a plain string
        return str(e)
def send_to_chatgpt(api_key, prompt_file, transcript):
    try:
        # Read the system prompt from the prompt file
        with open(prompt_file, 'r', encoding='utf-8') as file:
            prompt = file.read().strip()

        openai.api_key = api_key
        response = openai.ChatCompletion.create(
            model="gpt-4-1106-preview",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "system", "content": transcript}
            ],
        )
        return response.choices[0].message["content"]
    except Exception as e:
        return str(e)
st.title("Webseiten-Scraper")

# Configuration: OpenAI API key (from the environment), prompt file and results CSV
api_key = os.getenv("api_key")
base_prompt = "txt.txt"
csv_name = "results.csv"

df = pd.read_csv(csv_name, encoding='utf-8')
df.columns = [col.replace(' ', '_') for col in df.columns]

# Input field for the URL to scrape
url = st.text_input("URL eingeben", "https://www.beispielwebsite.com")
if st.button("Scrape"): | |
if url: | |
scraped_html = scrape_website(url) | |
response = send_to_chatgpt(api_key, base_prompt, scraped_html['scraped_html']) | |
st.code(response) | |
teile = response.split("Kernaussagen:") | |
ueberschriften_teil, kernaussagen_teil = teile[0], teile[1] | |
ueberschriften = ueberschriften_teil.split("\n")[1:] # Erste Zeile überspringen | |
ueberschrift_1 = ueberschriften[0].split(" ", 1)[1] # "1." entfernen | |
ueberschrift_2 = ueberschriften[1].split(" ", 1)[1] # "2." entfernen | |
kernaussagen = kernaussagen_teil.split("\n")[1:] # Erste Zeile überspringen | |
kernaussage_1 = kernaussagen[0].split(" ", 1)[1] # "1." entfernen | |
kernaussage_2 = kernaussagen[1].split(" ", 1)[1] # "2." entfernen | |
add_to_csv( | |
url, scraped_html['heute_überschrift'], ueberschrift_1, ueberschrift_2, scraped_html['heute_zsm'], | |
kernaussage_1, kernaussage_2, csv_name | |
) | |
else: | |
st.error("Bitte geben Sie eine gültige URL ein.") | |
if st.sidebar.button("Upload Data"): | |
repo.git_add(os.path.abspath(f'{localdir}/results.csv')) | |
repo.git_commit("Add new headlines.csv") | |
repo.git_push() | |
auswahl = st.sidebar.selectbox("Wählen Sie eine Prediction:", ["None", "Prediction 1", "Prediction 2", "Prediction 3", "Prediction 4", "Prediction 5"])

# Pick up to five distinct random rows from the CSV
random_numbers = random.sample(range(len(df)), min(5, len(df)))

if auswahl != "None":
    idx = int(auswahl.split()[-1]) - 1  # "Prediction 1" -> 0, ..., "Prediction 5" -> 4
    if idx < len(random_numbers):
        st.dataframe(df.iloc[random_numbers[idx]])
if st.sidebar.button("Show Full Data"): | |
df = pd.read_csv(csv_name, encoding='utf-8') | |
st.dataframe(df) |