import streamlit as st
import requests
from bs4 import BeautifulSoup
import csv
import os
import pandas as pd
import random
from huggingface_hub import Repository, HfApi, HfFolder
import openai

api = HfApi()
token = os.getenv("token")
tokenread = os.getenv("tokenread")
localdir = "HeadlinePrediction"
repo = Repository(
    local_dir=localdir,
    clone_from="https://huggingface.co/spaces/Add1E/HeadlinePrediction",
    token=token,
)


def add_to_csv(var1, var2, var3, var4, var5, var6, var7, filename):
    # Open the CSV file in append mode and add the variables as a new row.
    with open(os.path.abspath(f'{localdir}/{filename}'), 'a', newline='', encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow([var1, var2, var3, var4, var5, var6, var7])


def scrape_website(url):
    # Scrape the article page and collect headline, summary, and body text.
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        content = {
            'scraped_html': ' '.join(p.get_text() for p in soup.find_all('p', class_="sc-8df4b824-0 iGqRWd")),
            'heute_überschrift': ' '.join(p.get_text() for p in soup.find_all('h1', class_="sc-8df4b824-0 geJYKG")),
            'heute_zsm': ' '.join(p.get_text() for p in soup.find_all('p', class_="sc-8df4b824-0 AwWvY"))
        }
        return content
    except Exception as e:
        return str(e)


def send_to_chatgpt(prompt_file, transcript):
    try:
        # Read the prompt from a file (currently the prompt text is passed in directly):
        # with open(prompt_file, 'r', encoding='utf-8') as file:
        #     prompt = file.read().strip()
        openai.api_key = os.getenv("OPENAI_API_KEY")
        openai.api_base = os.getenv("OPENAI_API_BASE")
        openai.api_type = os.getenv("OPENAI_API_TYPE")
        openai.api_version = os.getenv("OPENAI_API_VERSION")
        response = openai.ChatCompletion.create(
            engine="gpt-4-0613",
            messages=[
                {"role": "system", "content": prompt_file},
                {"role": "system", "content": transcript}
            ],
        )
        return response.choices[0].message["content"]
    except Exception as e:
        return str(e)


st.title("Webseiten-Scraper")

base_prompt = os.getenv("prompt")
prompt2 = os.getenv("prompt2")
prompt3 = os.getenv("prompt3")
csv_name = "results.csv"

df = pd.read_csv(csv_name, encoding='utf-8')
df.columns = [col.replace(' ', '_') for col in df.columns]

# Input field for the URL to scrape
url = st.text_input("URL eingeben", "https://www.beispielwebsite.com")

if st.button("Scrape"):
    if url:
        scraped_html = scrape_website(url)
        st.code(scraped_html)
        response = send_to_chatgpt(base_prompt, scraped_html['scraped_html'])
        st.write("Heute-Überschrift: ")
        st.code(scraped_html['heute_überschrift'])
        st.write("Heute-Kernaussage: ")
        st.code(scraped_html['heute_zsm'])
        st.write("ChatGPT: ")
        st.code(response)
        # teile = response.split("Kernaussagen:")
        # ueberschriften_teil, kernaussagen_teil = teile[0], teile[1]
        # ueberschriften = ueberschriften_teil.split("\n")[1:]  # skip the first line
        # ueberschrift_1 = ueberschriften[0].split(" ", 1)[1]   # strip the leading "1."
        # ueberschrift_2 = ueberschriften[1].split(" ", 1)[1]   # strip the leading "2."
        # kernaussagen = kernaussagen_teil.split("\n")[1:]      # skip the first line
        # kernaussage_1 = kernaussagen[0].split(" ", 1)[1]      # strip the leading "1."
        # kernaussage_2 = kernaussagen[1].split(" ", 1)[1]      # strip the leading "2."
        # add_to_csv(
        #     url, scraped_html['heute_überschrift'], ueberschrift_1, ueberschrift_2,
        #     scraped_html['heute_zsm'], kernaussage_1, kernaussage_2, csv_name
        # )
    else:
        st.error("Bitte geben Sie eine gültige URL ein.")

if st.button("What does GPT need?"):
    if url:
        scraped_html = scrape_website(url)
        scraped_txt = (
            "Headline: " + scraped_html['heute_überschrift']
            + " Sub-headline: " + scraped_html['heute_zsm']
            + " news text: " + scraped_html['scraped_html']
        )
        response = send_to_chatgpt(prompt2, scraped_txt)
        st.write("Heute-Artikel: ")
        st.code(scraped_html['heute_überschrift'])
        st.code(scraped_html['heute_zsm'])
        st.write(scraped_html['scraped_html'])
        st.write("ChatGPT says it needs: ")
        st.code(response)
        # response2 = send_to_chatgpt(prompt3, response)
        # st.write("What ChatGPT wrote with these instructions")
        # st.code(response2)
    else:
        st.error("Bitte geben Sie eine gültige URL ein.")

if st.sidebar.button("Upload Data"):
    # Push the updated results CSV back to the Space repository.
    repo.git_add(os.path.abspath(f'{localdir}/{csv_name}'))
    repo.git_commit("Add new headlines.csv")
    repo.git_push()

auswahl = st.sidebar.selectbox(
    "Wählen Sie eine Prediction:",
    ["None", "Prediction 1", "Prediction 2", "Prediction 3", "Prediction 4", "Prediction 5"]
)

# Draw five distinct random row indices from the results table.
random_numbers = set()
while len(random_numbers) < 5:
    random_numbers.add(random.randint(0, len(df) - 1))
random_numbers = list(random_numbers)

if auswahl == "Prediction 1":
    st.dataframe(df.iloc[random_numbers[0]])
elif auswahl == "Prediction 2":
    st.dataframe(df.iloc[random_numbers[1]])
elif auswahl == "Prediction 3":
    st.dataframe(df.iloc[random_numbers[2]])
elif auswahl == "Prediction 4":
    st.dataframe(df.iloc[random_numbers[3]])
elif auswahl == "Prediction 5":
    st.dataframe(df.iloc[random_numbers[4]])

if st.sidebar.button("Show Full Data"):
    df = pd.read_csv(csv_name, encoding='utf-8')
    st.dataframe(df)