import streamlit as st
import requests
from bs4 import BeautifulSoup
import csv
import os
import pandas as pd
import random
from huggingface_hub import Repository, HfApi, HfFolder
import openai
api = HfApi()
token = os.getenv("token")
tokenread = os.getenv("tokenread")
localdir = "HeadlinePrediction"
repo = Repository(local_dir=localdir, clone_from="https://huggingface.co/spaces/Add1E/HeadlinePrediction", token=token)
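
# The Hugging Face write token and the OpenAI credentials used below are read from
# environment variables (presumably configured as Space secrets); without them the
# repo clone and the ChatGPT calls will fail.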

def add_to_csv(var1, var2, var3, var4, var5, var6, var7, filename):
    # Open the CSV file in append mode
    with open(os.path.abspath(f'{localdir}/{filename}'), 'a', newline='', encoding="utf-8") as file:
        writer = csv.writer(file)
        # Append the variables as a new row to the CSV file
        writer.writerow([var1, var2, var3, var4, var5, var6, var7])
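
# Expected column layout of results.csv (inferred from the commented-out add_to_csv call
# further below): url, original headline, predicted headline 1, predicted headline 2,
# original summary, predicted key message 1, predicted key message 2.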

# Defines the function for scraping the website
def scrape_website(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        content = {
            'scraped_html': ' '.join(p.get_text() for p in soup.find_all('p', class_="sc-8df4b824-0 iGqRWd")),
            'heute_überschrift': ' '.join(p.get_text() for p in soup.find_all('h1', class_="sc-8df4b824-0 geJYKG")),
            'heute_zsm': ' '.join(p.get_text() for p in soup.find_all('p', class_="sc-8df4b824-0 AwWvY"))
        }
        return content
    except Exception as e:
        return str(e)
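
# Note: the class names used above are generated styled-components hashes from the target
# news site (apparently heute.at, judging by the dictionary keys); they are likely to change
# whenever the site ships a new build, in which case the selectors silently return empty strings.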

def send_to_chatgpt(prompt_file, transcript):
    try:
        # Reads the prompt from the file
        #with open(prompt_file, 'r', encoding='utf-8') as file:
        #    prompt = file.read().strip()
        openai.api_key = os.getenv("OPENAI_API_KEY")
        openai.api_base = os.getenv("OPENAI_API_BASE")
        openai.api_type = os.getenv("OPENAI_API_TYPE")
        openai.api_version = os.getenv("OPENAI_API_VERSION")
        response = openai.ChatCompletion.create(
            engine="gpt-4-0613",
            messages=[
                {"role": "system", "content": prompt_file},
                {"role": "system", "content": transcript}
            ],
        )
        return response.choices[0].message["content"]
    except Exception as e:
        return str(e)
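
# send_to_chatgpt appears to target the pre-1.0 openai SDK: openai.ChatCompletion.create with
# an "engine" argument plus api_type/api_base/api_version is the Azure-style configuration of
# that legacy client, so upgrading the openai package would require rewriting this call.
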
st.title("Webseiten-Scraper")
base_prompt = os.getenv("prompt")
prompt2 = os.getenv("prompt2")
prompt3 = os.getenv("prompt3")
csv_name = "results.csv"
df = pd.read_csv(csv_name, encoding='utf-8')
df.columns = [col.replace(' ', '_') for col in df.columns]

# Input field for the article URL
url = st.text_input("URL eingeben", "https://www.beispielwebsite.com")
if st.button("Scrape"):
if url:
scraped_html = scrape_website(url)
st.code(scraped_html)
response = send_to_chatgpt(base_prompt, scraped_html['scraped_html'])
st.write("Heute-Überschrift: ")
st.code(scraped_html['heute_überschrift'])
st.write("Heute-Kernaussage: ")
st.code(scraped_html['heute_zsm'])
st.write("ChatGPT: ")
st.code(response)
#teile = response.split("Kernaussagen:")
#ueberschriften_teil, kernaussagen_teil = teile[0], teile[1]
#ueberschriften = ueberschriften_teil.split("\n")[1:] # Erste Zeile überspringen
#ueberschrift_1 = ueberschriften[0].split(" ", 1)[1] # "1." entfernen
#ueberschrift_2 = ueberschriften[1].split(" ", 1)[1] # "2." entfernen
#kernaussagen = kernaussagen_teil.split("\n")[1:] # Erste Zeile überspringen
#kernaussage_1 = kernaussagen[0].split(" ", 1)[1] # "1." entfernen
#kernaussage_2 = kernaussagen[1].split(" ", 1)[1] # "2." entfernen
#add_to_csv(
# url, scraped_html['heute_überschrift'], ueberschrift_1, ueberschrift_2, scraped_html['heute_zsm'],
# kernaussage_1, kernaussage_2, csv_name
#)
else:
st.error("Bitte geben Sie eine gültige URL ein.")
if st.button("What does GPT need?"):
if url:
scraped_html = scrape_website(url)
scraped_txt = "Headline: " + scraped_html['heute_überschrift'] + " Sub-headline: " + scraped_html['heute_zsm'] + " news text: " + scraped_html['scraped_html']
response = send_to_chatgpt(prompt2, scraped_txt)
st.write("Heute-Artikel: ")
st.code(scraped_html['heute_überschrift'])
st.code(scraped_html['heute_zsm'])
st.write(scraped_html['scraped_html'])
st.write("ChatGPT says it needs: ")
st.code(response)
#response2 = send_to_chatgpt(prompt3, response)
#st.write("What ChatGPT wrote with this instructions")
#st.code(response2)
else:
st.error("Bitte geben Sie eine gültige URL ein.")
if st.sidebar.button("Upload Data"):
repo.git_add(os.path.abspath(f'{localdir}/{filename}'))
repo.git_commit("Add new headlines.csv")
repo.git_push()

auswahl = st.sidebar.selectbox("Wählen Sie eine Prediction:", ["None", "Prediction 1", "Prediction 2", "Prediction 3", "Prediction 4", "Prediction 5"])

random_numbers = set()
while len(random_numbers) < 5:
    random_numbers.add(random.randint(0, len(df) - 1))
random_numbers = list(random_numbers)
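
# Note: the loop above simply draws five distinct row indices; random.sample(range(len(df)), 5)
# would do the same in one call. Either way the table needs at least five rows, otherwise the
# while loop never terminates.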
if auswahl == "Prediction 1":
st.dataframe(df.iloc[random_numbers[0]])
elif auswahl == "Prediction 2":
st.dataframe(df.iloc[random_numbers[1]])
elif auswahl == "Prediction 3":
st.dataframe(df.iloc[random_numbers[2]])
elif auswahl == "Prediction 4":
st.dataframe(df.iloc[random_numbers[3]])
elif auswahl == "Prediction 5":
st.dataframe(df.iloc[random_numbers[4]])
if st.sidebar.button("Show Full Data"):
df = pd.read_csv(csv_name, encoding='utf-8')
st.dataframe(df) |