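"""Streamlit app for headline prediction.

Scrapes an article page (the CSS selectors below appear to target heute.at),
asks GPT-4 for alternative headlines and key messages ("Kernaussagen"),
appends the results to results.csv, and pushes the CSV back to the
Hugging Face Space repo Add1E/HeadlinePrediction.
"""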
import streamlit as st
import requests
from bs4 import BeautifulSoup
import csv
import os
import pandas as pd
import random
from huggingface_hub import Repository, HfApi, HfFolder
import openai

api = HfApi()
token = os.getenv("token")          # write token used to push results back to the Space
tokenread = os.getenv("tokenread")  # read token (currently unused)
localdir = "HeadlinePrediction"
repo = Repository(local_dir=localdir, clone_from="https://huggingface.co/spaces/Add1E/HeadlinePrediction", token=token)
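# Note: the Repository class is deprecated in recent huggingface_hub releases.
# An HfApi-based upload would look roughly like this sketch:
#
#     api.upload_file(
#         path_or_fileobj="results.csv",
#         path_in_repo="results.csv",
#         repo_id="Add1E/HeadlinePrediction",
#         repo_type="space",
#         token=token,
#     )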


def add_to_csv(var1, var2, var3, var4, var5, var6, var7, filename):
    # Open the CSV in append mode and add the variables as a new row.
    # Note: this appends to the cloned copy under {localdir}, while the UI
    # reads the top-level results.csv, so new rows only show up after the
    # data is pushed and the Space reloads.
    with open(os.path.abspath(f'{localdir}/{filename}'), 'a', newline='', encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow([var1, var2, var3, var4, var5, var6, var7])


# Scrape the article page and extract the relevant text blocks. The CSS class
# names target the site's styled-components markup and will break if the site
# changes its styling.
def scrape_website(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        content = {
            'scraped_html': ' '.join(p.get_text() for p in soup.find_all('p', class_="sc-beqWaB jOAegM")),
            'heute_überschrift': ' '.join(p.get_text() for p in soup.find_all('h1', class_="sc-beqWaB iTcspr")),
            'heute_zsm': ' '.join(p.get_text() for p in soup.find_all('p', class_="sc-beqWaB iOdRIJ"))
        }
        return content
    except Exception as e:
        # On failure the error message is returned as a plain string, so
        # callers must check the return type before indexing.
        return str(e)


def send_to_chatgpt(api_key, prompt_file, transcript):
    try:
        # Read the system prompt from the prompt file.
        with open(prompt_file, 'r', encoding='utf-8') as file:
            prompt = file.read().strip()

        openai.api_key = api_key
        response = openai.ChatCompletion.create(
            model="gpt-4-1106-preview",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": transcript},  # the scraped article text
            ],
        )
        return response.choices[0].message["content"]
    except Exception as e:
        return str(e)
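# Note: send_to_chatgpt uses the pre-1.0 openai SDK interface
# (openai.ChatCompletion.create). With openai>=1.0 the equivalent sketch is:
#
#     from openai import OpenAI
#     client = OpenAI(api_key=api_key)
#     response = client.chat.completions.create(model="gpt-4-1106-preview", messages=[...])
#     text = response.choices[0].message.content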


st.title("Webseiten-Scraper")

# Configuration: OpenAI API key, prompt file, and the results CSV.
api_key = os.getenv("api_key")
base_prompt = "txt.txt"   # file containing the system prompt
csv_name = "results.csv"
df = pd.read_csv(csv_name, encoding='utf-8')
df.columns = [col.replace(' ', '_') for col in df.columns]

# Input field for the article URL.
url = st.text_input("URL eingeben", "https://www.beispielwebsite.com")


if st.button("Scrape"):
    if url:
        scraped_html = scrape_website(url)
        if isinstance(scraped_html, str):
            # scrape_website returns the error text as a string on failure.
            st.error(scraped_html)
            st.stop()
        response = send_to_chatgpt(api_key, base_prompt, scraped_html['scraped_html'])
        st.code(response)

        # The model is expected to answer with a numbered list of headlines
        # followed by a "Kernaussagen:" (key messages) section.
        teile = response.split("Kernaussagen:")
        ueberschriften_teil, kernaussagen_teil = teile[0], teile[1]

        ueberschriften = ueberschriften_teil.split("\n")[1:]  # skip the first line
        ueberschrift_1 = ueberschriften[0].split(" ", 1)[1]   # strip the leading "1."
        ueberschrift_2 = ueberschriften[1].split(" ", 1)[1]   # strip the leading "2."

        kernaussagen = kernaussagen_teil.split("\n")[1:]      # skip the first line
        kernaussage_1 = kernaussagen[0].split(" ", 1)[1]      # strip the leading "1."
        kernaussage_2 = kernaussagen[1].split(" ", 1)[1]      # strip the leading "2."

        add_to_csv(
            url, scraped_html['heute_überschrift'], ueberschrift_1, ueberschrift_2, scraped_html['heute_zsm'],
            kernaussage_1, kernaussage_2, csv_name
        )

    else:
        st.error("Bitte geben Sie eine gültige URL ein.")


if st.sidebar.button("Upload Data"):
    # Commit the updated results.csv and push it back to the Space repo.
    repo.git_add(os.path.abspath(f'{localdir}/results.csv'))
    repo.git_commit("Add new results.csv")
    repo.git_push()
    

auswahl = st.sidebar.selectbox("Wählen Sie eine Prediction:", ["None", "Prediction 1", "Prediction 2", "Prediction 3", "Prediction 4", "Prediction 5"])
# Draw distinct random row indices (at most five, fewer if the CSV is short).
random_numbers = random.sample(range(len(df)), k=min(5, len(df)))
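# Note: Streamlit reruns this script on every widget interaction, so a fresh
# random sample is drawn each time a prediction is selected. To keep the same
# rows for a whole session, the sample could be cached (sketch):
#
#     if "random_numbers" not in st.session_state:
#         st.session_state.random_numbers = random.sample(range(len(df)), k=min(5, len(df)))
#     random_numbers = st.session_state.random_numbers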


# Map "Prediction N" to the N-th randomly chosen row.
if auswahl != "None":
    idx = int(auswahl.split(" ")[1]) - 1
    if idx < len(random_numbers):
        st.dataframe(df.iloc[random_numbers[idx]])


if st.sidebar.button("Show Full Data"):
    # Re-read the CSV so rows added during this session are included.
    df = pd.read_csv(csv_name, encoding='utf-8')
    st.dataframe(df)