File size: 5,682 Bytes
2671dbc
 
 
 
 
7d47e73
 
c21daf5
f254322
2671dbc
c21daf5
 
 
 
 
 
 
6dda95f
2671dbc
 
169395e
2671dbc
 
 
 
 
 
 
 
 
 
 
 
 
66edcd9
 
 
2671dbc
 
 
 
 
 
c21daf5
 
 
9d344c0
2671dbc
7d47e73
ad7efe8
 
7d47e73
b10acfc
9d344c0
 
 
2671dbc
0b8e1ef
2671dbc
ad7efe8
2671dbc
 
 
 
 
 
 
c21daf5
ebd3820
76e4da2
 
c21daf5
 
 
 
 
 
6dda95f
2671dbc
 
 
6e1b6f5
9d344c0
c21daf5
 
 
 
 
2671dbc
15a13d8
 
 
2671dbc
15a13d8
 
 
2671dbc
15a13d8
 
 
2671dbc
15a13d8
 
 
 
2671dbc
 
 
7d47e73
76e4da2
 
 
 
 
 
 
 
 
 
 
 
 
 
4939d77
 
 
76e4da2
 
 
 
c21daf5
1f4273d
c21daf5
 
 
6dda95f
c21daf5
ec410c9
c21daf5
 
 
7d47e73
c21daf5
7d47e73
 
c21daf5
 
 
 
 
 
 
 
 
 
7d47e73
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import streamlit as st
import requests
from bs4 import BeautifulSoup
import csv
import os
import pandas as pd
import random
from huggingface_hub import Repository, HfApi, HfFolder
import openai

# Hugging Face Hub client; credentials are read from environment variables.
api = HfApi()
localdir = "HeadlinePrediction"
token = os.getenv("token")
tokenread = os.getenv("tokenread")

# Local git working copy of the Space repository, placed in `localdir`.
# The add/commit/push calls of the "Upload Data" button go through this object.
repo = Repository(
    local_dir=localdir,
    clone_from="https://huggingface.co/spaces/Add1E/HeadlinePrediction",
    token=token,
)


def add_to_csv(var1, var2, var3, var4, var5, var6, var7, filename):
    """Append one row made of the seven given values to a CSV file.

    The file is looked up inside the module-level ``localdir`` directory and
    opened in append mode, so existing rows are kept.

    Args:
        var1..var7: Values written, in this order, as the columns of the row.
        filename: Name of the CSV file relative to ``localdir``.
    """
    target_path = os.path.abspath(f'{localdir}/{filename}')
    new_row = [var1, var2, var3, var4, var5, var6, var7]

    # newline='' leaves line-ending handling to the csv module.
    with open(target_path, 'a', newline='', encoding="utf-8") as csv_file:
        csv.writer(csv_file).writerow(new_row)


# Scrapes body text, headline and summary from an article page.
def scrape_website(url, timeout=10):
    """Download ``url`` and extract three text fields from its HTML.

    Elements are selected by hard-coded tag/CSS-class pairs (presumably the
    markup of one specific news site -- verify when the site layout changes).
    The text of all matching elements is joined with single spaces.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` as ``timeout``. Without a
            timeout a stalled server would block the app indefinitely.

    Returns:
        On success a dict with the keys ``'scraped_html'`` (paragraph text),
        ``'heute_überschrift'`` (``h1`` text) and ``'heute_zsm'`` (text of a
        second paragraph class). On any failure (network error, HTTP error
        status, parsing problem) the exception message as a plain ``str``;
        callers must check the type before indexing the result.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        def joined_text(tag, css_class):
            # Concatenate the text of every matching element.
            return ' '.join(
                node.get_text() for node in soup.find_all(tag, class_=css_class)
            )

        return {
            'scraped_html': joined_text('p', "sc-8df4b824-0 iGqRWd"),
            'heute_überschrift': joined_text('h1', "sc-8df4b824-0 geJYKG"),
            'heute_zsm': joined_text('p', "sc-8df4b824-0 AwWvY"),
        }
    except Exception as e:
        # Deliberate best-effort: report the failure as text instead of raising.
        return str(e)





def send_to_chatgpt(prompt_file, transcript):
    """Send a prompt plus a text to the chat completion API and return the reply.

    The OpenAI client settings (key, base URL, API type, API version) are read
    from environment variables on every call.

    Args:
        prompt_file: Used directly as the content of the first message. Despite
            the name it is not opened as a file; the file-reading code that
            once did so is disabled.
        transcript: Text sent as the content of the second message.

    Returns:
        The content of the first choice's message, or the exception message as
        a plain ``str`` if anything fails.
    """
    # NOTE(review): both messages use the role "system"; confirm that the
    # transcript is not meant to be sent with the role "user".
    chat_messages = [
        {"role": "system", "content": prompt_file},
        {"role": "system", "content": transcript},
    ]
    client_settings = (
        ("api_key", "OPENAI_API_KEY"),
        ("api_base", "OPENAI_API_BASE"),
        ("api_type", "OPENAI_API_TYPE"),
        ("api_version", "OPENAI_API_VERSION"),
    )
    try:
        for attribute, env_name in client_settings:
            setattr(openai, attribute, os.getenv(env_name))
        completion = openai.ChatCompletion.create(
            engine="gpt-4-0613",
            messages=chat_messages,
        )
        first_choice = completion.choices[0]
        return first_choice.message["content"]
    except Exception as e:
        return str(e)

st.title("Webseiten-Scraper")

# The prompts are configured through environment variables.
base_prompt, prompt2, prompt3 = (
    os.getenv(env_name) for env_name in ("prompt", "prompt2", "prompt3")
)

csv_name = "results.csv"
# Load the stored results and replace spaces in the column names by underscores.
df = pd.read_csv(csv_name, encoding='utf-8').rename(
    columns=lambda col: col.replace(' ', '_')
)

# Input field for the URL
url = st.text_input("URL eingeben", "https://www.beispielwebsite.com")

# "Scrape" button: download the page, show the original headline and summary,
# and show the model's answer to base_prompt for the article text.
if st.button("Scrape"):
    if url:
        scraped_html = scrape_website(url)
        if isinstance(scraped_html, str):
            # scrape_website returns the exception text as a plain string on
            # failure; indexing it like a dict would raise a TypeError.
            st.error(f"Scraping fehlgeschlagen: {scraped_html}")
        else:
            st.code(scraped_html)
            response = send_to_chatgpt(base_prompt, scraped_html['scraped_html'])
            st.write("Heute-Überschrift: ")
            st.code(scraped_html['heute_überschrift'])
            st.write("Heute-Kernaussage: ")
            st.code(scraped_html['heute_zsm'])
            st.write("ChatGPT: ")
            st.code(response)

            # Disabled: split the response into two headlines and two key
            # statements and append them to the CSV file.
            #teile = response.split("Kernaussagen:")
            #ueberschriften_teil, kernaussagen_teil = teile[0], teile[1]

            #ueberschriften = ueberschriften_teil.split("\n")[1:]  # skip first line
            #ueberschrift_1 = ueberschriften[0].split(" ", 1)[1]  # remove "1."
            #ueberschrift_2 = ueberschriften[1].split(" ", 1)[1]  # remove "2."

            #kernaussagen = kernaussagen_teil.split("\n")[1:]  # skip first line
            #kernaussage_1 = kernaussagen[0].split(" ", 1)[1]  # remove "1."
            #kernaussage_2 = kernaussagen[1].split(" ", 1)[1]  # remove "2."

            #add_to_csv(
            #    url, scraped_html['heute_überschrift'], ueberschrift_1, ueberschrift_2, scraped_html['heute_zsm'],
            #    kernaussage_1, kernaussage_2, csv_name
            #)

    else:
        st.error("Bitte geben Sie eine gültige URL ein.")



# "What does GPT need?" button: send headline, sub-headline and article text
# together with prompt2 to the model and show the answer below the article.
if st.button("What does GPT need?"):
    if url:
        scraped_html = scrape_website(url)
        if isinstance(scraped_html, str):
            # scrape_website returns the exception text as a plain string on
            # failure; indexing it like a dict would raise a TypeError.
            st.error(f"Scraping fehlgeschlagen: {scraped_html}")
        else:
            scraped_txt = (
                "Headline: " + scraped_html['heute_überschrift']
                + " Sub-headline: " + scraped_html['heute_zsm']
                + " news text: " + scraped_html['scraped_html']
            )
            response = send_to_chatgpt(prompt2, scraped_txt)
            st.write("Heute-Artikel: ")
            st.code(scraped_html['heute_überschrift'])
            st.code(scraped_html['heute_zsm'])
            st.write(scraped_html['scraped_html'])
            st.write("ChatGPT says it needs: ")
            st.code(response)

            # Disabled: feed the answer back to the model with prompt3.
            #response2 = send_to_chatgpt(prompt3, response)
            #st.write("What ChatGPT wrote with this instructions")
            #st.code(response2)
    else:
        st.error("Bitte geben Sie eine gültige URL ein.")


# "Upload Data" button: stage, commit and push the results CSV of the local
# repository checkout.
if st.sidebar.button("Upload Data"):
    # The original referenced an undefined name `filename` here, which raised
    # a NameError on click. csv_name is the results file this script works with.
    repo.git_add(os.path.abspath(f'{localdir}/{csv_name}'))
    repo.git_commit("Add new headlines.csv")
    repo.git_push()
    

# Sidebar selection: show one of up to five randomly chosen rows of df.
prediction_labels = [f"Prediction {number}" for number in range(1, 6)]
auswahl = st.sidebar.selectbox("Wählen Sie eine Prediction:", ["None", *prediction_labels])

# Draw distinct row positions. random.sample is bounded by the number of
# available rows, so a DataFrame with fewer than five rows neither loops
# forever nor raises (an empty one simply yields no positions).
random_numbers = random.sample(range(len(df)), min(len(prediction_labels), len(df)))

if auswahl in prediction_labels:
    position = prediction_labels.index(auswahl)
    if position < len(random_numbers):
        st.dataframe(df.iloc[random_numbers[position]])
    else:
        # Fewer rows than predictions: nothing to show for this entry.
        st.sidebar.warning("Nicht genügend Daten für diese Prediction.")



# "Show Full Data" button: re-read the CSV file and display every row.
show_all_rows = st.sidebar.button("Show Full Data")
if show_all_rows:
    # Re-reading gives the current file content with its original column names.
    df = pd.read_csv(csv_name, encoding='utf-8')
    st.dataframe(df)