Update app.py
app.py
CHANGED
@@ -1,9 +1,125 @@
 import streamlit as st
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
+import json
+import csv
+import pandas as pd
+import os
+from gradio_client import Client  # used by scrape_links() below
+
+#api_key = os.getenv('groq')
+
+def parse_links_and_content(ort):
+    base_url = "https://vereine-in-deutschland.net"
+    all_links = []
+    # Construct the full URL
+    initial_url = f"{base_url}/vereine/Bayern/{ort}/"
+
+    try:
+        # Send the request to the initial URL
+        response = requests.get(initial_url)
+        response.raise_for_status()  # Verify that the request succeeded
+
+        # Parse the HTML content using BeautifulSoup
+        soup = BeautifulSoup(response.content, 'html.parser')
+
+        # Determine the last page
+        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
+
+        if link_element and 'href' in link_element.attrs:
+            href = link_element['href']
+            # Take the last two characters of the URL ...
+            last_two_chars = href[-2:]
+            # ... and convert them to an integer
+            last_two_chars_int = int(last_two_chars)
+        else:
+            last_two_chars_int = 1  # If the last page cannot be found, assume there is only one page
+
+        # Loop over all pages and collect links
+        for page_number in range(1, last_two_chars_int + 1):
+            page_url = f"{base_url}/vereine/Bayern/{ort}/p/{page_number}"
+            response = requests.get(page_url)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.content, 'html.parser')
+            target_div = soup.select_one('div.row-cols-1:nth-child(4)')
+
+            if target_div:
+                texts = [a.text for a in target_div.find_all('a', href=True)]
+                all_links.extend(texts)
+            else:
+                st.write(f"Target div not found on page {page_number}")
+
+    except Exception as e:
+        st.error(str(e))
+        return []
+
+    # Keep every other entry (the list apparently contains each link twice)
+    all_links = all_links[0::2]
+    return all_links
+
+def scrape_links(links):
+    contact_details = []
+    client = Client("mgokg/PerplexicaApi")
+    for verein in links:
+        result = client.predict(
+            prompt=f"{verein}",
+            api_name="/parse_links"
+        )
+        contact_details.append(result)
+
+    return contact_details
+
+# Save the JSON data to a CSV file
+def save_to_csv(data, filename):
+    keys = data[0].keys()
+    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
+        dict_writer = csv.DictWriter(output_file, fieldnames=keys)
+        dict_writer.writeheader()
+        dict_writer.writerows(data)
+
+# Streamlit app
+st.title("Vereinsinformationen abrufen")
+
+ort_input = st.text_input("Ort", placeholder="Gib den Namen des Ortes ein")
+
+if st.button("Senden"):
+    links = parse_links_and_content(ort_input)
+    contact_details = scrape_links(links)
+    json_data = [json.loads(item) for item in contact_details]
+
+    # Display the results
+    st.json(json_data)
+
+    # Save the data to a CSV file
+    save_to_csv(json_data, 'contact_details.csv')
+
+    # Offer the file for download
+    with open('contact_details.csv', 'rb') as file:
+        st.download_button(
+            label="CSV-Datei herunterladen",
+            data=file,
+            file_name='contact_details.csv',
+            mime='text/csv'
+        )
+
+'''
+import streamlit as st
 #import sounddevice as sd
 import numpy as np
 import wavio
 import speech_recognition as sr
 
+
 st.title("Audio Recorder und Transkription")
 
 # Aufnahmeparameter
@@ -40,7 +156,6 @@ st.write("Klicke auf 'Aufnahme starten', um die Aufnahme zu beginnen.")
 
 
 
-'''
 
 import streamlit as st
 import pydub