mgokg committed
Commit 93a1282 · verified · 1 Parent(s): 999cad9

Update app.py

Files changed (1)
  1. app.py +54 -318
app.py CHANGED
@@ -1,331 +1,67 @@
  import streamlit as st
- import requests
- from bs4 import BeautifulSoup
- from urllib.parse import urljoin
- import json
- import csv
- import pandas as pd
  import os
- from gradio_client import Client
-
- #api_key = os.getenv('groq')
-
- def parse_links_and_content(ort):
-     base_url = "https://vereine-in-deutschland.net"
-     all_links = []
-     # Build the full URL
-     initial_url = f"{base_url}/vereine/Bayern/{ort}/"
-
      try:
-         # Send the request to the initial URL
-         response = requests.get(initial_url)
-         response.raise_for_status()  # Check whether the request was successful
-
-         # Parse the HTML content using BeautifulSoup
-         soup = BeautifulSoup(response.content, 'html.parser')
-
-         # Determine the last page
-         link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
-
-         if link_element and 'href' in link_element.attrs:
-             href = link_element['href']
-             # Extract the last two characters of the URL
-             last_two_chars = href[-2:]
-
-             # Convert the last two characters to an integer
-             last_two_chars_int = int(last_two_chars)
-         else:
-             last_two_chars_int = 1  # If the last page is not found, assume there is only one page
-
-         # Loop through all pages and collect links
-         for page_number in range(1, last_two_chars_int + 1):
-             page_url = f"{base_url}/vereine/Bayern/{ort}/p/{page_number}"
-             response = requests.get(page_url)
-             response.raise_for_status()
-             soup = BeautifulSoup(response.content, 'html.parser')
-             target_div = soup.select_one('div.row-cols-1:nth-child(4)')
-
-             if target_div:
-                 texts = [a.text for a in target_div.find_all('a', href=True)]
-                 all_links.extend(texts)
-             else:
-                 st.write(f"Target div not found on page {page_number}")
-
      except Exception as e:
-         return str(e), []
-
-     all_links = all_links[0::2]
-     return all_links
-
- def scrape_links(links):
-     contact_details = []
-     client = Client("mgokg/PerplexicaApi")
-     for verein in links:
-         result = client.predict(
-             prompt=f"{verein}",
-             api_name="/parse_links"
-         )
-         contact_details.append(result)
-
-     return contact_details
-
- # Save the JSON data to a CSV file
- def save_to_csv(data, filename):
-     keys = data[0].keys()
-     with open(filename, 'w', newline='', encoding='utf-8') as output_file:
-         dict_writer = csv.DictWriter(output_file, fieldnames=keys)
-         dict_writer.writeheader()
-         dict_writer.writerows(data)
-
- # Streamlit App
- st.title("Vereinsinformationen abrufen")
-
- ort_input = st.text_input("Ort", placeholder="Gib den Namen des Ortes ein")
-
- if st.button("Senden"):
-     links = parse_links_and_content(ort_input)
-     contact_details = scrape_links(links)
-     json_data = [json.loads(item) for item in contact_details]
-
-     # Display the results
-     st.json(json_data)
-
-     # Save the data to a CSV file
-     save_to_csv(json_data, 'contact_details.csv')
-
-     # Provide the download link
-     with open('contact_details.csv', 'rb') as file:
-         st.download_button(
-             label="CSV-Datei herunterladen",
-             data=file,
-             file_name='contact_details.csv',
-             mime='text/csv'
-         )
-
-
- '''
- import streamlit as st
- #import sounddevice as sd
- import numpy as np
- import wavio
- import speech_recognition as sr
-
-
- st.title("Audio Recorder und Transkription")
-
- # Recording parameters
- duration = st.slider("Aufnahmedauer (Sekunden)", 1, 10, 5)
- fs = 44100  # sampling rate
-
- if st.button("Aufnahme starten"):
-     st.write("Aufnahme läuft...")
-     #recording = sd.rec(int(duration * fs), samplerate=fs, channels=2)
-     #sd.wait()  # stop recording
-
-     # Save the recording
-     wavio.write("aufnahme.wav", recording, fs, sampwidth=2)
-     st.write("Aufnahme abgeschlossen!")
-
-     # Transcription
-     recognizer = sr.Recognizer()
-     with sr.AudioFile("aufnahme.wav") as source:
-         audio_data = recognizer.record(source)
-         try:
-             text = recognizer.recognize_google(audio_data, language="de-DE")
-             st.write("Transkribierter Text:")
-             st.write(text)
-         except sr.UnknownValueError:
-             st.write("Audio konnte nicht erkannt werden.")
-         except sr.RequestError as e:
-             st.write(f"Fehler bei der Anfrage an den Google Speech Recognition Service: {e}")
-
- # Note for the user
- st.write("Klicke auf 'Aufnahme starten', um die Aufnahme zu beginnen.")
-
-
- import streamlit as st
- import pydub
- import speech_recognition as sr
- from io import BytesIO
-
- st.title("Audio Recorder und Transkription")
-
- # Audio upload
- audio_file = st.file_uploader("Lade eine Audiodatei hoch", type=["wav", "mp3"])
-
- if audio_file is not None:
-     audio_bytes = audio_file.read()
-     audio = pydub.AudioSegment.from_file(BytesIO(audio_bytes))

-     # Convert the audio to the correct format
-     audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
-     audio_bytes = audio.raw_data
-
-     # Transcribe the audio
-     recognizer = sr.Recognizer()
-     audio_source = sr.AudioData(audio_bytes, frame_rate=16000, sample_width=2, channels=1)
      try:
-         text = recognizer.recognize_google(audio_source)
-         st.write("Transkribierter Text:")
-         st.write(text)
-     except sr.UnknownValueError:
-         st.write("Audio konnte nicht erkannt werden.")
-     except sr.RequestError as e:
-         st.write(f"Fehler bei der Anfrage an den Google Speech Recognition Service: {e}")
-
- # Note for the user
- st.write("Lade eine Audiodatei hoch, um sie zu transkribieren.")
-
-
- import streamlit as st
- from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
-
- class AudioProcessor(AudioProcessorBase):
-     def recv(self, frame):
-         # You can add audio processing here
-         return frame
-
- st.title("Audio Recorder")
-
- webrtc_ctx = webrtc_streamer(
-     key="audio",
-     mode=WebRtcMode.SENDRECV,
-     audio_processor_factory=AudioProcessor,
-     media_stream_constraints={"audio": True},
-     async_processing=True,
- )
-
- if webrtc_ctx.state.playing:
-     st.write("Recording...")
- else:
-     st.write("Click on Start to record audio.")
-
-
- import streamlit as st
- import os
- import time
- import pandas as pd
- from pandasai import SmartDatalake
- from pandasai import SmartDataframe
- from pandasai.responses.streamlit_response import StreamlitResponse
- import numpy as np
- #from pandasai import Agent
- import json
- import matplotlib.pyplot as plt
-
-
- os.environ['PANDASAI_API_KEY'] = "$2a$10$2s0v3C29vItNS2CO4QX10OV51/OONFCUNa4e9EU90w2Gozw88f4vK"
- st.set_page_config(page_title="SAP Data Analysis", layout="wide")
- st.image('Pandas-AI-Logo.png', caption=None)
- uploaded_file = st.file_uploader("Upload CSV data for analysis", type=['csv'])
- #uploaded_file = st.file_uploader("Upload Excel data for analysis", type=['xlsx'])
-
- df1 = ""
- sdf = ""
- data = [{"Feb 24":"","1.":"Do","2.":"Fr","3.":"Sa","4.":"So","5.":"Mo","6.":"Di","7.":"Mi","8.":"Do","9.":"Fr","10.":"Sa","11.":"So","12.":"Mo","13.":"Di","14.":"Mi","15.":"Do","16.":"Fr","17.":"Sa","18.":"So","19.":"Mo","20.":"Di","21.":"Mi","22.":"Do","23.":"Fr","24.":"Sa","25.":"So","26.":"Mo","27.":"Di","28.":"Mi","29.":"Do"},{"Feb 24":"Standke Steffen","1.":"F","2.":"F","3.":"","4.":"","5.":"","6.":"","7.":"","8.":"","9.":"","10.":"","11.":"","12.":"","13.":"","14.":"UA","15.":"UA","16.":"","17.":"SD","18.":"SD","19.":"","20.":"","21.":"","22.":"","23.":"","24.":"","25.":"","26.":"","27.":"","28.":"","29.":""},{"Feb 24":"Will Susanne","1.":"","2.":"TZ","3.":"","4.":"","5.":"UA","6.":"","7.":"","8.":"","9.":"TZ","10.":"","11.":"","12.":"","13.":"","14.":"","15.":"","16.":"TZ","17.":"","18.":"","19.":"","20.":"","21.":"","22.":"","23.":"TZ","24.":"","25.":"","26.":"","27.":"","28.":"","29.":""},{"Feb 24":"Raab Julia","1.":"TZ","2.":"TZ","3.":"","4.":"","5.":"","6.":"","7.":"","8.":"TZ","9.":"TZ","10.":"BLOCKER","11.":"","12.":"Ü","13.":"Ü","14.":"Ü","15.":"TZ","16.":"TZ","17.":"BLOCKER","18.":"","19.":"","20.":"","21.":"","22.":"TZ","23.":"TZ","24.":"","25.":"SD","26.":"","27.":"","28.":"","29.":"TZ"},{"Feb 24":"Eckert Marion","1.":"","2.":"","3.":"","4.":"","5.":"","6.":"","7.":"","8.":"","9.":"Ü","10.":"","11.":"","12.":"S","13.":"S","14.":"S","15.":"S","16.":"S","17.":"","18.":"","19.":"","20.":"","21.":"","22.":"","23.":"","24.":"","25.":"","26.":"S","27.":"S","28.":"S","29.":"S"},{"Feb 24":"Meder, Milena","1.":"","2.":"","3.":"","4.":"","5.":"","6.":"","7.":"","8.":"","9.":"","10.":"","11.":"","12.":"F","13.":"F","14.":"","15.":"F","16.":"F","17.":"","18.":"","19.":"","20.":"","21.":"","22.":"","23.":"","24.":"","25.":"","26.":"Voloreise","27.":"","28.":"","29.":""},{"Feb 24":"Despang Angelika","1.":"","2.":"","3.":"SD","4.":"","5.":"","6.":"","7.":"","8.":"","9.":"","10.":"","11.":"","12.":"UA","13.":"UA","14.":"UA","15.":"","16.":"","17.":"","18.":"","19.":"F","20.":"F","21.":"F","22.":"F","23.":"F","24.":"","25.":"","26.":"","27.":"","28.":"","29.":""},{"Feb 24":"Heike Beudert","1.":"TZ","2.":"0,5 U","3.":"","4.":"","5.":"TZ","6.":"","7.":"","8.":"","9.":"","10.":"SD","11.":"SD","12.":"UA","13.":"UA","14.":"TZ","15.":"TZ","16.":"TZ","17.":"","18.":"","19.":"TZ","20.":"TZ","21.":"TZ","22.":"TZ","23.":"TZ","24.":"","25.":"","26.":"F","27.":"F","28.":"F","29.":"F"},{"Feb 24":"Borst Benedikt","1.":"","2.":"","3.":"","4.":"SD","5.":"F","6.":"F","7.":"F","8.":"F","9.":"F","10.":"BLOCKER","11.":"","12.":"UA","13.":"UA","14.":"F","15.":"","16.":"","17.":"","18.":"","19.":"","20.":"","21.":"","22.":"","23.":"","24.":"BLOCKER","25.":"","26.":"","27.":"","28.":"","29.":""}]
-
- #df = pd.DataFrame(data)
- #sdf = SmartDataframe(df)
- #df1
-
- if uploaded_file is not None:
-
-     # Files in CSV format
-     df1 = pd.read_csv(uploaded_file)
-
-     # Files in XLSX format
-     #df1 = pd.read_excel(uploaded_file, sheet_name=NONE)
-     #st.table(df1)
-     df1 = pd.DataFrame(df1)
-     st.success("Daten erfolgreich geladen!")
-     df1
-     #sdf = SmartDataframe(df1)
-
-
- bild = st.empty()
- bild.subheader("Datenanalyse & Datenvisualisierung")
- c = st.container(border=True)
- prompt = st.text_area("Enter your prompt:")
- if st.button("Generate"):
-
-     if prompt:
-         #c.text("Generating response...")
-         if os.path.isfile('./exports/charts/temp_chart.png'):
-             os.remove('./exports/charts/temp_chart.png')
-         #spin = st.spinner
-         with c:
-             with st.spinner("Generating response..."):
-
-                 #bar = st.progress(20)
-
-                 #bar = st.progress(100)
-
-                 with bild:
-                     sdf = SmartDataframe(df1)
-                     st.write(sdf.chat(prompt))
-                 #with st.spinner("Generating response..."):
-
-                 if os.path.isfile('./exports/charts/temp_chart.png'):
-                     st.image('./exports/charts/temp_chart.png')
-
-
-                 #st.success('Done!')
-                 #bar.progress(100)
-
-                 #c.write(bar)
-                 #c.write(st.spinner)
-                 #bild.empty()
-
-                 #st.write(sdf.chat(prompt))
-                 #bar.progress(100)
-
-
-     else:
-
-         st.error("Please enter a prompt.")
-
-
- #with placeholder.container():
-     #st.write("This is one element")
-     #st.write("This is another")
-
-
- #agent = Agent(df)
- #result = agent.chat("erstelle balkendiagramm")
- #st.write(result)
- #sdf = SmartDataframe(df)
- #sdf.chat("draw chart")
- #pandas_ai = PandasAI(llm, verbose=True, save_charts=True)
- #st.write(sdf.chat("Plot a chart"))
- #st.write(st.bar_chart(data))
- '''
-
-
  import streamlit as st
  import os
+ from groq import Groq
+ import soundfile as sf
+ from tempfile import NamedTemporaryFile
+
+ # Load the API key from the environment variable
+ api_key = os.getenv('groq_whisper')
+
+ if api_key is None:
+     raise ValueError("groq_whisper environment variable is not set")
+
+ # Initialize the Groq client
+ client = Groq(api_key=api_key)
+
+ def processaudio(audio_data):
      try:
+         # Unpack the audio data (sample rate and NumPy array)
+         sample_rate, samples = audio_data
+
+         # Create a temporary audio file
+         with NamedTemporaryFile(suffix=".wav", delete=True) as tmpfile:
+             # Save the audio as a WAV file
+             sf.write(tmpfile.name, samples, sample_rate)
+
+             # Re-open the file and send it to Groq
+             with open(tmpfile.name, "rb") as file:
+                 transcription = client.audio.transcriptions.create(
+                     file=(os.path.basename(tmpfile.name), file.read()),
+                     model="whisper-large-v3-turbo",
+                     prompt="transcribe",
+                     language="de",
+                     response_format="json",
+                     temperature=0.0
+                 )
+             return transcription.text
      except Exception as e:
+         return f"Ein Fehler ist aufgetreten: {str(e)}"
+
+ def process_audio(file_path):
      try:
+         # Open the audio file
+         with open(file_path, "rb") as file:
+             # Create a transcription of the audio file
+             transcription = client.audio.transcriptions.create(
+                 file=(os.path.basename(file_path), file.read()),  # Correct passing of filename
+                 model="whisper-large-v3-turbo",  # Required model to use for transcription
+                 prompt="transcribe",  # Optional
+                 language="de",  # Optional
+                 response_format="json",  # Optional
+                 temperature=0.0  # Optional
+             )
+         # Return the transcription text
+         return transcription.text
+     except Exception as e:
+         return f"Ein Fehler ist aufgetreten: {str(e)}"
+
+ # Streamlit Interface
+ st.title("Audio Transkription")
+ sr_outputs = st.empty()  # Placeholder for the transcription
+ sr_inputs = st.file_uploader("Laden Sie eine Audiodatei hoch", type=["wav", "mp3"])
+
+ if sr_inputs is not None:
+     audio_data = sf.read(sr_inputs)
+     transcription = processaudio(audio_data)
+     sr_outputs.text(transcription)
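
For reference, a minimal local sketch (not part of the commit) that exercises the same Groq Whisper call outside the Streamlit front end. The groq_whisper environment variable and the sample.wav path are illustrative assumptions; note that soundfile.read returns a (data, samplerate) tuple by default, which is the reverse of the (sample_rate, samples) order that processaudio unpacks.

import os

import soundfile as sf
from groq import Groq

# Assumption: the groq_whisper environment variable is set, as in app.py.
client = Groq(api_key=os.getenv("groq_whisper"))

def transcribe_file(file_path):
    # Mirrors process_audio() from the commit: read the raw bytes and send them to Groq.
    with open(file_path, "rb") as f:
        transcription = client.audio.transcriptions.create(
            file=(os.path.basename(file_path), f.read()),
            model="whisper-large-v3-turbo",
            language="de",
            response_format="json",
            temperature=0.0,
        )
    return transcription.text

if __name__ == "__main__":
    # "sample.wav" is a hypothetical local file used only for this sketch.
    # soundfile.read returns (data, samplerate); swap the order before reusing processaudio().
    samples, sample_rate = sf.read("sample.wav")
    print(f"Loaded {len(samples)} samples at {sample_rate} Hz")
    print(transcribe_file("sample.wav"))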