Spaces:
Running
Running
File size: 4,284 Bytes
ca5f84f 62040f2 b2ba4fb 9e7dfc2 b2ba4fb dc25526 b2ba4fb 6cd0567 b2ba4fb 8e85569 fcb8fe0 8e85569 b764634 cf04987 b2ba4fb 09e15cd b2ba4fb 1e4e902 b2ba4fb ca5f84f 6c2fae1 af406c2 6c2fae1 cd0f9aa 10ae401 cd0f9aa 10ae401 99c69ca ba251cc 99c69ca 10ae401 cd0f9aa 8e85569 ab48c87 8e85569 2a1f4e1 8e85569 ab48c87 ca5f84f 6cd0567 8e85569 6cd0567 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import gradio as gr
import requests
from bs4 import BeautifulSoup
from gradio_client import Client
from urllib.parse import urljoin
import pandas as pd
from io import StringIO
import json
import groq
import os
# Stylesheet handed to gr.Blocks; it targets the component whose elem_id is "md".
# The border width must be written as "1px" (no space between number and unit),
# otherwise the declaration is invalid CSS and the border is dropped.
custom_css = """
#md {
height: 400px;
font-size: 30px;
background: #202020;
padding: 20px;
color: white;
border: 1px solid white;
}
"""
# The Groq API key is read from the environment variable named "groq";
# the resulting module-level client is the one used by llm().
api_key = os.environ.get('groq')
client = groq.Client(api_key=api_key)
def qwen(jsondata):
    """Ask the "Qwen/Qwen2.5-72B-Instruct" Space to return *jsondata* as valid JSON.

    A new gradio_client ``Client`` is created on every call and its
    ``/model_chat`` endpoint is invoked with an empty chat history.

    Args:
        jsondata: Value interpolated into the prompt via an f-string.

    Returns:
        Whatever ``Client.predict`` returns for the ``/model_chat`` endpoint.
    """
    space = Client("Qwen/Qwen2.5-72B-Instruct")
    prompt = f"return a valid json object \n {jsondata}"
    return space.predict(
        query=prompt,
        history=[],
        system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        api_name="/model_chat",
    )
def llm(message):
    """Ask the Groq chat model to extract contact details from *message* as JSON.

    The text is embedded in a prompt requesting a JSON object with the keys
    name, email, phone and website, and sent through the module-level Groq
    ``client`` to the "llama3-70b-8192" model.

    Args:
        message: Free text in which the contact details are expected.

    Returns:
        The content of the first completion choice, or a string starting with
        "Error in response generation:" if anything raised while requesting
        or reading the completion.
    """
    prompt = f"return a json object with the keys: name,email,phone,website \n the values can be found here, leave blank if value is not available:\n {message} \n return a json object only. no text, no explanaition"
    chat = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    try:
        reply = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=chat,
        )
        # Indexing stays inside the try so a malformed reply is reported
        # through the same error string as a failed request.
        return reply.choices[0].message.content
    except Exception as err:
        return f"Error in response generation: {str(err)}"
def list_of_clubs(ort, timeout=10):
    """Collect club names listed for a Bavarian town on vereine-in-deutschland.net.

    The town's first listing page is fetched to read the last page number
    from the pagination bar. Every page from 1 to that number is then fetched
    and the text of each link inside the result container is collected.

    Args:
        ort: Town name as it appears in the site's URL path.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list containing every second collected link text. An empty list is
        returned when any request or parsing step fails, so the result can
        always be iterated as a list of names.
    """
    base_url = "https://vereine-in-deutschland.net"
    listing_url = f"{base_url}/vereine/Bayern/{ort}"
    all_links_text = []
    try:
        response = requests.get(listing_url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Determine the last page from the pagination bar; when the expected
        # pagination entry is missing, fall back to scanning 10 pages.
        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
        last_page = 10
        if link_element is not None and 'href' in link_element.attrs:
            last_page = int(link_element['href'].split('/')[-1])

        # Loop through all pages and collect the link texts.
        for page_number in range(1, last_page + 1):
            page_url = f"{listing_url}/p/{page_number}"
            response = requests.get(page_url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            target_div = soup.select_one('div.row-cols-1:nth-child(4)')
            if target_div is None:
                print(f"Target div not found on page {page_number}")
                continue
            all_links_text.extend(
                a.text for a in target_div.find_all('a', href=True)
            )
    except Exception as e:
        # Report the failure and return an empty list rather than a
        # (message, []) tuple: a tuple would be iterated by callers as if the
        # error message were a club name.
        print(f"Could not load club list for {ort!r}: {e}")
        return []

    # Keep every second entry (presumably each club is linked twice in the
    # listing markup; verify against the live page).
    return all_links_text[0::2]
def process_ort(ort):
    """Build a table of contact details for the clubs of the town *ort*.

    For every club name returned by ``list_of_clubs`` a Google search for
    "impressum <club>" is fetched, the page text is handed to ``llm`` to
    extract contact details, and the collected answers are passed to ``qwen``
    to be returned as one JSON document, which is loaded into a DataFrame.

    Args:
        ort: Town name entered in the UI.

    Returns:
        A pandas DataFrame built from the parsed JSON, or an empty DataFrame
        when no club names are available.

    Raises:
        json.JSONDecodeError / TypeError: if the value returned by ``qwen``
            cannot be parsed by ``json.loads``.
    """
    links_text = list_of_clubs(ort)
    # Guard against a failed or empty lookup (including a non-list error
    # result) so that nothing but real club names is sent to the search
    # and the language models.
    if not isinstance(links_text, list) or not links_text:
        return pd.DataFrame()

    # The same browser-like headers are used for every search request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    vereine = []
    for verein in links_text:
        try:
            # Passing the query via params lets requests escape characters
            # such as "&" or "#" that may occur in club names.
            response = requests.get(
                "https://www.google.com/search",
                params={"q": f"impressum {verein}"},
                headers=headers,
                timeout=15,
            )
        except requests.RequestException as err:
            # One failed search should not abort the whole run.
            print(f"Search failed for {verein!r}: {err}")
            continue
        soup = BeautifulSoup(response.content, 'html.parser')
        body = soup.find('body')
        # Fall back to the whole document text when there is no <body> tag.
        contact_details = body.text if body is not None else soup.get_text()
        vereine.append(llm(contact_details))

    valid_json = qwen(vereine)
    # Convert the JSON string to Python objects, then to a DataFrame.
    data_dict = json.loads(valid_json)
    return pd.DataFrame(data_dict)
# Create the Gradio interface: a DataFrame for the result, a text input for
# the town name, and a button that triggers process_ort.
with gr.Blocks(css=custom_css) as demo:
    with gr.Row():
        details_output = gr.DataFrame(label="answer", elem_id="md")
    with gr.Row():
        ort_input = gr.Textbox(label="prompt", placeholder="ask anything...")
    with gr.Row():
        button = gr.Button("Senden")
    # Connect the button to the function. Event listeners have to be
    # registered while the Blocks context is still open.
    button.click(fn=process_ort, inputs=ort_input, outputs=details_output)

# Launch the Gradio application only when this file is run as a script, so
# importing the module (e.g. for testing) does not start a server.
if __name__ == "__main__":
    demo.launch()
|