File size: 4,416 Bytes
ca5f84f
62040f2
 
b2ba4fb
9e7dfc2
 
 
 
b2ba4fb
dc25526
b2ba4fb
6cd0567
 
 
 
 
 
 
 
304b184
6cd0567
 
 
b2ba4fb
 
 
8e85569
2139770
 
db8fb45
8e85569
 
 
fcb8fe0
 
8e85569
b764634
cf04987
b2ba4fb
 
09e15cd
b2ba4fb
 
1e4e902
b2ba4fb
 
 
 
 
ca5f84f
6c2fae1
 
 
 
 
 
 
 
 
af406c2
6c2fae1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235727d
cd0f9aa
2d583fc
 
 
 
 
 
 
 
99c69ca
 
2139770
ba251cc
99c69ca
 
 
 
10ae401
2d583fc
10ae401
 
cd0f9aa
304b184
ff5d881
 
 
ca5f84f
6cd0567
 
 
61b658f
 
6cd0567
db8fb45
6cd0567
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import gradio as gr
import requests
from bs4 import BeautifulSoup
from gradio_client import Client
from urllib.parse import urljoin
import pandas as pd
from io import StringIO
import json
import groq
import os

# Stylesheet handed to gr.Blocks(css=...). It styles the element whose id is
# "md": a fixed-height dark panel with white text and a thin white border.
# Fixes: "1 px" is not a valid CSS length (no whitespace is allowed between
# number and unit), so the border declaration was ignored; the earlier
# "font-size: 30px" was always overridden by the later 10px and is dropped.
custom_css = """
#md {
    height: 400px;
    background: #202020;
    padding: 20px;
    color: white;
    border: 1px solid white;
    font-size: 10px;
}
"""

# The Groq API key is read from the environment variable named "groq"
# (None when the variable is unset) and used to build the shared Groq client.
api_key = os.environ.get('groq')
client = groq.Client(api_key=api_key)

def qwen(jsondata):
    """Send *jsondata* to the Qwen2.5-72B-Instruct Space, asking for valid JSON.

    A new gradio_client connection to "Qwen/Qwen2.5-72B-Instruct" is opened
    on every call. One prompt is sent to the "/model_chat" endpoint with an
    empty history, and the endpoint's result is returned unmodified.
    """
    space = Client("Qwen/Qwen2.5-72B-Instruct")
    prompt = f"return valid json \n {jsondata}"
    return space.predict(
        query=prompt,
        history=[],
        system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        api_name="/model_chat",
    )

def llm(message):
    """Ask the Groq chat model to extract contact data from *message* as JSON.

    The text is embedded in a prompt requesting a JSON object with the keys
    name, email, phone and website. The model's reply text is returned as-is
    (it is not parsed or validated here).

    Any exception raised while calling the API is caught and reported as the
    string "Error in response generation: <details>" instead of propagating.
    """
    prompt = f"return a json object with the keys: name,email,phone,website \n the values can be found here, leave blank if value is not available:\n {message} \n return a json object only. no text, no explanaition"
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    try:
        response = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=conversation,
        )
        reply = response.choices[0].message.content
    except Exception as exc:
        return f"Error in response generation: {exc!s}"
    return reply

def list_of_clubs(ort, timeout=10):
    """Collect club names listed for a Bavarian town on vereine-in-deutschland.net.

    Args:
        ort: Town name, inserted verbatim into the URL path
            ``/vereine/Bayern/<ort>``.
        timeout: Seconds to wait on each HTTP request. Without a timeout a
            stalled server would block the caller indefinitely.

    Returns:
        On success, a list containing every second link text (indices
        0, 2, 4, ...) gathered from the result container of all pages.
        On any failure, the tuple ``(error_message, [])``; callers have to
        cope with both shapes.
    """
    base_url = "https://vereine-in-deutschland.net"
    all_links_text = []
    initial_url = f"{base_url}/vereine/Bayern/{ort}"

    try:
        # A single session lets the per-page requests reuse one connection.
        with requests.Session() as session:
            response = session.get(initial_url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            # Determine the last page: the 8th pagination item is presumably
            # the "last page" link, whose final path segment is the page
            # count. When it is missing, 10 pages are requested.
            link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
            last_page = 10
            if link_element and 'href' in link_element.attrs:
                href = link_element['href']
                last_page = int(href.split('/')[-1])

            # Loop through all pages and collect links
            for page_number in range(1, last_page + 1):
                page_url = f"{base_url}/vereine/Bayern/{ort}/p/{page_number}"
                response = session.get(page_url, timeout=timeout)
                response.raise_for_status()
                soup = BeautifulSoup(response.content, 'html.parser')
                target_div = soup.select_one('div.row-cols-1:nth-child(4)')

                if target_div:
                    all_links_text.extend(
                        a.text for a in target_div.find_all('a', href=True)
                    )
                else:
                    print(f"Target div not found on page {page_number}")

    except Exception as e:
        # Fail soft: report the problem to the caller instead of raising.
        return str(e), []

    # Keep every other entry, starting with the first (presumably each club
    # is linked twice in the listing -- TODO confirm against the page markup).
    return all_links_text[0::2]

def process_ort(ort):
    """Return the club names found for the town *ort*.

    Thin wrapper around ``list_of_clubs`` whose result is passed through
    unchanged: a list of club names on success, or the
    ``(error_message, [])`` tuple that ``list_of_clubs`` returns on failure.

    NOTE: the statements that used to follow the return (searching each
    club's "impressum", extracting contact data with ``llm`` and building a
    pandas DataFrame) could never execute and have been removed. Restore
    them deliberately, together with a matching output component, if that
    feature is wanted.
    """
    return list_of_clubs(ort)

# Gradio UI: three stacked rows -- result box, town input, submit button.
# Note: no component here sets elem_id="md", so the "#md" rule in custom_css
# currently matches nothing on this page.
with gr.Blocks(css=custom_css) as demo:
    with gr.Row():
        details_output = gr.Textbox(label="Ausgabe")
    with gr.Row():
        ort_input = gr.Textbox(
            label="Ort eingeben",
            placeholder="ask anything...",
        )
    with gr.Row():
        button = gr.Button("Senden")

    # Clicking the button runs process_ort on the entered town and shows
    # its return value in the output textbox.
    button.click(
        fn=process_ort,
        inputs=ort_input,
        outputs=details_output,
    )

# Start the web server for the app.
demo.launch()