File size: 5,253 Bytes
ca5f84f
62040f2
 
b2ba4fb
9e7dfc2
 
 
 
b2ba4fb
dc25526
b2ba4fb
9edd3cc
 
 
 
 
6cd0567
 
9edd3cc
6cd0567
9edd3cc
6cd0567
 
 
304b184
6cd0567
 
 
9edd3cc
 
 
b2ba4fb
9edd3cc
 
 
 
 
 
 
 
 
 
 
 
 
 
8e85569
b764634
cf04987
b2ba4fb
 
09e15cd
b2ba4fb
 
1e4e902
b2ba4fb
 
 
 
 
ca5f84f
9edd3cc
 
 
 
 
 
 
 
 
 
6c2fae1
 
 
 
 
 
 
 
 
af406c2
6c2fae1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3d2941
cd0f9aa
9edd3cc
 
 
 
 
 
fbd0747
 
f68fbd1
2d583fc
 
 
f68fbd1
2d583fc
99c69ca
 
2139770
ba251cc
99c69ca
 
 
 
10ae401
2d583fc
10ae401
f68fbd1
 
1d394b2
 
 
 
ca5f84f
6cd0567
 
 
61b658f
 
6cd0567
db8fb45
6cd0567
 
 
 
9edd3cc
6cd0567
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import gradio as gr
import requests
from bs4 import BeautifulSoup
from gradio_client import Client
from urllib.parse import urljoin
import pandas as pd
from io import StringIO
import json
import groq
import os

# Prediction endpoint that `query` posts its payloads to.
API_URL = "https://blavken-flowiseblav.hf.space/api/v1/prediction/fbc118dc-ec00-4b59-acff-600648958be3"

# Secrets come from the process environment; each is None when the variable
# is not set.
google_api_key = os.environ.get('google_search')
api_key = os.environ.get('groq')

# Module-wide Groq client used by `llm`.
client = groq.Client(api_key=api_key)

# Stylesheet handed to gr.Blocks. It styles the element whose HTML id is "md".
# Fixes compared with the previous version:
#   * "1 px" is not a valid CSS length (no space is allowed between the number
#     and the unit), so the whole border declaration was being discarded.
#   * font-size was declared twice; only the last declaration (10px) ever took
#     effect, so the dead 30px declaration is removed.
custom_css = """
#md {
    height: 200px;
    background: #121212;
    padding: 20px;
    color: white;
    border: 1px solid white;
    font-size: 10px;
}
"""

def query(payload, timeout=120):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable object sent as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever, which would block the calling
            UI handler indefinitely when the endpoint is unresponsive.

    Returns:
        Whatever the response body decodes to.

    Raises:
        requests.RequestException: On connection problems, on timeout, or when
            the server answers with an HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.post(API_URL, json=payload, timeout=timeout)
    # Surface HTTP errors here instead of handing an error document to the
    # caller as if it were an answer.
    response.raise_for_status()
    return response.json()

def google_search(payloads):
    """Send *payloads* as a question to the prediction endpoint and return text.

    Despite its name, this function does not call Google itself; it forwards
    the question through ``query``.

    Args:
        payloads: The question. It is converted to a string before sending.

    Returns:
        The answer as a single string:
        * a string reply is returned unchanged;
        * for a JSON object, the value of its ``"text"`` field when that is a
          string (assumes the endpoint follows the Flowise prediction response
          format -- TODO confirm), otherwise the whole object serialised as
          JSON;
        * for a list, the concatenation of its items;
        * anything else is converted with ``str``.
    """
    output = query({"question": f"{payloads}"})
    print(output)  # keep the raw reply in the server log for debugging

    if isinstance(output, str):
        return output

    if isinstance(output, dict):
        # Iterating a dict yields its keys, so the previous character-by-
        # character concatenation returned the field names, not the answer.
        answer = output.get("text")
        if isinstance(answer, str):
            return answer
        return json.dumps(output, ensure_ascii=False)

    if isinstance(output, (list, tuple)):
        return "".join(str(item) for item in output)

    # Scalars / null: previously a TypeError, now rendered as text.
    return str(output)

def llm(message):
    """Ask the Groq chat model to turn *message* into a contact-details JSON object.

    The model is instructed to answer with a JSON object holding the keys
    name, email, phone and website.

    Args:
        message: Free text in which the contact details are expected.

    Returns:
        The content of the first completion choice, or a string starting with
        "Error in response generation: " when the request fails for any reason.
        The reply is returned as-is; it is not parsed or validated here.
    """
    prompt = f"return a json object with the keys: name,email,phone,website \n the values can be found here, leave blank if value is not available:\n {message} \n return a json object only. no text, no explanaition"
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=conversation,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        # Failures are reported in-band as text rather than raised.
        return "Error in response generation: " + str(e)

def qwen(jsondata):
    """Ask the Qwen2.5-72B-Instruct Space to return *jsondata* as valid JSON.

    A new ``gradio_client`` connection to the Space is opened on every call.

    Args:
        jsondata: Text embedded into the prompt after "return valid json".

    Returns:
        The result of the Space's ``/model_chat`` endpoint, unmodified.
    """
    space = Client("Qwen/Qwen2.5-72B-Instruct")
    request = {
        "query": f"return valid json \n {jsondata}",
        "history": [],
        "system": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        "api_name": "/model_chat",
    }
    return space.predict(**request)

def list_of_clubs(ort, timeout=30):
    """Scrape the club listing for *ort* from vereine-in-deutschland.net.

    The listing is looked up under the site's ``/vereine/Bayern/`` path. The
    first request is only used to find out how many result pages exist; after
    that every page from 1 to the last one is fetched and the link texts found
    in the result container are collected.

    Args:
        ort: Place name, inserted verbatim into the URL path.
        timeout: Seconds each HTTP request may take before it is aborted.
            Without it a stalled server would block this function forever.

    Returns:
        On success, a list with every second collected link text
        (presumably each club contributes two anchors -- TODO confirm).
        On any failure, the tuple ``(error message, [])``.
        NOTE(review): the error case has a different shape than the success
        case; it is kept as-is so existing callers are not broken.
    """
    base_url = "https://vereine-in-deutschland.net"
    listing_url = f"{base_url}/vereine/Bayern/{ort}"
    all_links_text = []

    try:
        response = requests.get(listing_url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # The 8th pagination item is expected to link to the last page; its
        # final path segment is the page count. When it is missing, 10 pages
        # are requested as a fallback.
        last_page = 10
        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
        if link_element is not None and 'href' in link_element.attrs:
            last_page = int(link_element['href'].split('/')[-1])

        # Walk all result pages and gather the anchor texts.
        for page_number in range(1, last_page + 1):
            page_url = f"{base_url}/vereine/Bayern/{ort}/p/{page_number}"
            response = requests.get(page_url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            target_div = soup.select_one('div.row-cols-1:nth-child(4)')

            if target_div:
                all_links_text.extend(
                    a.text for a in target_div.find_all('a', href=True)
                )
            else:
                print(f"Target div not found on page {page_number}")

    except Exception as e:
        # Any failure (network, HTTP status, parsing) discards what has been
        # collected so far and is reported in-band.
        return str(e), []

    return all_links_text[0::2]

def process_ort(ort):
    """Return the club names scraped for the place *ort*.

    This is a thin wrapper around ``list_of_clubs``; its result is passed
    through unchanged (including the ``(error message, [])`` tuple that
    ``list_of_clubs`` returns on failure).

    Args:
        ort: Place name forwarded to ``list_of_clubs``.

    Returns:
        Whatever ``list_of_clubs(ort)`` returns.
    """
    # The per-club "impressum" lookup that used to follow this return could
    # never run, because the function already returned the club list before
    # reaching it. That unreachable code has been removed; the observable
    # behaviour is unchanged.
    return list_of_clubs(ort)

# Assemble the Gradio page: an output box on top, the question box below it,
# and a submit button at the bottom, each in its own row.
with gr.Blocks(css=custom_css) as demo:
    with gr.Row():
        # A plain textbox shows the answer (a DataFrame with elem_id="md"
        # was used here at some point).
        details_output = gr.Textbox(label="Ausgabe")
    with gr.Row():
        ort_input = gr.Textbox(placeholder="ask anything...", label="Ort eingeben")
    with gr.Row():
        button = gr.Button(value="Senden")

    # Clicking the button sends the entered text to google_search and shows
    # the returned string in the output box.
    button.click(google_search, ort_input, details_output)

# Start the web server for the app.
demo.launch()