# google_search / app.py — Hugging Face Space "mgokg", revision 3f09122
# (Hugging Face file-viewer header condensed into this comment so the file parses.)
import gradio as gr
import requests
from bs4 import BeautifulSoup
from gradio_client import Client
from urllib.parse import urljoin
import pandas as pd
from io import StringIO
import json
import groq
import os
# API key for Google search (read from the Space's secrets; not used anywhere
# in the visible code below) and the Flowise prediction endpoint that
# query()/google_search() POST questions to.
google_api_key = os.getenv('google_search')
API_URL = "https://blavken-flowiseblav.hf.space/api/v1/prediction/fbc118dc-ec00-4b59-acff-600648958be3"
# Module-level Groq client used by llm(). Several functions below bind a
# *local* name `client` to gradio_client.Client instances; those do not
# affect this module-level binding.
api_key = os.getenv('groq')
client = groq.Client(api_key=api_key)
# CSS applied to the Gradio Blocks UI (targets the element with id "md").
# Fixes: "1 px" is invalid CSS (space inside the length) -> "1px"; the
# earlier duplicate "font-size: 30px" was dead (overridden by the later
# 10px declaration) and has been dropped.
custom_css = """
#md {
height: 200px;
background: #121212;
padding: 20px;
color: white;
border: 1px solid white;
font-size:10px;
}
"""
def perplexica_search(payloads):
    """Forward *payloads* as a prompt to the mgokg/PerplexicaApi Space.

    Returns the Space's answer for the "/question" endpoint, using the
    "balanced" optimization mode.
    """
    space = Client("mgokg/PerplexicaApi")
    answer = space.predict(
        prompt=f"{payloads}",
        optimization_mode="balanced",
        api_name="/question",
    )
    return answer
def query(payload):
    """POST *payload* as JSON to the Flowise prediction API and return the decoded JSON reply."""
    resp = requests.post(API_URL, json=payload)
    return resp.json()
def google_search(payloads):
    """Send *payloads* as a question to the Flowise flow and return its JSON reply.

    The previous body looped over the response concatenating its elements
    into a local string that was never returned (and, since the response is
    a dict, the loop only visited its keys) — that dead code is removed.
    """
    return query({
        "question": f"{payloads}",
    })
# JSON template used in the extraction prompts below: the model is asked to
# fill these keys in and leave unknown values blank.
scheme = """
{"name":"","email":"","website":""}
"""
def llama(messages):
    """Send *messages* to the mgokg/selenium-screenshot-gradio Space and return its reply.

    The original body contained unreachable statements after the return
    (a second Client call that also referenced the undefined name
    ``message``); that dead code has been removed.
    """
    space = Client("mgokg/selenium-screenshot-gradio")
    result = space.predict(
        message=f"{messages}",
        api_name="/predict",
    )
    return result
def llm(message):
    """Ask the Groq llama3-70b model to extract contact data from *message*.

    Wraps the text in a fixed extraction prompt asking for a bare JSON
    object with name/email/phone/website keys. Returns the model's reply,
    or an "Error in response generation: ..." string on failure.
    """
    prompt = (
        "return a json object with the keys: name,email,phone,website \n "
        "the values can be found here, leave blank if value is not available:\n "
        f"{message} \n return a json object only. no text, no explanaition"
    )
    try:
        chat = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )
        return chat.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"
def qwen(jsondata):
    """Ask Qwen2.5-72B-Instruct to extract per-club contact JSON from *jsondata*."""
    space = Client("Qwen/Qwen2.5-72B-Instruct")
    extraction_prompt = (
        "return a json object with the keys: name,email,phone,website for each verein \n "
        "the values can be found here, leave blank if value is not available:\n "
        f"{jsondata} \n return a json object only. no text, no explanaition"
    )
    reply = space.predict(
        query=extraction_prompt,
        history=[],
        system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        api_name="/model_chat",
    )
    return reply
def list_of_clubs(ort):
    """Scrape vereine-in-deutschland.net for club names in the Bavarian town *ort*.

    Walks every pagination page of the town's listing and collects the
    anchor texts from the results container. The site lists each club
    twice, so only every second entry is kept.

    Returns a list of club-name strings. On any scraping error an empty
    list is returned (the previous code returned a ``(error-string, [])``
    tuple on error but a plain list on success — an inconsistent return
    type for callers).
    """
    base_url = "https://vereine-in-deutschland.net"
    all_links_text = []
    initial_url = f"{base_url}/vereine/Bayern/{ort}"
    try:
        response = requests.get(initial_url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Read the pagination widget to find the last page number;
        # fall back to 10 pages when the selector does not match.
        last_page = 10
        link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
        if link_element and 'href' in link_element.attrs:
            last_page = int(link_element['href'].split('/')[-1])

        # Loop through all pages and collect the anchor texts.
        for page_number in range(1, last_page + 1):
            page_url = f"{base_url}/vereine/Bayern/{ort}/p/{page_number}"
            response = requests.get(page_url, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            target_div = soup.select_one('div.row-cols-1:nth-child(4)')
            if target_div:
                all_links_text.extend(a.text for a in target_div.find_all('a', href=True))
            else:
                print(f"Target div not found on page {page_number}")
    except Exception as e:
        # Best-effort scraper: report the failure and return an empty list.
        print(f"Error while scraping club list: {e}")
        return []

    # Each club appears twice in the listing; keep every second entry.
    return all_links_text[0::2]
def process_ort(ort):
    """Collect contact information for a list of clubs and return the raw results.

    NOTE(review): the live scrape via ``list_of_clubs(ort)`` is commented
    out; a hard-coded club list for Wallenfels is used instead, so *ort*
    is currently ignored. Each club name is passed to :func:`llama`, and
    the replies are returned as a list.

    Fixes over the original:
    - ``prompt=f"{verein}",`` had a trailing comma, which bound ``prompt``
      to a 1-tuple instead of a string — llama() received a tuple.
    - The code after the ``return`` (a second loop over the undefined name
      ``links_text``) was unreachable and has been removed.
    """
    #links_text = list_of_clubs(ort)
    #return links_text
    linkstext = [
        "Angels Wallenfels e.V.",
        "Bärenstarke Typen e.V.",
        "BIZ for KIDS e.V.",
        "Blasmusik Neuengrün/Schlegelshaid",
        "CSU Ortsverband Neuengrün/Schlegelshaid",
        "CSU Ortsverband Wallenfels",
        "Dorfgemeinschaft Schnaid e.V.",
        "Elternbeirat Grundschule Wallenfels",
        "Erholungsverein Schnappenhammer",
        "FC Bayern Fanclub Wolfersgrün",
        "FC Wallenfels 1920 e.V.",
        "Flößergemeinschaft Wallenfels",
        "Förderkreis Bingham Wallenfels",
        "Frankenlust e.V.",
        "Frankenwaldverein Ortsgruppe Schnaid",
        "Frankenwaldverein Ortsgruppe Wallenfels",
        "Frankenwaldfreunde Wolfersgrün",
        "Frauenclub Schnaid",
        "Frauenunion Wallenfels",
        "Freie Wähler Wallenfels",
        "Freiwillige Feuerwehr Geuser",
        "Freiwillige Feuerwehr Neuengrün",
        "Freiwillige Feuerwehr Schnaid",
        "Freiwillige Feuerwehr Wallenfels e.V.",
        "Freiwillige Feuerwehr Wolfersgrün",
        "Gartenbauverein Neuengrün",
        "Gartenbauverein Wallenfels",
        'Gesangverein "Cäcilia" 1885 e.V.',
        "Gymnastikgruppe",
        "Heimatgeschichtlicher Arbeitskreis e.V.",
        "Imkerverein Wallenfels",
        "Jugendfördergemeinschaft Oberes Rodachtal e.V.",
        "JU Ortsverband Wallenfels",
        "KAB Ortsverband Wallenfels",
        "KAB Ortsverband Wolfersgrün",
        "Kath. Frauenbund Wallenfels",
        "Kath. Jugend Neuengrün/Schlegelshaid",
        "Kath. Jugend Wallenfels",
        "Kath. Jugend Wolfersgrün",
        "Kindergarten-Beirat Wallenfels",
        "Kirchweihgesellschaft Wallenfels e.V.",
        "Musikverein Wallenfels",
        "Ortsgruppe Frankenwald der Dt. Kakteengesellschaft",
        "Pfarrgemeinderat Neuengrün",
        "Pfarrgemeinderat Schnaid",
        "Pfarrgemeinderat Wallenfels",
        "Pfarrgemeinderat Wolfersgrün",
        "RV Wilderodachbote",
        "Seniorenclub Wallenfels",
        "Soldatenkameradschaft Neuengrün/Schlegelshaid",
        "Soldatenkameradschaft Wallenfels e.V.",
        "SPD Ortsverband Wallenfels",
        "SV Wolfersgrün/Neuengrün",
        "Tennisverein 1982 Wallenfels e.V.",
        "Theaterfreunde Wallenfels e.V.",
        "Trommlerzug Wallenfels",
        "TTC Wallenfels 1963 e.V.",
        "Unterstützungskasse Wallenfels",
        "VdK Schnaid",
        "VdK Wallenfels",
        "Vereinigte Schützengesellschaft 1860 Wallenfels e.V.",
        "Wallnfelse Fousanoacht e.V.",
        "Wasserwacht Ortsgruppe Wallenfels"
    ]
    vereine = []
    for verein in linkstext:
        # Pass the club name as a plain string (the original trailing comma
        # turned this into a 1-tuple).
        result = llama(f"{verein}")
        vereine.append(result)
        print(result)
    return vereine
# Create the Gradio interface: an output text box, an input box for the
# town name, and a send button wired to process_ort.
with gr.Blocks(css=custom_css) as demo:
    with gr.Row():
        #details_output = gr.DataFrame(label="Ausgabe", elem_id="md")
        details_output = gr.Textbox(label="Ausgabe")
    with gr.Row():
        ort_input = gr.Textbox(label="Ort eingeben", placeholder="ask anything...")
    with gr.Row():
        button = gr.Button("Senden")
    # Connect the button to the function
    button.click(fn=process_ort, inputs=ort_input, outputs=details_output)

# Launch the Gradio application
demo.launch()