Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -62,7 +62,6 @@ def list_of_clubs(ort):
|
|
62 |
target_div = soup.select_one('div.row-cols-1:nth-child(4)')
|
63 |
|
64 |
if target_div:
|
65 |
-
#links = [urljoin(base_url, a['href']) for a in target_div.find_all('a', href=True)]
|
66 |
texts = [a.text for a in target_div.find_all('a', href=True)]
|
67 |
all_links_text.extend(texts)
|
68 |
else:
|
@@ -74,56 +73,28 @@ def list_of_clubs(ort):
|
|
74 |
all_links_text = all_links_text[0::2]
|
75 |
return all_links_text
|
76 |
|
77 |
-
def google_search(query):
    """Return the text of the first search hit for *query*.

    Takes the first URL yielded by ``search(query, num_results=1)``,
    downloads that page and returns the stripped text of its ``<body>``.

    Args:
        query: Search phrase, passed unchanged to ``search``.

    Returns:
        The stripped page text, or an empty string when the search yields
        no result at all.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    # Run the search and take the first result. Using next() with a default
    # keeps `url` defined even when the search comes back empty.
    url = next(iter(search(query, num_results=1)), None)
    if url is None:
        return ""

    # Send the browser-like User-Agent that was prepared above, and bound the
    # wait so a stalled server cannot block the caller indefinitely.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    # find() returns None when the document has no <body> tag; fall back to
    # the text of the whole document in that case.
    body = soup.find('body')
    page_text = body.text if body is not None else soup.get_text()
    return page_text.strip()
|
91 |
-
|
92 |
def process_ort(ort):
    """Collect the Google result-page text for every club listed for *ort*.

    For each club name returned by ``list_of_clubs(ort)`` a Google search for
    ``impressum <club name>`` is requested and the text of the response body
    is stored.

    Args:
        ort: Value forwarded unchanged to ``list_of_clubs``.

    Returns:
        A list with one text string per club, in the order the clubs were
        returned by ``list_of_clubs``.
    """
    # Loop-invariant, so build it once instead of once per club.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    vereine = []
    for verein in list_of_clubs(ort):
        # Pass the query through `params` so that characters such as '&',
        # '#' or '+' inside a club name are escaped instead of corrupting
        # the query string.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": f"impressum {verein}"},
            headers=headers,
            timeout=10,  # do not hang forever on a stalled connection
        )
        soup = BeautifulSoup(response.content, 'html.parser')

        # find() returns None when the response has no <body> tag; fall back
        # to the text of the whole document in that case.
        body = soup.find('body')
        vereine.append(body.text if body is not None else soup.get_text())

    return vereine
|
124 |
-
|
125 |
demo = gr.Interface(
|
126 |
-
#fn=google_search,
|
127 |
fn=process_ort,
|
128 |
inputs=gr.Textbox(lines=1, placeholder="Geben Sie Ihre Suchanfrage ein..."),
|
129 |
outputs="text",
|
|
|
62 |
target_div = soup.select_one('div.row-cols-1:nth-child(4)')
|
63 |
|
64 |
if target_div:
|
|
|
65 |
texts = [a.text for a in target_div.find_all('a', href=True)]
|
66 |
all_links_text.extend(texts)
|
67 |
else:
|
|
|
73 |
all_links_text = all_links_text[0::2]
|
74 |
return all_links_text
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
def process_ort(ort):
    """Run every club found for *ort* through a Google search and ``llm``.

    For each club name returned by ``list_of_clubs(ort)`` a Google search for
    ``impressum <club name>`` is requested, the text of the response body is
    passed to ``llm`` and the value ``llm`` returns is stored.

    Args:
        ort: Value forwarded unchanged to ``list_of_clubs``.

    Returns:
        A list with one ``llm`` result per club, in the order the clubs were
        returned by ``list_of_clubs``.
    """
    # Loop-invariant, so build it once instead of once per club.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    vereine = []
    for verein in list_of_clubs(ort):
        # Pass the query through `params` so that characters such as '&',
        # '#' or '+' inside a club name are escaped instead of corrupting
        # the query string.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": f"impressum {verein}"},
            headers=headers,
            timeout=10,  # do not hang forever on a stalled connection
        )
        soup = BeautifulSoup(response.content, 'html.parser')

        # find() returns None when the response has no <body> tag; fall back
        # to the text of the whole document in that case.
        body = soup.find('body')
        contact_details = body.text if body is not None else soup.get_text()

        vereine.append(llm(contact_details))

    return vereine
|
96 |
|
|
|
|
|
|
|
97 |
demo = gr.Interface(
|
|
|
98 |
fn=process_ort,
|
99 |
inputs=gr.Textbox(lines=1, placeholder="Geben Sie Ihre Suchanfrage ein..."),
|
100 |
outputs="text",
|