LejobuildYT committed on
Commit
99fa459
·
verified ·
1 Parent(s): bb4754a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -46
app.py CHANGED
@@ -1,7 +1,10 @@
 
1
  import re
2
  import requests
3
  from bs4 import BeautifulSoup
4
- from duckduckgo_search import ddg
 
 
5
  from googlesearch import search as google_search
6
  # For Bing we use SerpAPI (the API key is passed to the search function as an argument)
7
  from serpapi import GoogleSearch as SerpBing
@@ -9,63 +12,71 @@ from rake_nltk import Rake
9
  import gradio as gr
10
  from transformers import pipeline
11
 
12
- # 1) Keyword extractor
13
  rake = Rake()
14
  def extract_keywords(text):
15
  rake.extract_keywords_from_text(text)
16
- return [kw for kw, score in rake.get_ranked_phrases_with_scores()[:5]]
 
17
 
18
  # 2) Search functions
 
 
 
 
 
 
19
 
20
- def bing_search(query, api_key, num=5):
 
 
 
21
  params = {"engine": "bing", "q": query, "api_key": api_key}
22
  client = SerpBing(params)
23
  results = client.get_dict().get('organic_results', [])
 
24
  return [r['link'] for r in results if not r.get('sponsored')][:num]
25
 
26
- def google_search_links(query, num=5):
27
- return list(google_search(query, num_results=num))
28
-
29
- def ddg_search_links(query, num=5):
30
- return [r['href'] for r in ddg(query, max_results=num)]
31
-
32
- # 3) Fetch page text
33
-
34
  def fetch_text(url):
35
  try:
36
  resp = requests.get(url, timeout=3)
37
  soup = BeautifulSoup(resp.text, 'html.parser')
38
  texts = soup.find_all(['p', 'h1', 'h2', 'h3'])
39
- return ' '.join([t.get_text() for t in texts])
40
  except:
41
  return ''
42
 
43
- # 4) Model loader
44
  generator = pipeline('text-generation', model='google/flan-t5-small', trust_remote_code=True)
45
 
46
  def model_answer(prompt):
47
  return generator(prompt, max_length=256, do_sample=False)[0]['generated_text']
48
 
49
- # 5) Check for forbidden search
50
- VERBOT = [
51
  "bitte nicht im internet suchen", "keine websuche", "mach das ohne web",
52
  "ohne online", "nur dein wissen", "nicht googeln", "such nicht"
53
  ]
54
- def search_forbidden(prompt):
55
  pl = prompt.lower()
56
- return any(v in pl for v in VERBOT)
57
 
58
- # 6) Check uncertainty
59
- UNCERT = ["ich weiß nicht", "nicht in meinen daten", "keine information", "ich bin mir nicht sicher"]
60
- def is_uncertain(answer):
 
 
61
  al = answer.lower()
62
- return any(u in al for u in UNCERT)
63
 
64
- # 7) Combined logic
65
  def process(prompt, web_enabled, serpapi_key):
66
- # Extract keywords
67
- keys = extract_keywords(prompt)
68
- # Base answer
 
 
69
  if search_forbidden(prompt):
70
  ans = model_answer(prompt)
71
  if is_uncertain(ans):
@@ -75,39 +86,47 @@ def process(prompt, web_enabled, serpapi_key):
75
  "aber es kann ungenau sein.\n\n" + ans
76
  )
77
  return ans
 
 
78
  if not web_enabled:
79
  return model_answer(prompt)
80
- # Web enabled, try model first
 
81
  ans = model_answer(prompt)
 
82
  if not is_uncertain(ans):
83
  return ans
84
- # Uncertain: perform multi-search
85
- # Google
86
- g = google_search_links(' '.join(keys))
87
- # DuckDuckGo
88
- d = ddg_search_links(' '.join(keys))
89
- # Bing
90
- b = bing_search(' '.join(keys), serpapi_key)
91
- urls = list(dict.fromkeys(g + d + b))
92
- # Fetch and combine texts
93
- texts = [fetch_text(u) for u in urls[:3]]
 
94
  combined = '\n'.join(texts)
95
- # Summarize
96
- summary = generator(combined, max_length=256)[0]['generated_text']
97
  return summary
98
 
99
  # 8) Gradio UI
100
- def main(prompt, web_enabled, serpapi_key):
101
- return process(prompt, web_enabled, serpapi_key)
102
-
103
  with gr.Blocks() as demo:
104
  gr.Markdown("# Intelligente KI mit Multi-Engine-Websuche")
105
  with gr.Row():
106
- prompt = gr.Textbox(label="Dein Prompt", lines=3)
107
- web = gr.Checkbox(label="Websuche aktivieren", value=False)
108
- serp = gr.Textbox(label="SerpAPI Key (für Bing)", placeholder="Optional für Bing-Suche")
109
  btn = gr.Button("Antwort generieren")
110
  output = gr.Textbox(label="Antwort", lines=10)
111
- btn.click(main, inputs=[prompt, web, serp], outputs=output)
 
 
 
 
 
112
 
113
  demo.launch()
 
 
1
+ ```python
2
  import re
3
  import requests
4
  from bs4 import BeautifulSoup
5
+ # DuckDuckGo Search: use DDGS class
6
+ from duckduckgo_search import DDGS
7
+ # Google search
8
  from googlesearch import search as google_search
9
  # For Bing we use SerpAPI (the API key is passed to the search function as an argument)
10
  from serpapi import GoogleSearch as SerpBing
 
12
  import gradio as gr
13
  from transformers import pipeline
14
 
15
+ # 1) Keyword extractor using RAKE
16
  rake = Rake()
17
  def extract_keywords(text):
18
  rake.extract_keywords_from_text(text)
19
+ # return top 5 keywords
20
+ return [kw for kw in rake.get_ranked_phrases()[:5]]
21
 
22
  # 2) Search functions
23
+ # DuckDuckGo using DDGS
24
+ def ddg_search_links(query, num=5):
25
+ ddgs = DDGS()
26
+ results = ddgs.text(query, max_results=num)
27
+ # results are dicts with 'href'
28
+ return [r['href'] for r in results]
29
 
30
+ def google_search_links(query, num=5):
31
+ return list(google_search(query, num_results=num))
32
+
33
+ def bing_search_links(query, api_key, num=5):
34
  params = {"engine": "bing", "q": query, "api_key": api_key}
35
  client = SerpBing(params)
36
  results = client.get_dict().get('organic_results', [])
37
+ # filter sponsored and return links
38
  return [r['link'] for r in results if not r.get('sponsored')][:num]
39
 
40
+ # 3) Fetch page text for summarization
 
 
 
 
 
 
 
41
  def fetch_text(url):
42
  try:
43
  resp = requests.get(url, timeout=3)
44
  soup = BeautifulSoup(resp.text, 'html.parser')
45
  texts = soup.find_all(['p', 'h1', 'h2', 'h3'])
46
+ return ' '.join(t.get_text() for t in texts)
47
  except:
48
  return ''
49
 
50
+ # 4) Model loader: lightweight HF model
51
  generator = pipeline('text-generation', model='google/flan-t5-small', trust_remote_code=True)
52
 
53
  def model_answer(prompt):
54
  return generator(prompt, max_length=256, do_sample=False)[0]['generated_text']
55
 
56
+ # 5) Detect forbidden search phrases
57
+ FORBID_PATTERNS = [
58
  "bitte nicht im internet suchen", "keine websuche", "mach das ohne web",
59
  "ohne online", "nur dein wissen", "nicht googeln", "such nicht"
60
  ]
61
+ def search_forbidden(prompt: str) -> bool:
62
  pl = prompt.lower()
63
+ return any(phrase in pl for phrase in FORBID_PATTERNS)
64
 
65
+ # 6) Check if answer is uncertain
66
+ UNCERTAIN_MARKERS = [
67
+ "ich weiß nicht", "nicht in meinen daten", "keine information", "ich bin mir nicht sicher"
68
+ ]
69
+ def is_uncertain(answer: str) -> bool:
70
  al = answer.lower()
71
+ return any(marker in al for marker in UNCERTAIN_MARKERS)
72
 
73
+ # 7) Core processing logic
74
  def process(prompt, web_enabled, serpapi_key):
75
+ # Extract keywords for search
76
+ keywords = extract_keywords(prompt)
77
+ query = ' '.join(keywords)
78
+
79
+ # If user forbids search
80
  if search_forbidden(prompt):
81
  ans = model_answer(prompt)
82
  if is_uncertain(ans):
 
86
  "aber es kann ungenau sein.\n\n" + ans
87
  )
88
  return ans
89
+
90
+ # If websearch disabled, just use model
91
  if not web_enabled:
92
  return model_answer(prompt)
93
+
94
+ # Websearch enabled: model first
95
  ans = model_answer(prompt)
96
+ # If model confident, return
97
  if not is_uncertain(ans):
98
  return ans
99
+
100
+ # Model uncertain: perform multi-search
101
+ links = []
102
+ links += google_search_links(query)
103
+ links += ddg_search_links(query)
104
+ links += bing_search_links(query, serpapi_key)
105
+ # Deduplicate
106
+ unique_links = list(dict.fromkeys(links))
107
+
108
+ # Fetch top 3 pages
109
+ texts = [fetch_text(u) for u in unique_links[:3]]
110
  combined = '\n'.join(texts)
111
+ # Summarize combined content
112
+ summary = generator(combined, max_length=256, do_sample=False)[0]['generated_text']
113
  return summary
114
 
115
  # 8) Gradio UI
 
 
 
116
  with gr.Blocks() as demo:
117
  gr.Markdown("# Intelligente KI mit Multi-Engine-Websuche")
118
  with gr.Row():
119
+ prompt_input = gr.Textbox(label="Dein Prompt", lines=3)
120
+ web_switch = gr.Checkbox(label="Websuche aktivieren", value=False)
121
+ serp_input = gr.Textbox(label="SerpAPI Key (für Bing)", placeholder="Optional für Bing-Suche")
122
  btn = gr.Button("Antwort generieren")
123
  output = gr.Textbox(label="Antwort", lines=10)
124
+
125
+ btn.click(
126
+ fn=process,
127
+ inputs=[prompt_input, web_switch, serp_input],
128
+ outputs=output
129
+ )
130
 
131
  demo.launch()
132
+ ```