openfree committed on
Commit dd4c857 · verified · 1 Parent(s): 0dd51fb

Delete app-backup.py

Files changed (1)
  1. app-backup.py +0 -202
app-backup.py DELETED
@@ -1,202 +0,0 @@
- import gradio as gr
- import requests
- import json
- import os
- from datetime import datetime, timedelta
- from bs4 import BeautifulSoup  # used to extract text from web pages
- from huggingface_hub import InferenceClient  # required for the LLM
-
- # Install the required packages (uncomment and run if needed)
- # !pip install bs4 huggingface_hub
-
- # Read API keys from environment variables (API keys must be managed securely)
- API_KEY = os.getenv("SERPHOUSE_API_KEY")  # set your SerpHouse API key as an environment variable
- HF_TOKEN = os.getenv("HF_TOKEN")  # set your Hugging Face API token as an environment variable
-
- MAJOR_COUNTRIES = [
-     "United States", "United Kingdom", "Canada", "Australia", "Germany",
-     "France", "Japan", "South Korea", "China", "India",
-     "Brazil", "Mexico", "Russia", "Italy", "Spain",
-     "Netherlands", "Sweden", "Switzerland", "Norway", "Denmark",
-     "Finland", "Belgium", "Austria", "New Zealand", "Ireland",
-     "Singapore", "Hong Kong", "Israel", "United Arab Emirates", "Saudi Arabia",
-     "South Africa", "Turkey", "Egypt", "Poland", "Czech Republic",
-     "Hungary", "Greece", "Portugal", "Argentina", "Chile",
-     "Colombia", "Peru", "Venezuela", "Thailand", "Malaysia",
-     "Indonesia", "Philippines", "Vietnam", "Pakistan", "Bangladesh"
- ]
-
- def search_serphouse(query, country, page=1, num_result=10):
-     url = "https://api.serphouse.com/serp/live"
-
-     # Restrict results to the last 24 hours
-     now = datetime.utcnow()
-     yesterday = now - timedelta(days=1)
-     date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"
-
-     payload = {
-         "data": {
-             "q": query,
-             "domain": "google.com",
-             "loc": country,
-             "lang": "en",
-             "device": "desktop",
-             "serp_type": "news",
-             "page": str(page),
-             "verbatim": "1",
-             "num": str(num_result),
-             "date_range": date_range
-         }
-     }
-
-     headers = {
-         "accept": "application/json",
-         "content-type": "application/json",
-         "authorization": f"Bearer {API_KEY}"
-     }
-
-     try:
-         response = requests.post(url, json=payload, headers=headers)
-         response.raise_for_status()
-         return response.json()
-     except requests.RequestException as e:
-         error_msg = f"Error: {str(e)}"
-         # e.response is None when the request never reached the server
-         if e.response is not None and e.response.text:
-             error_msg += f"\nResponse content: {e.response.text}"
-         return {"error": error_msg}
-
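For reference, the deleted search helper can be smoke-tested on its own. A minimal sketch, assuming SERPHOUSE_API_KEY is set in the environment (the top-level response keys are inferred from the parsing code below, not from SerpHouse documentation):

if __name__ == "__main__":
    raw = search_serphouse("artificial intelligence", "United States")
    # A failed call returns {"error": ...}; a successful one nests data under "results"
    print(raw.get("error", "ok"), list(raw.keys()))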
- def format_results_from_raw(results):
-     try:
-         if isinstance(results, dict) and "error" in results:
-             return "Error: " + results["error"], []
-
-         if not isinstance(results, dict):
-             raise ValueError("Results are not in dictionary format.")
-
-         # Inspect the structure under the 'results' key (the API nests a second 'results' level)
-         if 'results' in results:
-             results_content = results['results']
-             if 'results' in results_content:
-                 results_content = results_content['results']
-                 # Check for the 'news' key
-                 if 'news' in results_content:
-                     news_results = results_content['news']
-                 else:
-                     news_results = []
-             else:
-                 news_results = []
-         else:
-             news_results = []
-
-         if not news_results:
-             return "No search results found.", []
-
-         articles = []
-
-         for idx, result in enumerate(news_results, 1):
-             title = result.get("title", "No title")
-             link = result.get("url", result.get("link", "#"))
-             snippet = result.get("snippet", "No content")
-             channel = result.get("channel", result.get("source", "Unknown"))
-             time = result.get("time", result.get("date", "Unknown time"))
-             image_url = result.get("img", result.get("thumbnail", ""))
-
-             articles.append({
-                 "title": title,
-                 "link": link,
-                 "snippet": snippet,
-                 "channel": channel,
-                 "time": time,
-                 "image_url": image_url
-             })
-
-         return "", articles
-
-     except Exception as e:
-         error_message = f"Error while processing results: {str(e)}"
-         return "Error: " + error_message, []
-
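To illustrate the nested unwrapping above, a minimal check against a hand-built payload (field names mirror the parsing code, not a verified SerpHouse response):

sample = {"results": {"results": {"news": [{
    "title": "Example headline", "url": "https://example.com/a",
    "snippet": "Example snippet", "channel": "Example News",
    "time": "1 hour ago", "img": ""
}]}}}
error, articles = format_results_from_raw(sample)
assert error == "" and articles[0]["title"] == "Example headline"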
- def serphouse_search(query, country):
-     # Defaults for page and number of results
-     page = 1
-     num_result = 10
-     results = search_serphouse(query, country, page, num_result)
-     error_message, articles = format_results_from_raw(results)
-     return error_message, articles
-
- # LLM setup
- hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
-
- def summarize_article(url):
-     try:
-         # Extract text from the web page
-         response = requests.get(url, timeout=10)
-         response.raise_for_status()
-         soup = BeautifulSoup(response.text, 'html.parser')
-         # Pull all paragraph text (a simple example)
-         text = ' '.join([p.get_text() for p in soup.find_all('p')])
-         if not text.strip():
-             return "Could not retrieve the article content."
-
-         # Generate the summary
-         prompt = f"Summarize the following English article in Korean, in three sentences:\n{text}"
-         summary = hf_client.text_generation(prompt, max_new_tokens=500)
-         return summary
-     except Exception as e:
-         return f"Error during summarization: {str(e)}"
-
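The paragraph extraction used above can be verified in isolation; a small self-contained example:

from bs4 import BeautifulSoup

html = "<html><body><p>First paragraph.</p><div>skipped</div><p>Second.</p></body></html>"
text = ' '.join(p.get_text() for p in BeautifulSoup(html, 'html.parser').find_all('p'))
assert text == "First paragraph. Second."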
- css = """
- footer {
-     visibility: hidden;
- }
- """
-
- # Build the Gradio interface
- with gr.Blocks(css=css, title="NewsAI Service") as iface:
-     gr.Markdown("Enter a search term and select a country; up to 10 news items from the last 24 hours matching the query are shown.")
-
-     with gr.Column():
-         with gr.Row():
-             query = gr.Textbox(label="Search term")
-             country = gr.Dropdown(MAJOR_COUNTRIES, label="Country", value="South Korea")
-         search_button = gr.Button("Search")
-         error_output = gr.Markdown(visible=False)  # target for the error updates returned below
-
-     article_outputs = []
-
-     def search_and_display(query, country):
-         error_message, articles = serphouse_search(query, country)
-         if error_message:
-             return gr.update(visible=True, value=error_message)
-         else:
-             # Hide previously rendered outputs
-             for components in article_outputs:
-                 for component in components:
-                     component.visible = False
-             article_outputs.clear()
-
-             # Build outputs for each article
-             # NOTE: components created inside an event handler are not attached to a
-             # running Blocks app; newer Gradio versions provide gr.render for this.
-             for article in articles:
-                 with gr.Column():
-                     title = gr.Markdown(f"### [{article['title']}]({article['link']})")
-                     # 'shape' is a Gradio 3.x parameter; Gradio 4+ uses height/width
-                     image = gr.Image(value=article['image_url'], visible=bool(article['image_url']), shape=(200, 150))
-                     snippet = gr.Markdown(f"**Summary:** {article['snippet']}")
-                     info = gr.Markdown(f"**Source:** {article['channel']} | **Time:** {article['time']}")
-                     analyze_button = gr.Button("Analyze")
-                     summary_output = gr.Markdown(visible=False)
-
-                     def analyze_article(url):
-                         summary = summarize_article(url)
-                         return gr.update(value=summary, visible=True)
-
-                     analyze_button.click(analyze_article, inputs=gr.State(article['link']), outputs=summary_output)
-
-                 article_outputs.append([title, image, snippet, info, analyze_button, summary_output])
-
-             return gr.update(visible=False)
-
-     search_button.click(
-         search_and_display,
-         inputs=[query, country],
-         outputs=[error_output]
-     )
-
- iface.launch(auth=("gini", "pick"))
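Note: the deleted file builds article components inside the search callback, which Gradio does not attach to an already-running Blocks app. A rough sketch of the equivalent pattern using gr.render from newer Gradio releases (names and wiring here are illustrative, not part of this commit):

import gradio as gr

with gr.Blocks() as demo:
    query = gr.Textbox(label="Search term")
    results_state = gr.State([])  # list of article dicts produced by the search handler

    def run_search(q):
        # Placeholder: the real app would call serphouse_search(q, country) here
        return [{"title": q or "example", "link": "#", "snippet": "example snippet"}]

    query.submit(run_search, inputs=query, outputs=results_state)

    @gr.render(inputs=results_state)
    def show_articles(articles):
        # Re-runs whenever results_state changes, rebuilding the article list
        for article in articles:
            gr.Markdown(f"### [{article['title']}]({article['link']})")
            gr.Markdown(article["snippet"])

demo.launch()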