feat: daily
- app.py +3 -1
- requirements.txt +1 -0
- tools/feed_processor.py +117 -0
- tools/rss_get_papers.py +55 -0
app.py
CHANGED
@@ -3,6 +3,7 @@ import datetime
 import requests
 import pytz
 import yaml
+from tools.rss_get_papers import HFDaylyPapperTool
 from tools.final_answer import FinalAnswerTool
 from tools.visit_webpage import VisitWebpageTool
 
@@ -37,6 +38,7 @@ def get_current_time_in_timezone(timezone: str) -> str:
 
 final_answer = FinalAnswerTool()
 visit_page = VisitWebpageTool()
+daily_paper = HFDaylyPapperTool()
 
 model = HfApiModel(
     max_tokens=2096,
@@ -54,7 +56,7 @@ with open("prompts.yaml", 'r') as stream:
 
 agent = CodeAgent(
     model=model,
-    tools=[final_answer, image_generation_tool, visit_page], ## add your tools here (don't remove final answer)
+    tools=[final_answer, image_generation_tool, visit_page, daily_paper], ## add your tools here (don't remove final answer)
     max_steps=6,
     verbosity_level=1,
     grammar=None,
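For context, this is the general contract a smolagents custom tool has to satisfy before it can go into CodeAgent's tools list; a minimal sketch (the EchoTool name and its input schema are illustrative, not part of this commit), mirroring the attributes HFDaylyPapperTool defines in tools/rss_get_papers.py below:

from smolagents.tools import Tool

class EchoTool(Tool):
    # smolagents validates these class attributes when the tool is instantiated
    name = "echo"
    description = "Returns the given text unchanged."
    inputs = {"text": {"type": "string", "description": "Text to echo back."}}
    output_type = "string"

    def forward(self, text: str) -> str:
        return text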
requirements.txt
CHANGED
@@ -3,3 +3,4 @@ smolagents
 requests
 duckduckgo_search
 pandas
+feedparser
tools/feed_processor.py
ADDED
@@ -0,0 +1,117 @@
+import random
+import feedparser
+from dataclasses import dataclass
+from typing import List, Optional, Dict, Set
+from abc import ABC, abstractmethod
+
+@dataclass
+class Article:
+    title: str
+    link: str
+    summary: str
+    published: str
+    authors: str
+    pdf_link: Optional[str] = None
+    source: Optional[str] = None
+
+class BaseRSSParser(ABC):
+    @abstractmethod
+    def parse_feed(self, feed: feedparser.FeedParserDict) -> List[Article]:
+        """Parse an RSS feed and return a list of Article objects."""
+        pass
+
+class ArxivRSSParser(BaseRSSParser):
+    def parse_feed(self, feed: feedparser.FeedParserDict) -> List[Article]:
+        articles: List[Article] = []
+        for entry in feed.entries:
+            try:
+                title: str = entry.get('title', 'Untitled')
+                link: str = entry.get('link', '')
+                summary: str = entry.get('summary', '')
+                published: str = entry.get('published', 'Unknown')
+                authors_list = entry.get('authors', [])
+                authors: str = ', '.join(author.name for author in authors_list) if authors_list else 'Unknown'
+                pdf_link: Optional[str] = next(
+                    (l.href for l in entry.get('links', []) if l.get('type') == 'application/pdf'), None
+                )
+
+                article = Article(
+                    title=title,
+                    link=link,
+                    summary=summary,
+                    published=published,
+                    authors=authors,
+                    pdf_link=pdf_link
+                )
+                articles.append(article)
+            except Exception as e:
+                print(f"Error while parsing entry: {e}")
+        return articles
+
+## Example of a parser for a different RSS source
+class DailyHFRSSParser(BaseRSSParser):
+    def parse_feed(self, feed: feedparser.FeedParserDict) -> List[Article]:
+        # Parsing logic specific to this source
+        articles: List[Article] = []
+        for entry in feed.entries:
+            # Example parsing; replace with the actual fields
+            title: str = entry.get('title', 'Untitled')
+            link: str = entry.get('link', '')
+            summary: str = entry.get('description', '')
+            published: str = entry.get('published', 'Unknown')
+            authors: str = entry.get('author', 'Unknown')
+
+            article = Article(
+                title=title,
+                link=link,
+                summary=summary,
+                published=published,
+                authors=authors,
+                source="Daily papers"
+            )
+            articles.append(article)
+        return articles
+
+
+class RSSFeedFetcher:
+    def __init__(self, feed_url: str):
+        self.feed_url = feed_url
+
+    def fetch_feed(self) -> feedparser.FeedParserDict:
+        """Download and parse the RSS feed."""
+        try:
+            feed = feedparser.parse(self.feed_url)
+            if feed.bozo:
+                raise ValueError(f"Error while parsing the RSS feed: {feed.bozo_exception}")
+            return feed
+        except Exception as e:
+            print(f"Error while fetching the feed: {e}")
+            return feedparser.FeedParserDict()
+
+class RSSFeedProcessor:
+    def __init__(self):
+        self.feed_parsers: Dict[str, BaseRSSParser] = {}
+        self.feed_urls: Dict[str, str] = {}
+
+    def register_feed(self, source_key: str, feed_url: str, parser: BaseRSSParser):
+        self.feed_parsers[source_key] = parser
+        self.feed_urls[source_key] = feed_url
+
+    def get_latest_articles(self, sources: Set[str], count: int = 1) -> List[Article]:
+        # Flatten into a single list so callers can iterate Article objects directly
+        all_articles: List[Article] = []
+        for source_key in sources:
+            parser = self.feed_parsers.get(source_key)
+            feed_url = self.feed_urls.get(source_key)
+            if parser and feed_url:
+                fetcher = RSSFeedFetcher(feed_url)
+                feed = fetcher.fetch_feed()
+                articles = parser.parse_feed(feed)
+                all_articles.extend(articles[:count])
+            else:
+                print(f"Source {source_key} is not registered or has no parser")
+
+        # Shuffle so that no single source dominates the output
+        random.shuffle(all_articles)
+
+        return all_articles[:count * len(sources)]
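A minimal usage sketch of the processor above, assuming the file is importable as tools.feed_processor; the source key and the arXiv feed URL here are illustrative, not part of the commit:

from tools.feed_processor import ArxivRSSParser, RSSFeedProcessor

processor = RSSFeedProcessor()
processor.register_feed(
    "arxiv-cs.CL",                        # arbitrary source key
    "https://rss.arxiv.org/rss/cs.CL",    # assumed arXiv RSS endpoint
    ArxivRSSParser(),
)
# Fetch up to two of the newest entries from each registered source
for article in processor.get_latest_articles({"arxiv-cs.CL"}, count=2):
    print(article.title, article.pdf_link)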
tools/rss_get_papers.py
ADDED
@@ -0,0 +1,55 @@
+import re
+
+from smolagents.tools import Tool
+
+from tools.feed_processor import Article, DailyHFRSSParser, RSSFeedProcessor
+
+class HFDaylyPapperTool(Tool):
+    name = "get_actual_ai_news"
+    description = "Returns the latest AI papers from the Hugging Face Daily Papers feed."
+    inputs = {}
+    output_type = "string"
+
+    def __init__(self, max_results=10, **kwargs):
+        super().__init__()
+        self.max_results = max_results
+        self.rss_processor = RSSFeedProcessor()
+        self.rss_processor.register_feed(
+            "HuggingFace Daily papers",
+            "https://jamesg.blog/hf-papers.xml",
+            DailyHFRSSParser(),
+        )
+
+    def forward(self) -> str:
+        try:
+            from markdownify import markdownify
+            from smolagents.utils import truncate_content
+
+        except ImportError as e:
+            raise ImportError(
+                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
+            ) from e
+
+        try:
+            response = self.rss_processor.get_latest_articles(
+                {"HuggingFace Daily papers"}, count=self.max_results
+            )
+
+            result = ""
+            for article in response:
+                result += self.pretty_str_paper(article)
+
+            markdown_content = markdownify(result).strip()
+            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
+
+            return truncate_content(markdown_content, 10000)
+
+        except Exception as e:
+            return f"An unexpected error occurred: {str(e)}"
+
+    def pretty_str_paper(self, article: Article) -> str:
+        return f"*{article.title}*\n" \
+               f"_Source_: {article.source}\n" \
+               f"_Authors_: {article.authors}\n" \
+               f"_Published_: {article.published}\n" \
+               f"_Summary_: {article.summary}\n\n"
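And a quick smoke test of the tool outside the agent, assuming the packages from requirements.txt are installed and the script runs from the Space's root directory so that the tools package resolves:

from tools.rss_get_papers import HFDaylyPapperTool

tool = HFDaylyPapperTool(max_results=3)
# Prints a markdown digest of the latest Daily Papers entries (or an error string)
print(tool.forward())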