kodinD committed on
Commit
20366da
·
1 Parent(s): ece991a

feat: daily

Browse files
Files changed (4) hide show
  1. app.py +3 -1
  2. requirements.txt +1 -0
  3. tools/feed_processor.py +117 -0
  4. tools/rss_get_papers.py +55 -0
app.py CHANGED
@@ -3,6 +3,7 @@ import datetime
3
  import requests
4
  import pytz
5
  import yaml
 
6
  from tools.final_answer import FinalAnswerTool
7
  from tools.visit_webpage import VisitWebpageTool
8
 
@@ -37,6 +38,7 @@ def get_current_time_in_timezone(timezone: str) -> str:
37
 
38
  final_answer = FinalAnswerTool()
39
  visit_page = VisitWebpageTool()
 
40
 
41
  model = HfApiModel(
42
  max_tokens=2096,
@@ -54,7 +56,7 @@ with open("prompts.yaml", 'r') as stream:
54
 
55
  agent = CodeAgent(
56
  model=model,
57
- tools=[final_answer, image_generation_tool, visit_page], ## add your tools here (don't remove final answer)
58
  max_steps=6,
59
  verbosity_level=1,
60
  grammar=None,
 
3
  import requests
4
  import pytz
5
  import yaml
6
+ from tools.rss_get_papers import HFDaylyPapperTool
7
  from tools.final_answer import FinalAnswerTool
8
  from tools.visit_webpage import VisitWebpageTool
9
 
 
38
 
39
  final_answer = FinalAnswerTool()
40
  visit_page = VisitWebpageTool()
41
+ daily_paper = HFDaylyPapperTool()
42
 
43
  model = HfApiModel(
44
  max_tokens=2096,
 
56
 
57
  agent = CodeAgent(
58
  model=model,
59
+ tools=[final_answer, image_generation_tool, visit_page, daily_paper], ## add your tools here (don't remove final answer)
60
  max_steps=6,
61
  verbosity_level=1,
62
  grammar=None,
requirements.txt CHANGED
@@ -3,3 +3,4 @@ smolagents
3
  requests
4
  duckduckgo_search
5
  pandas
 
 
3
  requests
4
  duckduckgo_search
5
  pandas
6
+ feedparser
tools/feed_processor.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import feedparser
3
+ from dataclasses import dataclass
4
+ from typing import List, Optional, Dict, Set
5
+ from abc import ABC, abstractmethod
6
+
7
@dataclass
class Article:
    """A single feed entry normalized across RSS sources."""

    title: str
    link: str
    summary: str
    published: str
    authors: str
    # Direct PDF link when the feed advertises one (arXiv does); None otherwise.
    pdf_link: Optional[str] = None
    # Human-readable name of the originating feed; None when the parser
    # does not set it (was annotated plain `str` with a None default).
    source: Optional[str] = None
16
+
17
class BaseRSSParser(ABC):
    """Interface for source-specific parsers that turn a fetched feed into Articles."""

    @abstractmethod
    def parse_feed(self, feed: feedparser.FeedParserDict) -> List[Article]:
        """Parse an RSS feed and return the resulting list of Article objects."""
        pass
22
+
23
class ArxivRSSParser(BaseRSSParser):
    """Parser for arXiv-style RSS feeds (authors list, optional PDF link)."""

    def parse_feed(self, feed: feedparser.FeedParserDict) -> List[Article]:
        """Convert feed entries to Articles; malformed entries are logged and skipped."""
        articles: List[Article] = []
        for entry in feed.entries:
            try:
                title: str = entry.get('title', 'Без названия')
                link: str = entry.get('link', '')
                summary: str = entry.get('summary', '')
                published: str = entry.get('published', 'Неизвестно')
                authors_list = entry.get('authors', [])
                authors: str = ', '.join([author.name for author in authors_list]) if authors_list else 'Неизвестно'
                # Some link objects carry no `type` attribute; getattr keeps the
                # entry instead of discarding it via the broad except below.
                pdf_link: Optional[str] = next(
                    (l.href for l in entry.get('links', [])
                     if getattr(l, 'type', None) == 'application/pdf'),
                    None,
                )

                articles.append(Article(
                    title=title,
                    link=link,
                    summary=summary,
                    published=published,
                    authors=authors,
                    pdf_link=pdf_link,
                ))
            except Exception as e:
                # Best-effort parsing: log and skip only the offending entry.
                print(f"Ошибка при парсинге записи: {e}")
        return articles
50
+
51
## Example of a parser for another RSS source
class DailyHFRSSParser(BaseRSSParser):
    """Parser for the Hugging Face Daily Papers RSS feed."""

    def parse_feed(self, feed: feedparser.FeedParserDict) -> List[Article]:
        # Field names follow the RSS 2.0 convention this feed uses
        # (description / pubDate / author rather than the Atom names).
        articles: List[Article] = []
        for entry in feed.entries:
            articles.append(
                Article(
                    title=entry.get('title', 'Без названия'),
                    link=entry.get('link', ''),
                    summary=entry.get('description', ''),
                    published=entry.get('pubDate', 'Неизвестно'),
                    authors=entry.get('author', 'Неизвестно'),
                    source="Daily papers",
                )
            )
        return articles
74
+
75
+
76
class RSSFeedFetcher:
    """Downloads and parses a single RSS feed URL."""

    def __init__(self, feed_url: str):
        self.feed_url = feed_url

    def fetch_feed(self) -> feedparser.FeedParserDict:
        """Fetch and parse the RSS feed; returns an empty feed object on failure."""
        try:
            parsed = feedparser.parse(self.feed_url)
            if parsed.bozo:
                # feedparser sets `bozo` when the XML is malformed.
                raise ValueError(f"Ошибка при парсинге RSS-ленты: {parsed.bozo_exception}")
        except Exception as err:
            print(f"Ошибка при загрузке ленты: {err}")
            return feedparser.FeedParserDict()
        return parsed
90
+
91
class RSSFeedProcessor:
    """Registry of RSS sources; fetches and aggregates their latest articles."""

    def __init__(self):
        # source_key -> parser instance / feed URL.
        self.feed_parsers: Dict[str, BaseRSSParser] = {}
        self.feed_urls: Dict[str, str] = {}

    def register_feed(self, source_key: str, feed_url: str, parser: BaseRSSParser):
        """Register a feed under `source_key` with its URL and parser."""
        self.feed_parsers[source_key] = parser
        self.feed_urls[source_key] = feed_url

    def get_latest_articles(self, sources: Optional[Set[str]] = None, count: int = 1) -> List[Article]:
        """Return up to `count` latest articles from each requested source, shuffled.

        `sources` now defaults to every registered source, so callers may
        invoke the method with no arguments (the tool in rss_get_papers.py
        does exactly that). Unknown sources are reported and skipped.
        """
        if sources is None:
            sources = set(self.feed_urls)
        all_articles: List[Article] = []
        for source_key in sources:
            parser = self.feed_parsers.get(source_key)
            feed_url = self.feed_urls.get(source_key)
            if parser and feed_url:
                fetcher = RSSFeedFetcher(feed_url)
                feed = fetcher.fetch_feed()
                articles = parser.parse_feed(feed)
                # extend, not append: the original collected per-source
                # sublists, so callers received List[List[Article]] despite
                # the List[Article] annotation.
                all_articles.extend(articles[:count])
            else:
                print(f"Источник {source_key} не найден или не имеет парсера")

        random.shuffle(all_articles)

        return all_articles[:count * len(sources)]
tools/rss_get_papers.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import Any, Optional
3
+ from smolagents.tools import Tool
4
+ import requests
5
+ import markdownify
6
+ import smolagents
7
+
8
+ from feed_processor import DailyHFRSSParser, RSSFeedProcessor
9
+
10
class HFDaylyPapperTool(Tool):
    """smolagents tool that returns today's AI papers from the HF Daily Papers feed."""

    name = "get_actual_ai_news"
    description = "Return actual news about AI today."
    # The tool takes no call-time arguments; smolagents Tool subclasses are
    # expected to declare `inputs` — NOTE(review): confirm against the
    # installed smolagents version.
    inputs = {}
    output_type = "string"

    def __init__(self, max_results=10, **kwargs):
        super().__init__()
        # Upper bound on articles fetched per source (was stored but unused).
        self.max_results = max_results
        self.rss_processor = RSSFeedProcessor()
        self.rss_processor.register_feed(
            "HuggingFace Daily pappers",
            "https://jamesg.blog/hf-papers.xml",
            DailyHFRSSParser(),
        )

    def forward(self) -> str:
        """Fetch the latest papers and return them as truncated Markdown."""
        try:
            from markdownify import markdownify
            from smolagents.utils import truncate_content

        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e

        try:
            # Pass the registered source explicitly: the original called the
            # method with no arguments although `sources` was required, so
            # every call raised TypeError and returned the error string below.
            articles = self.rss_processor.get_latest_articles(
                {"HuggingFace Daily pappers"}, self.max_results
            )

            result = ""
            for article in articles:
                # Tolerate per-source sublists as well as a flat list, so this
                # works regardless of how the processor aggregates results.
                if isinstance(article, list):
                    for item in article:
                        result += self.pretty_str_paper(item)
                else:
                    result += self.pretty_str_paper(article)

            markdown_content = markdownify(result).strip()
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            return truncate_content(markdown_content, 10000)

        except Exception as e:
            return f"An unexpected error occurred: {str(e)}"

    def pretty_str_paper(self, article) -> str:
        """Format one Article as a Markdown snippet."""
        # `self` was missing from the original signature, so instance calls
        # bound the Article slot to `self` and crashed on attribute access.
        return f"*{article.title}*\n" \
               f"_Источник_: {article.source}\n" \
               f"_Авторы_: {article.authors}\n" \
               f"_Опубликовано_: {article.published}\n" \
               f"_Описание_: {article.summary}\n\n"