xuyingliKepler commited on
Commit
c869a11
1 Parent(s): 8dab0c7

Upload 4 files

Browse files
Files changed (4) hide show
  1. app/__init__.py +0 -0
  2. app/functions.py +157 -0
  3. app/models.py +28 -0
  4. app/tools.py +55 -0
app/__init__.py ADDED
File without changes
app/functions.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ import asyncio
3
+ import aiohttp
4
+ from typing import List, Dict, Union
5
+ import json
6
+
7
# Root URL of the Hacker News Firebase API (v0); the fetch helpers below append endpoint paths to it.
+ BASE_URL = "https://hacker-news.firebaseio.com/v0"
8
+
9
+
10
async def fetch_item(session: aiohttp.ClientSession, item_id: int):
    """
    Retrieve one Hacker News item and return its decoded JSON body.

    Args:
        session: Open aiohttp ClientSession used to issue the GET request.
        item_id (int): Identifier interpolated into `{BASE_URL}/item/{item_id}.json`.

    Returns:
        Whatever `response.json()` decodes from the item endpoint.
    """
    item_endpoint = f"{BASE_URL}/item/{item_id}.json"
    pending_response = session.get(item_endpoint)
    async with pending_response as resp:
        payload = await resp.json()
    return payload
24
+
25
+
26
async def fetch_story_ids(story_type: str = "top", limit: int = None):
    """
    Asynchronously fetches the story IDs of one listing.

    Args:
        story_type: Listing prefix; the request goes to `{BASE_URL}/{story_type}stories.json`.
            Defaults to "top" (`topstories.json`).
        limit: Maximum number of IDs to return. A falsy value (None, 0) returns the whole listing.

    Returns:
        List[int]: The story IDs, truncated to `limit` when one is given. An empty list is
        returned when the response body does not decode to a JSON array.
    """
    url = f"{BASE_URL}/{story_type}stories.json"

    # `ssl=False` replaces the deprecated `verify_ssl=False`; certificate validation
    # stays disabled exactly as before.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get(url) as response:
            story_ids = await response.json()

    # A `null` or object body cannot be sliced or iterated as a list of IDs.
    if not isinstance(story_ids, list):
        return []

    if limit:
        story_ids = story_ids[:limit]

    return story_ids
46
+
47
+
48
async def fetch_text(session, url):
    """
    Download a page and reduce it to plain text with BeautifulSoup.

    Nothing is raised to the caller: a non-200 status or any exception is turned into
    a descriptive string so the LLM consuming the result can see what went wrong.

    Args:
        session: `aiohttp` session used for the GET request
        url: The story URL

    Returns:
        The extracted page text, or a string describing the failure.
    """
    try:
        async with session.get(url) as response:
            # Guard clause: anything other than 200 is reported instead of parsed.
            if response.status != 200:
                return f"Unable to fetch content from {url}. Status code: {response.status}"

            markup = await response.text()
            return BeautifulSoup(markup, 'html.parser').get_text()
    except Exception as e:
        return f"An error occurred: {e}"
73
+
74
+
75
async def get_hn_stories(limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
    """
    Asynchronously fetches Hacker News stories based on the provided parameters.

    Args:
        limit (int): The number of stories to retrieve. Default is 5.
        keywords (List[str]): Keywords to filter by; a story is kept when any keyword
            occurs (case-insensitively) in its title. None disables filtering.
        story_type (str): The story type, forwarded to `fetch_story_ids`.

    Returns:
        List[Dict[str, Union[str, int]]]: A list of dictionaries containing
        'title', 'url', 'score' and 'story_id' of the stories, at most `limit` long.
    """
    # Without keywords every fetched story is kept, so only `limit` IDs are needed.
    # With keywords the whole listing has to be scanned before truncating.
    if limit and keywords is None:
        story_ids = await fetch_story_ids(story_type, limit)
    else:
        story_ids = await fetch_story_ids(story_type)

    # One shared session for all item requests instead of a new session per story.
    # `ssl=False` replaces the deprecated `verify_ssl=False` (validation stays off).
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        stories = await asyncio.gather(*(fetch_item(session, story_id) for story_id in story_ids))

    # Lower-case the keywords once rather than once per story.
    lowered_keywords = None if keywords is None else [keyword.lower() for keyword in keywords]

    filtered_stories = []
    for story in stories:
        if not story:
            # A JSON `null` body decodes to None; there is nothing to report for it.
            continue

        # Items without a title must not raise; they simply never match a keyword.
        title = (story.get("title") or "").lower()
        if lowered_keywords is None or any(keyword in title for keyword in lowered_keywords):
            filtered_stories.append({
                "title": story.get("title"),
                "url": story.get("url"),
                "score": story.get("score"),
                "story_id": story.get("id"),
            })

    return filtered_stories[:limit]
115
+
116
+
117
async def get_relevant_comments(story_id: int, limit: int = 10):
    """
    Get the most relevant comments for a Hacker News item.

    Args:
        story_id: The ID of the Hacker News item.
        limit: The number of comments to retrieve (default is 10).

    Returns:
        A JSON-encoded list with the text of up to `limit` direct child comments,
        ordered by their 'score' field (treated as 0 when absent), highest first.
        When the item cannot be loaded or has no 'kids', the plain string
        "This item doesn't have comments." is returned instead.
    """
    # `ssl=False` replaces the deprecated `verify_ssl=False` (validation stays off).
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        story = await fetch_item(session, story_id)

        # `story` is None when the response body is JSON `null`.
        if not story or 'kids' not in story:
            return "This item doesn't have comments."

        comment_details = await asyncio.gather(*[fetch_item(session, cid) for cid in story['kids']])

    # Drop entries that failed to load or carry no text so that building the
    # result cannot raise on them.
    usable_comments = [comment for comment in comment_details if comment and comment.get("text")]
    usable_comments.sort(key=lambda comment: comment.get('score', 0), reverse=True)

    return json.dumps([comment["text"] for comment in usable_comments[:limit]])
143
+
144
+
145
async def get_story_content(story_url: str):
    """
    Gets the content of the story behind a URL.

    Args:
        story_url: A string representing the story URL

    Returns:
        The result of `fetch_text` for the URL: the page text, or a message
        describing why it could not be fetched.
    """
    # `ssl=False` replaces the deprecated `verify_ssl=False`; certificate validation
    # stays disabled exactly as before.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        return await fetch_text(session, story_url)
app/models.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from pydantic import BaseModel, Field
3
+
4
+
5
class Stories(BaseModel):
    """A model representing stories from Hacker News"""
    # Maximum number of stories handed back.
    limit: int = Field(5, description="The number of stories to return. Defaults to 5.")
    # NOTE(review): annotated as List[str] although the default is None.
    keywords: List[str] = Field(
        None,
        description="The list of keywords to filter the stories. Defaults to None",
    )
    # Listing to read from; the accepted values are spelled out in the description.
    story_type: str = Field(
        "top",
        description=(
            "The story type. It can be one of the following: 'top', 'new', 'best', 'ask', 'show', 'job'. "
            "Defaults to 'top'"
        ),
    )
12
+
13
+
14
class Comments(BaseModel):
    """A model representing the highest scored comments from a story"""
    # Required: the story whose comments are requested.
    story_id: int = Field(default=..., description="The story id")
    # Upper bound on the number of comments returned.
    limit: int = Field(10, description="The number of comments to return. Defaults to 10.")
18
+
19
+
20
class Content(BaseModel):
    """A model representing the content of a story fetched from the URL"""
    # Required: address of the page whose content is fetched.
    story_url: str = Field(default=..., description="The story URL")
23
+
24
+
25
class Item(BaseModel):
    """A model representing a story, comment, job, Ask HN and even a poll"""
    # Required identifier, declared here as a string.
    item_id: str = Field(default=..., description="The item's unique id")
28
+
app/tools.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, Type, List
2
+
3
+ from langchain.tools import BaseTool
4
+ from pydantic import BaseModel
5
+ from app.models import Stories, Comments, Content
6
+
7
+ from app.functions import get_hn_stories
8
+ from app.functions import get_relevant_comments
9
+ from app.functions import get_story_content
10
+
11
+
12
class StoriesTool(BaseTool):
    """Tool wrapper around `get_hn_stories`; arguments are described by the `Stories` schema."""
    name = "get_stories"
    description = "Gets stories from Hacker News. The stories are described by a 'story_id', a 'title', a 'url' and" \
                  " a 'score'."

    def _run(self, limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
        """Synchronous entry point: runs the coroutine to completion and returns its result.

        `get_hn_stories` is an `async def`, so calling it only creates a coroutine;
        it has to be driven by an event loop to yield the stories. `asyncio.run`
        raises RuntimeError when called from a thread whose loop is already
        running - use `_arun` there.
        """
        import asyncio  # local import: this module does not import asyncio at the top

        return asyncio.run(get_hn_stories(limit, keywords, story_type))

    async def _arun(self, limit: int = 5, keywords: List[str] = None, story_type: str = "top"):
        """Asynchronous entry point: awaits `get_hn_stories` and returns the stories."""
        return await get_hn_stories(limit, keywords, story_type)

    args_schema: Optional[Type[BaseModel]] = Stories
26
+
27
+
28
class CommentsTool(BaseTool):
    """Tool wrapper around `get_relevant_comments`; arguments are described by the `Comments` schema."""
    name = "get_comments"
    description = "Gets comments from a specific Hacker News story"

    def _run(self, story_id: int, limit: int = 10):
        """Synchronous entry point: runs the coroutine to completion and returns its result.

        `get_relevant_comments` is an `async def`, so calling it only creates a
        coroutine; it has to be driven by an event loop to yield the comments.
        `asyncio.run` raises RuntimeError when called from a thread whose loop is
        already running - use `_arun` there.
        """
        import asyncio  # local import: this module does not import asyncio at the top

        return asyncio.run(get_relevant_comments(story_id, limit))

    async def _arun(self, story_id: int, limit: int = 10):
        """Asynchronous entry point: awaits `get_relevant_comments` and returns its result."""
        return await get_relevant_comments(story_id, limit)

    args_schema: Optional[Type[BaseModel]] = Comments
41
+
42
+
43
class ContentTool(BaseTool):
    """Tool wrapper around `get_story_content`; arguments are described by the `Content` schema."""
    name = "get_content"
    description = "Gets the Hacker News story content from a URL"

    def _run(self, story_url: str):
        """Synchronous entry point: runs the coroutine to completion and returns its result.

        `get_story_content` is an `async def`, so calling it only creates a
        coroutine; it has to be driven by an event loop to yield the content.
        `asyncio.run` raises RuntimeError when called from a thread whose loop is
        already running - use `_arun` there.
        """
        import asyncio  # local import: this module does not import asyncio at the top

        return asyncio.run(get_story_content(story_url))

    async def _arun(self, story_url: str):
        """Asynchronous entry point: awaits `get_story_content` and returns the content."""
        return await get_story_content(story_url)

    args_schema: Optional[Type[BaseModel]] = Content