Spaces:
Sleeping
Sleeping
File size: 6,670 Bytes
dbaa71b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional

from pydantic import AliasChoices, Field, PrivateAttr
from pydantic.types import SecretStr
from pydantic_settings import BaseSettings
from pyfacebook import FacebookApi

from obsei.misc.utils import (
    DATETIME_STRING_PATTERN,
    DEFAULT_LOOKUP_PERIOD,
    convert_utc_time,
    obj_to_json,
    convert_datetime_str_to_epoch,
)
from obsei.payload import TextPayload
from obsei.source.base_source import BaseSource, BaseSourceConfig
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class FacebookCredentials(BaseSettings):
    """Facebook Graph API credentials, given explicitly or via environment.

    Each field can be passed by its field name (``app_id=...``) or picked up
    from an environment variable. Two environment names are accepted per
    field: the bare field name and the ``facebook_``-prefixed name.

    NOTE: the previous ``Field(None, env="facebook_...")`` form is a pydantic
    v1 idiom. Under pydantic-settings v2 (which this module imports) the
    ``env`` keyword is not honoured, so the ``facebook_*`` variables were
    never read. ``validation_alias`` is the v2 way to name them. The field
    name is listed first so an explicitly passed value is preferred over a
    ``facebook_*`` environment variable.
    """

    # Application id; used together with ``app_secret``.
    app_id: Optional[SecretStr] = Field(
        None, validation_alias=AliasChoices("app_id", "facebook_app_id")
    )
    # Application secret; used together with ``app_id``.
    app_secret: Optional[SecretStr] = Field(
        None, validation_alias=AliasChoices("app_secret", "facebook_app_secret")
    )
    # Access token; when present it is used instead of the app id/secret pair.
    long_term_token: Optional[SecretStr] = Field(
        None,
        validation_alias=AliasChoices("long_term_token", "facebook_long_term_token"),
    )
class FacebookSourceConfig(BaseSourceConfig):
    """Configuration for reading comments from the posts of a Facebook page.

    Constructing the config also builds the ``FacebookApi`` client, so
    credentials must be resolvable at construction time.

    Raises:
        AttributeError: if neither a ``long_term_token`` nor the
            ``app_id``/``app_secret`` pair is available.
    """

    # API client built in ``__init__``; private so it is not a model field.
    _api_client: FacebookApi = PrivateAttr()
    TYPE: str = "Facebook"
    # Page whose posts are listed when ``post_ids`` is not given.
    page_id: str
    # Explicit posts to read comments from; when empty, posts are listed.
    post_ids: Optional[List[str]] = None
    # Start of the lookup window. Strings of at most 5 characters are handed
    # to ``convert_utc_time``; longer ones are parsed with
    # ``DATETIME_STRING_PATTERN``.
    lookup_period: Optional[str] = None
    # Passed as ``count`` when listing the page's posts.
    max_post: Optional[int] = 50
    # Credentials; a default ``FacebookCredentials()`` is created when absent.
    cred_info: Optional[FacebookCredentials] = Field(None)

    def __init__(self, **data: Any):
        super().__init__(**data)

        def reveal(secret: Optional[SecretStr]) -> Optional[str]:
            # Unwrap a secret; missing or empty secrets become ``None``.
            return secret.get_secret_value() if secret else None

        self.cred_info = self.cred_info or FacebookCredentials()
        creds = self.cred_info

        has_token = creds.long_term_token is not None
        has_app_pair = creds.app_id is not None and creds.app_secret is not None
        if not has_token and not has_app_pair:
            # Either credential set is sufficient; the message says so.
            raise AttributeError(
                "Either `long_term_token`, or both `app_id` and `app_secret`, "
                "are required to connect to Facebook"
            )

        self._api_client = FacebookApi(
            app_id=reveal(creds.app_id),
            app_secret=reveal(creds.app_secret),
            access_token=reveal(creds.long_term_token),
            # A token takes precedence; app-only auth is the fallback.
            application_only_auth=not has_token,
        )

    def get_client(self) -> FacebookApi:
        """Return the ``FacebookApi`` client built at construction time."""
        return self._api_client
class FacebookSource(BaseSource):
    """Source that turns comments on a Facebook page's posts into payloads."""

    NAME: str = "Facebook"

    def lookup(self, config: FacebookSourceConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Fetch comments newer than the stored or configured start time.

        Args:
            config: Source configuration; supplies the API client, the page
                id, optional explicit post ids and the lookup period.
            **kwargs: ``id`` (optional) is the workflow identifier used to
                load and persist state in ``self.store``.

        Returns:
            One ``TextPayload`` per collected comment, with the comment
            message as ``processed_text`` and ``vars(comment)`` as ``meta``.

        State layout (only persisted when ``id`` is given and a store is set):
            ``state["since_timestamp"]`` is the newest post update time seen;
            ``state[post_id]["since_timestamp"]`` is the newest comment
            creation time seen for that post.
        """
        source_responses: List[TextPayload] = []

        # Get data from state
        identifier: Optional[str] = kwargs.get("id", None)
        state: Optional[Dict[str, Any]] = (
            None
            if identifier is None or self.store is None
            else self.store.get_source_state(identifier)
        )
        state = state or dict()

        since_timestamp: Optional[int] = state.get("since_timestamp", None)
        if since_timestamp is None:
            # No stored cursor: derive the start time from the configuration.
            lookup_period = config.lookup_period or DEFAULT_LOOKUP_PERIOD
            if len(lookup_period) <= 5:
                since_time = convert_utc_time(lookup_period)
            else:
                since_time = datetime.strptime(lookup_period, DATETIME_STRING_PATTERN)
            since_timestamp = int(since_time.timestamp())

        self.log_object("Since: ", str(datetime.fromtimestamp(since_timestamp)))

        post_last_since_time = since_timestamp

        api = config.get_client()

        post_ids = config.post_ids
        if not post_ids:
            posts = api.page.get_posts(
                page_id=config.page_id,
                count=config.max_post,
                since_time=str(since_timestamp),
                return_json=True,
            )
            self.log_object("Posts: ", str(posts))

            post_ids = []
            for post in posts:
                post_update_time = convert_datetime_str_to_epoch(post["updated_time"])
                if post_update_time is None:
                    # logging uses %-style placeholders; "{}" would fail here.
                    logger.warning(
                        "Unable to parse post update time: %s", post["updated_time"]
                    )
                elif post_update_time < since_timestamp:
                    # Stop at the first post older than the window.
                    break
                else:
                    post_last_since_time = max(post_last_since_time, post_update_time)
                post_ids.append(post["id"])

        for post_id in post_ids:
            # Collect post state
            post_stat: Dict[str, Any] = state.get(post_id, dict())
            state[post_id] = post_stat

            # Resume from this post's own cursor (written below). Reading the
            # global cursor instead re-emits already seen comments.
            comment_since_time = post_stat.get("since_timestamp")
            if comment_since_time is None:
                comment_since_time = since_timestamp
            comment_last_since_time = comment_since_time

            comments, comment_summary = api.page.get_comments(
                object_id=post_id,
                filter_type="stream",
                order_type="reverse_chronological",
            )
            self.log_object("Comments: ", str(comments))
            self.log_object("Comment Summary: ", str(comment_summary))

            for comment in comments:
                comment_created_time = convert_datetime_str_to_epoch(
                    comment.created_time
                )
                if comment_created_time is None:
                    # Keep the comment, as is done for posts with an
                    # unparseable time, but do not move the cursor.
                    logger.warning(
                        "Unable to parse comment creation time: %s",
                        comment.created_time,
                    )
                elif comment_created_time < comment_since_time:
                    # Comments are requested newest first; the rest are older.
                    break
                else:
                    comment_last_since_time = max(
                        comment_last_since_time, comment_created_time
                    )

                source_responses.append(
                    TextPayload(
                        processed_text=comment.message,
                        meta=vars(comment),
                        source_name=self.NAME,
                    )
                )

            post_stat["since_timestamp"] = comment_last_since_time

        state["since_timestamp"] = post_last_since_time

        # TODO: See how to augment with comments data
        # if config.include_title_description:
        #     text_payloads = [
        #         TextPayload(
        #             processed_text=f"{data['title']}. {data['description']}",
        #             meta=data,
        #             source_name=self.NAME,
        #         )
        #         for post in posts
        #         for data in post["attachments"]["data"]
        #     ]
        #
        #     source_responses.extend(text_payloads)

        # Persist only when the caller supplied a workflow id and a store exists.
        if identifier and self.store is not None:
            self.store.update_source_state(workflow_id=identifier, state=state)

        return source_responses

    @staticmethod
    def log_object(message: str, result: Any) -> None:
        """Log ``message`` followed by the JSON form of ``result`` at DEBUG.

        Serialisation is skipped when DEBUG logging is disabled.
        """
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("%s%s", message, obj_to_json(result))
|