Spaces:
Sleeping
Sleeping
import pandas as pd | |
import json | |
import os | |
#class FewShotPosts: | |
# def __init__(self, file_path="data/processed_posts.json"): | |
# self.df = None | |
# self.unique_tags = None | |
# self.load_posts(file_path) | |
class FewShotPosts:
    """In-memory index of one persona's processed posts.

    The posts are read from ``processed_<persona>_posts.json`` (relative to
    the current working directory) into a DataFrame. Each post is labelled
    with a length bucket derived from its ``line_count`` and the distinct
    tags across all posts are collected for lookup.
    """

    # Columns the methods below read. Used to build a well-formed empty frame
    # when the JSON file contains no posts.
    _REQUIRED_COLUMNS = ("line_count", "language", "tags", "length")

    def __init__(self, persona_name):
        """Load the processed posts file that belongs to ``persona_name``.

        Args:
            persona_name: Persona whose posts to load. It is lower-cased to
                build the file name ``processed_<persona>_posts.json``.

        Raises:
            FileNotFoundError: If that file does not exist.
        """
        self.df = None  # DataFrame of posts plus the derived 'length' column
        self.unique_tags = None  # list of distinct tags found in the posts
        self.file_path = f"processed_{persona_name.lower()}_posts.json"
        # Try the load directly instead of checking os.path.exists first, so
        # there is no window between the check and the open.
        try:
            self.load_posts(self.file_path)
        except FileNotFoundError as err:
            raise FileNotFoundError(
                f"Processed JSON file not found for persona: {persona_name}"
            ) from err

    def load_posts(self, file_path):
        """Read posts from ``file_path`` and populate ``df`` and ``unique_tags``.

        Args:
            file_path: Path of a UTF-8 JSON file holding the posts. Every post
                is expected to provide ``line_count``, ``language`` and
                ``tags`` (an iterable of tags).

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            KeyError: If the posts lack a ``line_count`` or ``tags`` field.
        """
        with open(file_path, encoding="utf-8") as f:
            posts = json.load(f)

        if not posts:
            # json_normalize returns a frame with no columns for empty input,
            # which would make every later column access raise KeyError.
            self.df = pd.DataFrame(columns=list(self._REQUIRED_COLUMNS))
            self.unique_tags = []
            return

        self.df = pd.json_normalize(posts)
        self.df["length"] = self.df["line_count"].apply(self.categorize_length)

        # Flatten the per-post tag lists straight into a set. Summing a Series
        # of lists re-copies the accumulated list for every row.
        distinct_tags = {tag for tags in self.df["tags"] for tag in tags}
        # Sorted so the order is the same on every run; key=str keeps the sort
        # from failing should the tags not all be of one type.
        self.unique_tags = sorted(distinct_tags, key=str)

    def get_filtered_posts(self, length, language, tag):
        """Return the posts matching a length bucket, a language and a tag.

        Args:
            length: Length bucket as produced by ``categorize_length``
                ("Short", "Medium" or "Long").
            language: Value the post's ``language`` field must equal.
            tag: Tag that must be contained in the post's ``tags``.

        Returns:
            A list with one dict per matching post (column name -> value);
            empty when nothing matches.
        """
        # Explicit boolean dtype keeps the mask valid even for an empty frame.
        has_tag = pd.Series(
            [tag in tags for tags in self.df["tags"]],
            index=self.df.index,
            dtype=bool,
        )
        same_language = self.df["language"] == language
        same_length = self.df["length"] == length
        matches = self.df[has_tag & same_language & same_length]
        return matches.to_dict(orient="records")

    def categorize_length(self, line_count):
        """Map a line count to its length bucket.

        Args:
            line_count: Number of lines in the post.

        Returns:
            "Short" for fewer than 5 lines, "Medium" for 5 to 10 lines
            (inclusive) and "Long" otherwise.
        """
        if line_count < 5:
            return "Short"
        if line_count <= 10:
            return "Medium"
        return "Long"

    def get_tags(self):
        """Return the list of distinct tags collected by ``load_posts``."""
        return self.unique_tags
# Example usage (requires processed_<persona_name>_posts.json in the working directory):
# if __name__ == "__main__":
#     fs = FewShotPosts("<persona_name>")
#     # print(fs.get_tags())
#     posts = fs.get_filtered_posts("Short", "English", "Economy")
#     print(posts)