Spaces:
Sleeping
Sleeping
Upload 14 files
Browse files- create-database.py +33 -0
- fetch-data.py +32 -0
- few_shot.py +59 -0
- insert-data.py +38 -0
- llm_helper.py +15 -0
- main.py +59 -0
- personas.db +0 -0
- post_generator.py +52 -0
- preprocess.py +133 -0
- processed_investor_posts.json +29 -0
- processed_marketer_posts.json +29 -0
- processed_posts.json +137 -0
- processed_software engineer_posts.json +29 -0
- raw_posts.json +66 -0
create-database.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sqlite3
|
2 |
+
|
3 |
+
def create_database(db_path="personas.db"):
    """Create the SQLite schema used by the post generator.

    Opens the database at *db_path* (SQLite creates the file when it does not
    exist yet) and makes sure the ``personas`` and ``posts`` tables are
    present. Both statements use ``CREATE TABLE IF NOT EXISTS``, so calling
    this function repeatedly is harmless.

    Args:
        db_path: Location of the SQLite file. Defaults to ``"personas.db"``,
            the path the other scripts connect to.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # One row per persona; the name is the lookup key used elsewhere.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS personas (
                persona_id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                description TEXT
            )
        ''')

        # Posts reference their persona through persona_id.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS posts (
                post_id INTEGER PRIMARY KEY AUTOINCREMENT,
                persona_id INTEGER,
                text_blocks TEXT NOT NULL,
                tags TEXT,
                FOREIGN KEY (persona_id) REFERENCES personas(persona_id)
            )
        ''')

        conn.commit()
    finally:
        # Close the handle even when a statement above raises.
        conn.close()

    print("Database and tables created successfully!")


# Only build the database when executed as a script, not on import.
if __name__ == "__main__":
    create_database()
|
fetch-data.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sqlite3
|
2 |
+
import json
|
3 |
+
|
4 |
+
|
5 |
+
def get_posts_by_persona(persona_name, db_path="personas.db"):
    """Fetch the text of every post stored for a persona.

    Args:
        persona_name: Exact value of ``personas.name`` to look up.
        db_path: SQLite file to read. Defaults to ``"personas.db"``.

    Returns:
        A list with the ``text_blocks`` value of each matching post, ordered
        by ``post_id``. An empty list when the persona does not exist (a
        message is printed in that case) or has no posts.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Resolve the persona name to its primary key first.
        cursor.execute("SELECT persona_id FROM personas WHERE name = ?", (persona_name,))
        persona = cursor.fetchone()

        if not persona:
            print(f"Persona '{persona_name}' not found.")
            return []

        persona_id = persona[0]

        cursor.execute(
            "SELECT text_blocks FROM posts WHERE persona_id = ? ORDER BY post_id",
            (persona_id,),
        )
        return [row[0] for row in cursor.fetchall()]
    finally:
        # Runs on every exit path, including the early "not found" return.
        conn.close()


# Example: fetch posts for "Marketer" when run as a script.
if __name__ == "__main__":
    selected_persona = "Marketer"
    persona_posts = get_posts_by_persona(selected_persona)
    print(f"Posts for {selected_persona}:")
    print(json.dumps(persona_posts, indent=2, ensure_ascii=False))
|
few_shot.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
|
5 |
+
|
6 |
+
#class FewShotPosts:
|
7 |
+
# def __init__(self, file_path="data/processed_posts.json"):
|
8 |
+
# self.df = None
|
9 |
+
# self.unique_tags = None
|
10 |
+
# self.load_posts(file_path)
|
11 |
+
|
12 |
+
|
13 |
+
class FewShotPosts:
    """In-memory index of one persona's processed posts.

    The posts are loaded from ``data/processed_<persona>_posts.json`` into a
    pandas DataFrame so they can be filtered by tag, language and length
    bucket when few-shot examples are needed.
    """

    def __init__(self, persona_name):
        """Load the processed JSON file that belongs to *persona_name*.

        Raises:
            FileNotFoundError: If no processed file exists for the persona.
        """
        self.df = None
        self.unique_tags = None
        # The file name uses the lower-cased persona name.
        self.file_path = f"data/processed_{persona_name.lower()}_posts.json"

        if not os.path.exists(self.file_path):
            raise FileNotFoundError(f"Processed JSON file not found for persona: {persona_name}")
        self.load_posts(self.file_path)

    def load_posts(self, file_path):
        """Read *file_path* and populate ``self.df`` and ``self.unique_tags``.

        Each record is expected to carry ``line_count``, ``language`` and
        ``tags`` keys; a derived ``length`` column is added from
        ``line_count``.
        """
        with open(file_path, encoding="utf-8") as f:
            posts = json.load(f)

        if not posts:
            # An empty file has no columns to derive from; keep the frame
            # well-formed so the query methods still work.
            self.df = pd.DataFrame(columns=["tags", "language", "line_count", "length"])
            self.unique_tags = []
            return

        self.df = pd.json_normalize(posts)
        self.df['length'] = self.df['line_count'].apply(self.categorize_length)
        # Sorted so the tag list is stable between runs (set order is not).
        self.unique_tags = sorted({tag for tags in self.df['tags'] for tag in tags})

    def get_filtered_posts(self, length, language, tag):
        """Return posts matching all three criteria as a list of dicts.

        Args:
            length: Length bucket: ``"Short"``, ``"Medium"`` or ``"Long"``.
            language: Value compared against the ``language`` column.
            tag: Tag that must appear in the post's ``tags`` list.
        """
        if self.df.empty:
            return []

        matches = (
            self.df['tags'].apply(lambda tags: tag in tags)
            & (self.df['language'] == language)
            & (self.df['length'] == length)
        )
        return self.df[matches].to_dict(orient='records')

    def categorize_length(self, line_count):
        """Map a line count to "Short" (<5), "Medium" (5-10) or "Long" (>10)."""
        if line_count < 5:
            return "Short"
        if line_count <= 10:
            return "Medium"
        return "Long"

    def get_tags(self):
        """Return the sorted list of distinct tags across all loaded posts."""
        return self.unique_tags
|
53 |
+
|
54 |
+
|
55 |
+
#if __name__ == "__main__":
|
56 |
+
# fs = FewShotPosts()
|
57 |
+
# # print(fs.get_tags())
|
58 |
+
# posts = fs.get_filtered_posts("Short","English","Economy")
|
59 |
+
#print(posts)
|
insert-data.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sqlite3
|
2 |
+
|
3 |
+
def insert_data(db_path="personas.db"):
    """Seed the database with the sample personas and their posts.

    The function is safe to run more than once: personas are inserted with
    ``INSERT OR IGNORE`` and a post is only added when the same text is not
    already stored for that persona. The ``personas`` and ``posts`` tables
    must already exist.

    Args:
        db_path: SQLite file to write to. Defaults to ``"personas.db"``.
    """
    personas = [
        ("Marketer", "Writes engaging social media and blog content."),
        ("Software Engineer", "Shares insights on coding and software architecture."),
        ("Investor", "Focuses on market analysis and investment strategies."),
    ]

    # (persona name, post text, comma-separated tags)
    posts = [
        ("Marketer", "Top 5 social media strategies for brand growth.", "Marketing, Branding"),
        ("Marketer", "How to write viral LinkedIn posts?", "Content Writing, Engagement"),
        ("Software Engineer", "Understanding microservices architecture.", "Software Design, Microservices"),
        ("Software Engineer", "How Python is shaping the AI industry?", "AI, Python"),
        ("Investor", "Why semiconductor stocks are booming in 2024?", "Investing, Technology"),
        ("Investor", "Risk management strategies in stock market investing.", "Risk Management, Finance"),
    ]

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Insert personas if they don't exist (name is UNIQUE).
        cursor.executemany(
            "INSERT OR IGNORE INTO personas (name, description) VALUES (?, ?)",
            personas,
        )

        # Map persona names to their generated primary keys.
        cursor.execute("SELECT persona_id, name FROM personas")
        persona_map = {name: persona_id for persona_id, name in cursor.fetchall()}

        # The posts table has no uniqueness constraint, so guard against
        # duplicates explicitly; otherwise every re-run doubles the posts.
        rows = [
            (persona_map[name], text, tags, persona_map[name], text)
            for name, text, tags in posts
        ]
        cursor.executemany(
            "INSERT INTO posts (persona_id, text_blocks, tags) "
            "SELECT ?, ?, ? WHERE NOT EXISTS ("
            "SELECT 1 FROM posts WHERE persona_id = ? AND text_blocks = ?)",
            rows,
        )

        conn.commit()
    finally:
        # Close the handle even when an insert fails.
        conn.close()

    print("Multiple posts inserted for each persona!")


# Only seed the database when executed as a script, not on import.
if __name__ == "__main__":
    insert_data()
|
llm_helper.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_groq import ChatGroq
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
|
5 |
+
# Load environment settings via python-dotenv before the key is read below
# (presumably from a local .env file — confirm against the deployment).
load_dotenv()

# Shared Groq chat client; other modules import it as `llm`.
llm = ChatGroq(
    groq_api_key=os.getenv("GROQ_API_KEY"),
    model_name="llama-3.3-70b-versatile",
)


if __name__ == "__main__":
    # Manual smoke test: send one prompt and print the model's reply.
    reply = llm.invoke("Two most important ingredient in samosa are ")
    print(reply.content)
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
+
|
main.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from few_shot import FewShotPosts
|
3 |
+
from post_generator import generate_post
|
4 |
+
from preprocess import process_posts_for_persona
|
5 |
+
|
6 |
+
|
7 |
+
# Choices offered in the "Length" and "Language" dropdowns.
length_options = ["Short", "Medium", "Long"]
language_options = ["English", "Hindi","Kannada"]


def main():
    """Render the Streamlit page and generate a post on request.

    The persona name is collected with a Streamlit text box. Reading it with
    ``input()`` would block on the server's stdin instead of showing a
    control in the page. Once a persona with a processed JSON file is
    entered, the topic, length and language dropdowns are shown and the
    "Generate" button produces a post through ``generate_post``.
    """
    st.subheader("LinkedIn Post Generator: Codebasics")

    persona = st.text_input("Persona Name").strip()
    if not persona:
        st.info("Enter a persona name to load its topics.")
        return

    try:
        # Built once; a missing file is reported in the page instead of
        # crashing the script.
        fs = FewShotPosts(persona)
    except FileNotFoundError as e:
        st.error(str(e))
        return

    tags = fs.get_tags()

    # Create three columns for the dropdowns
    col1, col2, col3 = st.columns(3)

    with col1:
        # Dropdown for Topic (Tags)
        selected_tag = st.selectbox("Topic", options=tags)

    with col2:
        # Dropdown for Length
        selected_length = st.selectbox("Length", options=length_options)

    with col3:
        # Dropdown for Language
        selected_language = st.selectbox("Language", options=language_options)

    # Generate Button
    if st.button("Generate"):
        post = generate_post(persona, selected_length, selected_language, selected_tag)
        st.write(post)


# Run the app
if __name__ == "__main__":
    main()
|
personas.db
ADDED
Binary file (20.5 kB). View file
|
|
post_generator.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import few_shot
|
2 |
+
from llm_helper import llm
|
3 |
+
from few_shot import FewShotPosts
|
4 |
+
|
5 |
+
|
6 |
+
|
7 |
+
def get_length_str(length):
    """Translate a length bucket into the line-range phrase used in prompts.

    Returns ``None`` when *length* is not "Short", "Medium" or "Long".
    """
    phrases = (
        ("Short", "1 to 5 lines"),
        ("Medium", "6 to 10 lines"),
        ("Long", "11 to 15 lines"),
    )
    for bucket, phrase in phrases:
        if length == bucket:
            return phrase
    return None
|
14 |
+
|
15 |
+
|
16 |
+
def generate_post(persona_name, length, language, tag):
    """Generate a LinkedIn post for a persona with the shared LLM client.

    Args:
        persona_name: Persona whose processed posts supply style examples.
        length: Length bucket: "Short", "Medium" or "Long".
        language: Language the post should be written in.
        tag: Topic of the post.

    Returns:
        The text content of the model's response.

    Raises:
        FileNotFoundError: Propagated from ``FewShotPosts`` when the persona
            has no processed JSON file.
    """
    persona_posts = FewShotPosts(persona_name)
    # Hand the loaded posts to the prompt builder. Using the bare name
    # `few_shot` there would resolve to the imported module, not this object.
    prompt = get_prompt(length, language, tag, persona_posts)
    response = llm.invoke(prompt)
    return response.content


def get_prompt(length, language, tag, few_shot_posts=None):
    """Build the generation prompt, optionally with up to two style examples.

    Args:
        length: Length bucket, converted to a line range by ``get_length_str``.
        language: Language named in the prompt.
        tag: Topic named in the prompt.
        few_shot_posts: Object exposing ``get_filtered_posts(length, language,
            tag)``, normally a ``FewShotPosts`` instance. When omitted, no
            examples are added.

    Returns:
        The prompt string.
    """
    length_str = get_length_str(length)

    prompt = f'''
    Generate a LinkedIn post using the below information. No preamble.

    1) Topic: {tag}
    2) Length: {length_str}
    3) Language: {language}
    The script for the generated post should always be English.
    '''

    if few_shot_posts is None:
        examples = []
    else:
        examples = few_shot_posts.get_filtered_posts(length, language, tag)

    if examples:
        prompt += "4) Use the writing style as per the following examples."

    # Use max two samples
    for i, post in enumerate(examples[:2], start=1):
        # Processed files store the body under "text_blocks" or, for the
        # per-persona files written by preprocess.py, under "text".
        post_text = post.get('text_blocks') or post.get('text', '')
        prompt += f'\n\n Example {i}: \n\n {post_text}'

    return prompt


if __name__ == "__main__":
    # generate_post needs the persona as its first argument; "Investor" is
    # one of the sample personas seeded by insert-data.py.
    print(generate_post("Investor", "Medium", "English", "Economy"))
|
preprocess.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from llm_helper import llm
|
3 |
+
from langchain_core.prompts import PromptTemplate
|
4 |
+
from langchain_core.output_parsers import JsonOutputParser
|
5 |
+
from langchain_core.exceptions import OutputParserException
|
6 |
+
import sqlite3
|
7 |
+
|
8 |
+
|
9 |
+
#def process_posts(raw_file_path, processed_file_path=None):
|
10 |
+
# with open(raw_file_path, encoding='utf-8') as file:
|
11 |
+
# posts = json.load(file)
|
12 |
+
# enriched_posts = []
|
13 |
+
# for post in posts:
|
14 |
+
# metadata = extract_metadata(post['text_blocks'])
|
15 |
+
# post_with_metadata = post | metadata
|
16 |
+
# enriched_posts.append(post_with_metadata)
|
17 |
+
|
18 |
+
def process_posts_for_persona(persona_name, processed_file_path=None):
    """Enrich a persona's posts with LLM-extracted metadata and unified tags.

    Each post is read from the database, sent through ``extract_metadata``
    and stored under the ``text_blocks`` key, the key the prompt builder
    reads. The tags of all posts are then normalised with the mapping from
    ``get_unified_tags`` before the result is optionally written to disk.

    Args:
        persona_name: Exact persona name stored in the ``personas`` table.
        processed_file_path: Where to write the enriched posts as JSON.
            Nothing is written when this is ``None`` or empty.

    Returns:
        The list of enriched post dicts, or ``None`` when the persona has no
        posts.
    """
    posts = get_posts_by_persona(persona_name)

    if not posts:
        print(f"No posts found for persona '{persona_name}'.")
        return

    enriched_posts = []
    for post in posts:
        metadata = extract_metadata(post)
        enriched_posts.append({"text_blocks": post} | metadata)

    # Collapse near-duplicate tags. A tag the mapping does not mention is
    # kept unchanged instead of raising KeyError.
    unified_tags = get_unified_tags(enriched_posts)
    for post in enriched_posts:
        post['tags'] = sorted({unified_tags.get(tag, tag) for tag in post['tags']})

    if processed_file_path:
        with open(processed_file_path, "w", encoding="utf-8") as outfile:
            json.dump(enriched_posts, outfile, indent=4)

    return enriched_posts


def get_posts_by_persona(persona_name, db_path="personas.db"):
    """Fetch the text of every post stored for a persona.

    Args:
        persona_name: Exact value of ``personas.name`` to look up.
        db_path: SQLite file to read. Defaults to ``"personas.db"``.

    Returns:
        A list with the ``text_blocks`` value of each matching post, ordered
        by ``post_id``. An empty list when the persona does not exist (a
        message is printed in that case) or has no posts.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Fetch persona ID
        cursor.execute("SELECT persona_id FROM personas WHERE name = ?", (persona_name,))
        persona = cursor.fetchone()

        if not persona:
            print(f"Persona '{persona_name}' not found.")
            return []

        persona_id = persona[0]

        # Fetch posts for this persona
        cursor.execute(
            "SELECT text_blocks FROM posts WHERE persona_id = ? ORDER BY post_id",
            (persona_id,),
        )
        return [row[0] for row in cursor.fetchall()]
    finally:
        # Runs on every exit path, including the early "not found" return.
        conn.close()
|
68 |
+
|
69 |
+
|
70 |
+
def extract_metadata(post):
    """Ask the LLM for the line count, language and tags of one post.

    Args:
        post: The post text to analyse.

    Returns:
        The parsed JSON object from the model's reply. The prompt requests
        the keys ``line_count``, ``language`` and ``tags``; the reply is not
        validated beyond being parseable JSON.

    Raises:
        OutputParserException: If the model's reply cannot be parsed as JSON.
    """
    template = '''
    You are given a LinkedIn post. You need to extract number of lines, language of the post and tags.
    1. Return a valid JSON. No preamble.
    2. JSON object should have exactly three keys: line_count, language and tags.
    3. tags is an array of text tags. Extract maximum two tags.
    4. Language should be English, Kannada and Hindi

    Here is the actual post on which you need to perform this task:
    {post}
    '''

    pt = PromptTemplate.from_template(template)
    chain = pt | llm
    response = chain.invoke(input={"post": post})

    json_parser = JsonOutputParser()
    try:
        return json_parser.parse(response.content)
    except OutputParserException as exc:
        # Keep the original parser error attached as the cause.
        raise OutputParserException(
            "Unable to parse post metadata from the LLM response."
        ) from exc
|
92 |
+
|
93 |
+
|
94 |
+
def get_unified_tags(posts_with_metadata):
    """Ask the LLM to merge similar tags into a shorter, unified set.

    Args:
        posts_with_metadata: Iterable of post dicts, each with a ``tags``
            list.

    Returns:
        The parsed JSON object from the model's reply; the prompt asks for a
        mapping from each original tag to its unified tag. An empty dict is
        returned without calling the model when the posts carry no tags.

    Raises:
        OutputParserException: If the model's reply cannot be parsed as JSON.
    """
    unique_tags = set()
    # Loop through each post and extract the tags
    for post in posts_with_metadata:
        unique_tags.update(post['tags'])

    if not unique_tags:
        # Nothing to unify.
        return {}

    # Sorted so the same set of tags always yields the same prompt.
    unique_tags_list = ','.join(sorted(unique_tags))

    template = '''I will give you a list of tags. You need to unify tags with the following requirements,
    1. Tags are unified and merged to create a shorter list.
       Example 1: "Jobseekers", "Job Hunting" can be all merged into a single tag "Job Search".
       Example 2: "Motivation", "Inspiration", "Drive" can be mapped to "Motivation"
       Example 3: "Personal Growth", "Personal Development", "Self Improvement" can be mapped to "Self Improvement"
       Example 4: "Scam Alert", "Job Scam" etc. can be mapped to "Scams"
       Example 5: "Finance", "economics", "currency" etc., can be mapped to "Financial literacy"
    2. Each tag should be follow title case convention. example: "Motivation", "Job Search"
    3. Output should be a JSON object, No preamble
    4. Output should have mapping of original tag and the unified tag.
       For example: {{"Jobseekers": "Job Search", "Job Hunting": "Job Search", "Motivation": "Motivation"}}

    Here is the list of tags:
    {tags}
    '''
    pt = PromptTemplate.from_template(template)
    chain = pt | llm
    response = chain.invoke(input={"tags": unique_tags_list})

    json_parser = JsonOutputParser()
    try:
        return json_parser.parse(response.content)
    except OutputParserException as exc:
        # Keep the original parser error attached as the cause.
        raise OutputParserException(
            "Unable to parse the unified tag mapping from the LLM response."
        ) from exc
|
126 |
+
|
127 |
+
|
128 |
+
|
129 |
+
|
130 |
+
if __name__ == "__main__":
    import os

    # Strip stray whitespace so the name matches the database entry exactly,
    # the same way main.py treats its persona input.
    persona = input("Enter the persona name: ").strip()
    output_path = f"data/processed_{persona.lower()}_posts.json"

    # open() does not create missing parent directories.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    process_posts_for_persona(persona, output_path)
|
processed_investor_posts.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "Investing in semiconductor stocks: A 2024 guide.",
|
4 |
+
"line_count": 1,
|
5 |
+
"language": "English",
|
6 |
+
"tags": [
|
7 |
+
"semiconductor",
|
8 |
+
"stocks"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"text": "Why semiconductor stocks are booming in 2024?",
|
13 |
+
"line_count": 1,
|
14 |
+
"language": "English",
|
15 |
+
"tags": [
|
16 |
+
"semiconductor",
|
17 |
+
"stockmarket"
|
18 |
+
]
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"text": "Risk management strategies in stock market investing.",
|
22 |
+
"line_count": 1,
|
23 |
+
"language": "English",
|
24 |
+
"tags": [
|
25 |
+
"Risk Management",
|
26 |
+
"Stock Market"
|
27 |
+
]
|
28 |
+
}
|
29 |
+
]
|
processed_marketer_posts.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "Top 10 marketing trends to follow in 2024.",
|
4 |
+
"line_count": 1,
|
5 |
+
"language": "English",
|
6 |
+
"tags": [
|
7 |
+
"Marketing",
|
8 |
+
"Trends2024"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"text": "Top 5 social media strategies for brand growth.",
|
13 |
+
"line_count": 1,
|
14 |
+
"language": "English",
|
15 |
+
"tags": [
|
16 |
+
"social media",
|
17 |
+
"brand growth"
|
18 |
+
]
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"text": "How to write viral LinkedIn posts?",
|
22 |
+
"line_count": 1,
|
23 |
+
"language": "English",
|
24 |
+
"tags": [
|
25 |
+
"LinkedIn",
|
26 |
+
"ViralPosts"
|
27 |
+
]
|
28 |
+
}
|
29 |
+
]
|
processed_posts.json
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text_blocks": "The value of 1 Crore (assuming 7% inflation).\n10 years from now = 50 Lakhs\n15 years from now = 36 Lakhs\n20 years from now = 25 Lakhs",
|
4 |
+
"tags": [
|
5 |
+
"Finance",
|
6 |
+
"Economy"
|
7 |
+
],
|
8 |
+
"line_count": 4,
|
9 |
+
"language": "English"
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"text_blocks": "Inflation Effect.\nThe buying power of money gets killed with time due to inflation.\nIn simple words, things become expensive over time.",
|
13 |
+
"tags": [
|
14 |
+
"Economy"
|
15 |
+
],
|
16 |
+
"line_count": 3,
|
17 |
+
"language": "English"
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"text_blocks": "Why does inflation exist?\nWhy should we have inflation in the first place? Why can't we solve inflation permanently?\nModern monetary theorists argue that without inflation, people will save enough money and then stop working.\nIf people don't work, how will the world grow?\nOne way to incentivize people to work is by reducing the buying power of money each year.",
|
21 |
+
"tags": [
|
22 |
+
"Economy"
|
23 |
+
],
|
24 |
+
"line_count": 6,
|
25 |
+
"language": "English"
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"text_blocks": "How does inflation work?\nPrinting more money each year decreases the buying power of money.\nSince 2018, the average rate of money printing worldwide has been around 8%.",
|
29 |
+
"tags": [
|
30 |
+
"Economy"
|
31 |
+
],
|
32 |
+
"line_count": 3,
|
33 |
+
"language": "English"
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"text_blocks": "Building Wealth.\nA SIP of 500 might grow to 123 Crores after 213 years, but its buying power will be significantly lower.\nThe only sustainable way to build wealth is to beat inflation through nuanced investing.",
|
37 |
+
"tags": [
|
38 |
+
"Finance"
|
39 |
+
],
|
40 |
+
"line_count": 3,
|
41 |
+
"language": "English"
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"text_blocks": "Childhood Inspiration.\nI used to read the work of Ray Dalio.\nHe was a Hedge Fund manager, so I learned what a Hedge Fund does and wanted to run my own fund.",
|
45 |
+
"tags": [
|
46 |
+
"Finance",
|
47 |
+
"Motivation"
|
48 |
+
],
|
49 |
+
"line_count": 4,
|
50 |
+
"language": "English"
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"text_blocks": "Challenges.\nI had no mentors growing up.\nDid not have a rich dad.\nMy parents did their best to provide a good education, and I deeply appreciate that.",
|
54 |
+
"tags": [
|
55 |
+
"Education",
|
56 |
+
"Personal Growth"
|
57 |
+
],
|
58 |
+
"line_count": 4,
|
59 |
+
"language": "English"
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"text_blocks": "Journey.\nAced most exams I took.\nHustled and got jobs at top corporate firms.\nWorked 14-15 hours a day to progress.\nQuit corporate and built a YouTube channel.\nBuilt a few firms along the way.\nInvested significant capital in public and private markets.",
|
63 |
+
"tags": [
|
64 |
+
"Career",
|
65 |
+
"Motivation"
|
66 |
+
],
|
67 |
+
"line_count": 7,
|
68 |
+
"language": "English"
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"text_blocks": "Current Status.\nNow, I run a small fund and get to live my dream.",
|
72 |
+
"tags": [],
|
73 |
+
"line_count": 2,
|
74 |
+
"language": "English"
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"text_blocks": "Comparisons.\nKids from rich families run Hedge Funds, PE Funds, and family offices in Singapore or Dubai.\nThey can get a meeting with a Billionaire with just one phone call.\nI can't compete with them, and honestly, I don\u2019t need to.\nLife is not always a competition. Life is about progress.",
|
78 |
+
"tags": [
|
79 |
+
"Motivation",
|
80 |
+
"Personal Growth"
|
81 |
+
],
|
82 |
+
"line_count": 5,
|
83 |
+
"language": "English"
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"text_blocks": "Gratitude.\nLooking back at where I started and the progress I have made, I am happy.\nI am happy being small, useful, and living a life created through hard work, luck, and the blessings of others.\nAnd for that, I am thankful.",
|
87 |
+
"tags": [
|
88 |
+
"Motivation"
|
89 |
+
],
|
90 |
+
"line_count": 4,
|
91 |
+
"language": "English"
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"text_blocks": "Dealing with Criticism.\nEven if you give 100% of your wealth to charity, 5% of people will still criticize you.\nNo matter what you do, there will always be naysayers.",
|
95 |
+
"tags": [
|
96 |
+
"Negativity",
|
97 |
+
"Charity"
|
98 |
+
],
|
99 |
+
"line_count": 3,
|
100 |
+
"language": "English"
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"text_blocks": "Why Negativity Exists.\nThe internet allows people to say things online that they wouldn't have the guts to say in person.",
|
104 |
+
"tags": [
|
105 |
+
"Technology",
|
106 |
+
"Negativity"
|
107 |
+
],
|
108 |
+
"line_count": 2,
|
109 |
+
"language": "English"
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"text_blocks": "The Art of Not Giving a F***.\nIt preserves your confidence.\nIt keeps your mental health in check.",
|
113 |
+
"tags": [
|
114 |
+
"Motivation",
|
115 |
+
"Mental Health"
|
116 |
+
],
|
117 |
+
"line_count": 3,
|
118 |
+
"language": "English"
|
119 |
+
},
|
120 |
+
{
|
121 |
+
"text_blocks": "Tips for Not Caring.\nTake accountability. You are responsible for everything (good or bad) that happens to you, not your friends or family.\nGrow each day. Become a self-learning machine. If your goal is to learn, grow, and share, you will focus on things that matter.\nUnderstand that you are human and will make mistakes. Make a few and move on\u2014that's life.",
|
122 |
+
"tags": [
|
123 |
+
"Motivation",
|
124 |
+
"Self Improvement"
|
125 |
+
],
|
126 |
+
"line_count": 4,
|
127 |
+
"language": "English"
|
128 |
+
},
|
129 |
+
{
|
130 |
+
"text_blocks": "Final Thought.\nLife is about building great stories.\nIf you focus too much on your boss\u2019s feedback, online trolls, or negativity, you will get distracted from building YOUR great story.",
|
131 |
+
"tags": [
|
132 |
+
"Motivation"
|
133 |
+
],
|
134 |
+
"line_count": 3,
|
135 |
+
"language": "English"
|
136 |
+
}
|
137 |
+
]
|
processed_software engineer_posts.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "Why Python is the best language for AI?",
|
4 |
+
"line_count": 1,
|
5 |
+
"language": "English",
|
6 |
+
"tags": [
|
7 |
+
"Python",
|
8 |
+
"AI"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"text": "Understanding microservices architecture.",
|
13 |
+
"line_count": 1,
|
14 |
+
"language": "English",
|
15 |
+
"tags": [
|
16 |
+
"microservices",
|
17 |
+
"architecture"
|
18 |
+
]
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"text": "How Python is shaping the AI industry?",
|
22 |
+
"line_count": 1,
|
23 |
+
"language": "English",
|
24 |
+
"tags": [
|
25 |
+
"Python",
|
26 |
+
"AI"
|
27 |
+
]
|
28 |
+
}
|
29 |
+
]
|
raw_posts.json
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text_blocks": "The value of 1 Crore (assuming 7% inflation).\n10 years from now = 50 Lakhs\n15 years from now = 36 Lakhs\n20 years from now = 25 Lakhs",
|
4 |
+
"tags": ["Inflation", "Finance"]
|
5 |
+
},
|
6 |
+
{
|
7 |
+
"text_blocks": "Inflation Effect.\nThe buying power of money gets killed with time due to inflation.\nIn simple words, things become expensive over time.",
|
8 |
+
"tags": ["Inflation", "Economic Principles"]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"text_blocks": "Why does inflation exist?\nWhy should we have inflation in the first place? Why can't we solve inflation permanently?\nModern monetary theorists argue that without inflation, people will save enough money and then stop working.\nIf people don't work, how will the world grow?\nOne way to incentivize people to work is by reducing the buying power of money each year.",
|
12 |
+
"tags": ["Inflation", "Economic Theory"]
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"text_blocks": "How does inflation work?\nPrinting more money each year decreases the buying power of money.\nSince 2018, the average rate of money printing worldwide has been around 8%.",
|
16 |
+
"tags": ["Economics", "Money Supply"]
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"text_blocks": "Building Wealth.\nA SIP of 500 might grow to 123 Crores after 213 years, but its buying power will be significantly lower.\nThe only sustainable way to build wealth is to beat inflation through nuanced investing.",
|
20 |
+
"tags": ["Wealth", "Investing"]
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"text_blocks": "Childhood Inspiration.\nI used to read the work of Ray Dalio.\nHe was a Hedge Fund manager, so I learned what a Hedge Fund does and wanted to run my own fund.",
|
24 |
+
"tags": ["Inspiration", "Ray Dalio"]
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"text_blocks": "Challenges.\nI had no mentors growing up.\nDid not have a rich dad.\nMy parents did their best to provide a good education, and I deeply appreciate that.",
|
28 |
+
"tags": ["Challenges", "Education"]
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"text_blocks": "Journey.\nAced most exams I took.\nHustled and got jobs at top corporate firms.\nWorked 14-15 hours a day to progress.\nQuit corporate and built a YouTube channel.\nBuilt a few firms along the way.\nInvested significant capital in public and private markets.",
|
32 |
+
"tags": ["Career", "Entrepreneurship"]
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"text_blocks": "Current Status.\nNow, I run a small fund and get to live my dream.",
|
36 |
+
"tags": ["Career", "Investing"]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"text_blocks": "Comparisons.\nKids from rich families run Hedge Funds, PE Funds, and family offices in Singapore or Dubai.\nThey can get a meeting with a Billionaire with just one phone call.\nI can't compete with them, and honestly, I don’t need to.\nLife is not always a competition. Life is about progress.",
|
40 |
+
"tags": ["Comparisons", "Perspective"]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"text_blocks": "Gratitude.\nLooking back at where I started and the progress I have made, I am happy.\nI am happy being small, useful, and living a life created through hard work, luck, and the blessings of others.\nAnd for that, I am thankful.",
|
44 |
+
"tags": ["Gratitude", "Perspective"]
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"text_blocks": "Dealing with Criticism.\nEven if you give 100% of your wealth to charity, 5% of people will still criticize you.\nNo matter what you do, there will always be naysayers.",
|
48 |
+
"tags": ["Criticism", "Perspective"]
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"text_blocks": "Why Negativity Exists.\nThe internet allows people to say things online that they wouldn't have the guts to say in person.",
|
52 |
+
"tags": ["Negativity", "Internet"]
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"text_blocks": "The Art of Not Giving a F***.\nIt preserves your confidence.\nIt keeps your mental health in check.",
|
56 |
+
"tags": ["Mental Health", "Confidence"]
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"text_blocks": "Tips for Not Caring.\nTake accountability. You are responsible for everything (good or bad) that happens to you, not your friends or family.\nGrow each day. Become a self-learning machine. If your goal is to learn, grow, and share, you will focus on things that matter.\nUnderstand that you are human and will make mistakes. Make a few and move on—that's life.",
|
60 |
+
"tags": ["Self-Improvement", "Mental Health"]
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"text_blocks": "Final Thought.\nLife is about building great stories.\nIf you focus too much on your boss’s feedback, online trolls, or negativity, you will get distracted from building YOUR great story.",
|
64 |
+
"tags": ["Life Lessons", "Stories"]
|
65 |
+
}
|
66 |
+
]
|