Josh Strupp
committed on
Commit
·
7edf494
1
Parent(s):
ea2b4f2
Track large dataset with Git LFS
Browse files- .gitattributes +1 -0
- app.py +110 -4
- self_help_books.csv +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
self_help_books.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -1,7 +1,113 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
def
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
6 |
|
7 |
+
def recommend_books(concern, top_n=5, reviews_per_book=2):
    """Return the books whose reviews best match a free-text concern.

    Reads ``self_help_books.csv``, fits a TF-IDF model on its ``Review``
    column, and ranks every review by cosine similarity to ``concern``.
    The best-matching row of each distinct ``Book`` is kept, so a title is
    recommended at most once.

    Args:
        concern: Free-text description of the user's concern.
        top_n: Maximum number of distinct books to return.
        reviews_per_book: Number of highest- and lowest-``Helpful_Ratio``
            reviews to attach to each book.

    Returns:
        A DataFrame holding the selected rows of the CSV (best match first)
        plus two extra columns, ``'Helpful Reviews'`` and
        ``'Harmful Reviews'``, each containing a list of review strings.
    """
    # The counts are used for slicing and nlargest/nsmallest, which need
    # ints; coerce in case the caller hands over floats.
    top_n = max(int(top_n), 0)
    reviews_per_book = max(int(reviews_per_book), 0)
    concern = '' if concern is None else str(concern)

    # NOTE: the CSV is re-read and the vectorizer refit on every call.
    df = pd.read_csv('self_help_books.csv')

    # Vectorize the reviews and the concern in the same TF-IDF space.
    tfidf = TfidfVectorizer(stop_words='english')
    review_vectors = tfidf.fit_transform(df['Review'].fillna(''))
    concern_vector = tfidf.transform([concern])
    similarities = cosine_similarity(concern_vector, review_vectors).flatten()

    # Rank all reviews best-first, then keep only the best row per book.
    # Taking the raw top-N rows could return the same title several times,
    # and a "-0:" slice would return every row when top_n is 0.
    ranked = df.iloc[np.argsort(similarities)[::-1]]
    recommended_books = ranked.drop_duplicates(subset='Book').head(top_n).copy()

    helpful_lists = []
    critical_lists = []
    for title in recommended_books['Book']:
        # Missing reviews would otherwise be displayed as "nan".
        book_reviews = df[df['Book'] == title].dropna(subset=['Review'])
        helpful_lists.append(
            book_reviews.nlargest(reviews_per_book, 'Helpful_Ratio')['Review'].tolist()
        )
        critical_lists.append(
            book_reviews.nsmallest(reviews_per_book, 'Helpful_Ratio')['Review'].tolist()
        )

    # Assign whole object columns at once: writing a list into a single cell
    # of a not-yet-existing column via .at can raise ValueError, because
    # pandas tries to broadcast the list instead of storing it as one value.
    recommended_books['Helpful Reviews'] = pd.Series(
        helpful_lists, index=recommended_books.index, dtype=object
    )
    recommended_books['Harmful Reviews'] = pd.Series(
        critical_lists, index=recommended_books.index, dtype=object
    )

    return recommended_books
|
36 |
|
37 |
+
def recommend_authors(concern, top_n=5):
    """Rank authors by the mean helpfulness of their reviews.

    Reads ``self_help_books.csv``, groups rows by ``author_clean`` and
    computes the mean and count of ``Helpful_Ratio`` per author. Authors
    with fewer than five counted reviews are discarded.

    Args:
        concern: Accepted for signature parity with the other recommenders;
            it is not used in the computation.
        top_n: Number of authors to return in each list.

    Returns:
        A ``(good_authors, risky_authors)`` tuple of DataFrames with columns
        ``author_clean``, ``helpful_ratio`` and ``review_count``: the
        ``top_n`` highest and the ``top_n`` lowest authors by mean ratio.
    """
    reviews = pd.read_csv('self_help_books.csv')

    # One row per author: mean helpfulness and number of rated reviews.
    per_author = (
        reviews.groupby('author_clean')['Helpful_Ratio']
        .agg(helpful_ratio='mean', review_count='count')
        .reset_index()
    )

    # Ignore authors with too few reviews for the mean to be meaningful.
    min_reviews = 5
    has_enough = per_author['review_count'] >= min_reviews
    per_author = per_author.loc[has_enough]

    best = per_author.nlargest(top_n, 'helpful_ratio')
    worst = per_author.nsmallest(top_n, 'helpful_ratio')
    return best, worst
|
56 |
+
|
57 |
+
def recommend_for_concern(concern, num_books=5, num_reviews=2):
    """Format book and author recommendations as one text report for Gradio.

    Calls ``recommend_books`` and ``recommend_authors`` and renders their
    results as plain text: one section per recommended book (title, author,
    rating, price, helpful ratio, helpful and critical reviews) followed by
    the lists of helpful and risky authors.

    Args:
        concern: Free-text description of the user's concern.
        num_books: Number of books, and of authors per list, to show.
        num_reviews: Number of helpful and of critical reviews per book.

    Returns:
        The formatted report as a single string.
    """
    # NOTE(review): the emoji below were reconstructed from mis-encoded
    # text. The book-title and helpful-ratio emoji could not be recovered
    # unambiguously -- confirm against the upstream file.

    # Counts may arrive as floats from the UI sliders -- TODO confirm.
    num_books = int(num_books)
    num_reviews = int(num_reviews)

    books_df = recommend_books(concern, top_n=num_books, reviews_per_book=num_reviews)
    good_authors, risky_authors = recommend_authors(concern, top_n=num_books)

    # Collect fragments and join once instead of repeated string +=.
    book_parts = ["=== RECOMMENDED BOOKS ===\n\n"]
    for _, book in books_df.iterrows():
        book_parts.append(f"📚 {book['Book']}\n")
        book_parts.append(f"👤 Author: {book['Author']}\n")
        book_parts.append(f"⭐ Rating: {book['Star_Rating']}\n")
        book_parts.append(f"💰 Price: ${book['Price']}\n")
        book_parts.append(f"👍 Helpful Ratio: {book['Helpful_Ratio']:.2f}\n")

        if book['Helpful Reviews']:
            book_parts.append("\n✅ Helpful Reviews:\n")
            book_parts.extend(f"• {review}\n" for review in book['Helpful Reviews'])

        if book['Harmful Reviews']:
            book_parts.append("\n⚠️ Critical Reviews:\n")
            book_parts.extend(f"• {review}\n" for review in book['Harmful Reviews'])

        book_parts.append("\n" + "-" * 50 + "\n\n")
    book_output = "".join(book_parts)

    author_parts = [
        "=== RECOMMENDED AUTHORS ===\n\n",
        "✅ Authors Likely to be Helpful:\n",
    ]
    for _, author in good_authors.iterrows():
        author_parts.append(
            f"• {author['author_clean']} (Helpful ratio: {author['helpful_ratio']:.2f})\n"
        )

    author_parts.append("\n⚠️ Authors to Approach with Caution:\n")
    for _, author in risky_authors.iterrows():
        author_parts.append(
            f"• {author['author_clean']} (Helpful ratio: {author['helpful_ratio']:.2f})\n"
        )
    author_output = "".join(author_parts)

    return book_output + "\n\n" + author_output
|
94 |
+
|
95 |
+
# Create the Gradio interface
|
96 |
+
# Input widgets, in the positional order recommend_for_concern expects:
# concern text, number of books, reviews per book.
_concern_box = gr.Textbox(
    label="What concern or fear would you like help with?",
    placeholder="e.g. I'm a lonely teenager",
)
_book_count_slider = gr.Slider(
    minimum=1, maximum=10, value=5, step=1, label="Number of recommendations"
)
_review_count_slider = gr.Slider(
    minimum=1, maximum=5, value=2, step=1, label="Reviews per book"
)

# Single text area that receives the formatted report.
_report_box = gr.Textbox(label="Recommendations", lines=20)

# Pre-filled example rows: (concern, number of books, reviews per book).
_example_rows = [
    ["I'm a lonely teenager", 5, 2],
    ["I'm worried about my career", 5, 2],
    ["I have anxiety about the future", 5, 2],
]

# Create the Gradio interface
iface = gr.Interface(
    fn=recommend_for_concern,
    inputs=[_concern_box, _book_count_slider, _review_count_slider],
    outputs=_report_box,
    title="Self-Help Book Recommender",
    description="Get personalized book recommendations based on your concerns or fears.",
    examples=_example_rows,
)

# Start the web app as soon as the module is executed.
iface.launch()
|
self_help_books.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8acadc7a62c6f88ce0cde40fdbf41b1b4ee20d98dd7e519b2228b6dfb1d6e5b
|
3 |
+
size 62107596
|