Josh Strupp committed on
Commit
7edf494
·
1 Parent(s): ea2b4f2

Track large dataset with Git LFS

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. app.py +110 -4
  3. self_help_books.csv +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ self_help_books.csv filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,7 +1,113 @@
1
  import gradio as gr
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
 
7
def recommend_books(concern, top_n=5, reviews_per_book=2):
    """Return the books whose reviews best match ``concern``.

    Every review in ``self_help_books.csv`` is turned into a TF-IDF vector
    and ranked by cosine similarity to the concern text.  The best-matching
    review row of each distinct book is kept, so a single book cannot occupy
    several of the ``top_n`` slots.

    Args:
        concern: Free-text description of the user's concern.
        top_n: Maximum number of distinct books to return.
        reviews_per_book: How many of the highest- and lowest-``Helpful_Ratio``
            reviews to attach to each returned book.

    Returns:
        A DataFrame (a copy of the selected rows, most similar first) with
        two extra object columns, ``'Helpful Reviews'`` and
        ``'Harmful Reviews'``, each holding a list of review texts.
    """
    # UI sliders may deliver floats; slicing and nlargest/nsmallest need ints.
    top_n = max(int(top_n), 0)
    reviews_per_book = max(int(reviews_per_book), 0)

    # NOTE: the CSV is re-read and the vectorizer re-fitted on every call.
    df = pd.read_csv('self_help_books.csv')

    # Create TF-IDF vectors from reviews (missing reviews count as empty text)
    tfidf = TfidfVectorizer(stop_words='english')
    review_vectors = tfidf.fit_transform(df['Review'].fillna(''))
    concern_vector = tfidf.transform([concern])

    # Calculate similarity scores
    similarities = cosine_similarity(concern_vector, review_vectors).flatten()

    # Rank all reviews from most to least similar, then keep only the first
    # (best-matching) row per book before truncating to top_n.
    ranked_positions = np.argsort(similarities)[::-1]
    recommended_books = (
        df.iloc[ranked_positions]
        .drop_duplicates(subset='Book')
        .head(top_n)
        .copy()
    )

    # Collect the review lists first and attach them as whole object columns.
    # Writing a list into a not-yet-existing column cell by cell is treated
    # by pandas as an iterable assignment rather than a single cell value.
    helpful_lists = []
    harmful_lists = []
    for title in recommended_books['Book']:
        book_reviews = df[df['Book'] == title]
        helpful_lists.append(
            book_reviews.nlargest(reviews_per_book, 'Helpful_Ratio')['Review'].tolist()
        )
        harmful_lists.append(
            book_reviews.nsmallest(reviews_per_book, 'Helpful_Ratio')['Review'].tolist()
        )

    recommended_books['Helpful Reviews'] = pd.Series(
        helpful_lists, index=recommended_books.index, dtype=object
    )
    recommended_books['Harmful Reviews'] = pd.Series(
        harmful_lists, index=recommended_books.index, dtype=object
    )

    return recommended_books
36
 
37
def recommend_authors(concern, top_n=5, min_reviews=5):
    """Rank authors by the mean ``Helpful_Ratio`` of their reviews.

    Args:
        concern: Accepted for signature parity with ``recommend_books``;
            the ranking itself does not read it.
        top_n: Number of authors to return in each list.
        min_reviews: Authors with fewer reviews than this are ignored so a
            handful of reviews cannot dominate either list.

    Returns:
        A ``(good_authors, risky_authors)`` tuple of DataFrames with the
        columns ``author_clean``, ``helpful_ratio`` and ``review_count``:
        the ``top_n`` highest and the ``top_n`` lowest mean ratios.
    """
    # UI sliders may deliver floats; nlargest/nsmallest expect an int.
    top_n = int(top_n)

    df = pd.read_csv('self_help_books.csv')

    # Named aggregation yields flat, explicitly named columns instead of a
    # MultiIndex that has to be renamed by position afterwards.
    author_stats = (
        df.groupby('author_clean')
        .agg(
            helpful_ratio=('Helpful_Ratio', 'mean'),
            review_count=('Helpful_Ratio', 'count'),
        )
        .reset_index()
    )

    # Filter authors with minimum reviews
    author_stats = author_stats[author_stats['review_count'] >= min_reviews]

    # Get top and bottom authors
    good_authors = author_stats.nlargest(top_n, 'helpful_ratio')
    risky_authors = author_stats.nsmallest(top_n, 'helpful_ratio')

    return good_authors, risky_authors
56
+
57
def recommend_for_concern(concern, num_books=5, num_reviews=2):
    """Format book and author recommendations as one text report for Gradio.

    Args:
        concern: Free-text concern entered by the user.
        num_books: Number of books, and of authors per list, to include.
        num_reviews: Number of helpful and of critical reviews per book.

    Returns:
        A single string containing the book section followed by the author
        section, or a short prompt when ``concern`` is blank.
    """
    # Blank text gives the similarity ranking nothing to match against,
    # so ask for input instead of returning arbitrary books.
    if not concern or not concern.strip():
        return "Please describe a concern or fear to get recommendations."

    # UI sliders may deliver floats; the helpers need integer counts.
    num_books = int(num_books)
    num_reviews = int(num_reviews)

    books_df = recommend_books(concern, top_n=num_books, reviews_per_book=num_reviews)
    good_authors, risky_authors = recommend_authors(concern, top_n=num_books)

    # Pieces are collected in a list and joined once at the end.
    parts = ["=== RECOMMENDED BOOKS ===\n\n"]
    review_sections = (
        ("\n✅ Helpful Reviews:\n", 'Helpful Reviews'),
        ("\n⚠️ Critical Reviews:\n", 'Harmful Reviews'),
    )
    for _, book in books_df.iterrows():
        parts.append(f"📚 {book['Book']}\n")
        parts.append(f"👤 Author: {book['Author']}\n")
        parts.append(f"⭐ Rating: {book['Star_Rating']}\n")
        parts.append(f"💰 Price: ${book['Price']}\n")
        parts.append(f"📊 Helpful Ratio: {book['Helpful_Ratio']:.2f}\n")

        # A review section is only printed when its list is non-empty.
        for heading, column in review_sections:
            reviews = book[column]
            if reviews:
                parts.append(heading)
                parts.extend(f"• {review}\n" for review in reviews)

        parts.append("\n" + "-" * 50 + "\n\n")

    # Blank line(s) separating the book section from the author section.
    parts.append("\n\n")

    # Format author recommendations
    parts.append("=== RECOMMENDED AUTHORS ===\n\n")
    parts.append("✅ Authors Likely to be Helpful:\n")
    for _, author in good_authors.iterrows():
        parts.append(
            f"• {author['author_clean']} (Helpful ratio: {author['helpful_ratio']:.2f})\n"
        )

    parts.append("\n⚠️ Authors to Approach with Caution:\n")
    for _, author in risky_authors.iterrows():
        parts.append(
            f"• {author['author_clean']} (Helpful ratio: {author['helpful_ratio']:.2f})\n"
        )

    return "".join(parts)
94
+
95
# Wire recommend_for_concern into a Gradio form: one free-text concern and
# two sliders go in, the formatted text report comes out.
iface = gr.Interface(
    fn=recommend_for_concern,
    inputs=[
        gr.Textbox(
            label="What concern or fear would you like help with?",
            placeholder="e.g. I'm a lonely teenager",
        ),
        gr.Slider(
            minimum=1,
            maximum=10,
            value=5,
            step=1,
            label="Number of recommendations",
        ),
        gr.Slider(
            minimum=1,
            maximum=5,
            value=2,
            step=1,
            label="Reviews per book",
        ),
    ],
    outputs=gr.Textbox(label="Recommendations", lines=20),
    title="Self-Help Book Recommender",
    description="Get personalized book recommendations based on your concerns or fears.",
    # Every example uses the slider defaults (5 books, 2 reviews per book).
    examples=[
        [prompt, 5, 2]
        for prompt in (
            "I'm a lonely teenager",
            "I'm worried about my career",
            "I have anxiety about the future",
        )
    ],
)

# Start serving the app.
iface.launch()
self_help_books.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8acadc7a62c6f88ce0cde40fdbf41b1b4ee20d98dd7e519b2228b6dfb1d6e5b
3
+ size 62107596