AKKI-AFK committed on
Commit
1550086
·
verified ·
1 Parent(s): 8181fef

Created this so that I have a backup in case my friend crashes my other site

Browse files
Files changed (1) hide show
  1. app.py +115 -0
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import os
import re
import time

import faiss
import numpy as np
import pandas as pd
import requests
import streamlit as st
import torch
from huggingface_hub import hf_hub_download
from langdetect import detect
from sentence_transformers import CrossEncoder, SentenceTransformer
13
+
14
# Hugging Face dataset repository that hosts the book table and the FAISS index.
HF_REPO = "AKKI-AFK/deepshelf-data"


# Streamlit re-executes this script from the top on every user interaction.
# The loaders below are wrapped in st.cache_resource so the downloads, the
# CSV parse, the index read and both transformer models are created once per
# server process instead of once per rerun. show_spinner=False keeps these
# calls from rendering anything before st.set_page_config() runs further down.
@st.cache_resource(show_spinner=False)
def _fetch_dataset_file(filename):
    """Download *filename* from the HF_REPO dataset and return its local path."""
    return hf_hub_download(repo_id=HF_REPO, filename=filename, repo_type="dataset")


@st.cache_resource(show_spinner=False)
def _load_books(path):
    """Read the tab-separated book table at *path* into a DataFrame."""
    return pd.read_csv(path, delimiter="\t")


@st.cache_resource(show_spinner=False)
def _load_index(path):
    """Read the serialized FAISS index stored at *path*."""
    return faiss.read_index(path)


@st.cache_resource(show_spinner=False)
def _load_encoder(model_name):
    """Instantiate the SentenceTransformer used to embed queries."""
    return SentenceTransformer(model_name)


@st.cache_resource(show_spinner=False)
def _load_cross_encoder(model_name):
    """Instantiate the CrossEncoder used to re-rank candidates."""
    return CrossEncoder(model_name)


books_file = _fetch_dataset_file("booksummaries.txt")
faiss_file = _fetch_dataset_file("faiss_index.bin")

# These objects are shared between reruns and sessions; treat them as read-only.
df = _load_books(books_file)
index = _load_index(faiss_file)

encoder = _load_encoder("sentence-transformers/paraphrase-mpnet-base-v2")
cross_encoder = _load_cross_encoder("cross-encoder/ms-marco-MiniLM-L6-v2")

# Track request timestamps.
# NOTE: being a plain module-level list, this starts empty on every script rerun.
request_times = []
26
+
27
@st.cache_data
def recommend_books(query, top_k=5, candidates=50):
    """Return the books whose summaries best match a free-text description.

    The query is sanitized, embedded with the sentence encoder and used to
    pull ``candidates`` nearest neighbours from the FAISS index; those
    candidates are then re-scored against the query with the cross-encoder
    and the ``top_k`` highest-scoring ones are returned.

    Args:
        query: Raw user text describing the desired book.
        top_k: Number of recommendations to return (default 5).
        candidates: Number of nearest neighbours fetched from the index
            before re-ranking (default 50).

    Returns:
        A list of ``{"title": ..., "summary": ...}`` dicts, best match first.
        The list is empty when the sanitized query is shorter than 3 or
        longer than 200 characters, or when no candidate could be matched
        to a row of the book table.
    """
    query = sanitize_input(query)
    if len(query) > 200:
        st.warning("⚠️ Query is too long. Please keep it under 200 characters.")
        return []
    if len(query) < 3:
        st.warning("⚠️ Query is too short. Please provide more details.")
        return []

    # Language detection is advisory only: a failure must never abort the search.
    try:
        lang = detect(query)
    except Exception:
        # Narrower than a bare ``except:`` so KeyboardInterrupt / SystemExit
        # still propagate.
        st.warning("⚠️ Could not detect language. Ensure proper input.")
    else:
        if lang != "en":
            st.warning("⚠️ Non-English query detected. Results may not be accurate.")

    # FAISS operates on a 2-D float32 matrix; normalize_L2 works in place.
    search_vector = np.asarray([encoder.encode(query)], dtype="float32")
    faiss.normalize_L2(search_vector)

    distances, ann = index.search(search_vector, k=candidates)
    hits = pd.DataFrame({"distances": distances[0], "ann": ann[0]})
    # Inner join: ids without a matching row in df (FAISS pads with -1 when it
    # finds fewer than k neighbours) are dropped here.
    matched = hits.merge(df, left_on="ann", right_index=True)
    # A missing summary cannot be paired with the query for re-ranking.
    matched = matched.dropna(subset=["summary"])
    if matched.empty:
        return []

    pairs = [(query, summary) for summary in matched["summary"]]
    matched = matched.assign(Query=query, score=cross_encoder.predict(pairs))

    best = matched.sort_values("score", ascending=False).head(top_k)
    return best[["title", "summary"]].to_dict(orient="records")
60
+
61
def sanitize_input(text):
    """Drop every character that is neither a word character nor whitespace,
    then collapse each whitespace run to one space and trim both ends.
    """
    without_symbols = re.sub(r'[^\w\s]', '', text)
    single_spaced = re.sub(r'\s+', ' ', without_symbols)
    return single_spaced.strip()
66
+
67
def rate_limit(max_requests=5, window_seconds=10):
    """Allow at most ``max_requests`` searches per ``window_seconds`` per session.

    The request history is kept in ``st.session_state`` rather than in a
    module-level list: Streamlit re-executes the script on every interaction,
    so a plain global would be re-initialised each time and the limit could
    never be reached.

    Args:
        max_requests: Number of requests permitted inside the window.
        window_seconds: Length of the sliding window, in seconds.

    Returns:
        True when the request is allowed (and has been recorded); False when
        the limit is exceeded, in which case an error is shown to the user.
    """
    # Monotonic clock: interval arithmetic is unaffected by wall-clock changes.
    now = time.monotonic()
    history = st.session_state.get("request_times", [])
    recent = [stamp for stamp in history if now - stamp < window_seconds]

    allowed = len(recent) < max_requests
    if allowed:
        recent.append(now)
    else:
        st.error("⚠️ Too many requests. Please wait a few seconds before trying again.")

    st.session_state["request_times"] = recent
    return allowed
77
+
78
# ---------------------------------------------------------------------------
# Page layout and interaction flow.
# ---------------------------------------------------------------------------
st.set_page_config(page_title="DeepShelf", page_icon="📚", layout="wide")

# Global CSS for the title, subtitle and result cards.
st.markdown("""
<style>
body {background-color: #1E1E1E; color: white;}
.title {text-align: center; font-size: 3em; font-weight: bold; color: #E6A400;}
.subtext {text-align: center; font-size: 1.2em; color: #AAAAAA;}
.recommend-btn {text-align: center;}
.book-container {border-radius: 10px; padding: 20px; margin: 10px; background: #2E2E2E; box-shadow: 2px 2px 10px #00000050;}
.book-title {font-size: 1.5em; font-weight: bold; color: #FFD700;}
.book-summary {font-size: 1em; color: #CCCCCC;}
</style>
""", unsafe_allow_html=True)

st.markdown('<div class="title">📖 DeepShelf</div>', unsafe_allow_html=True)
st.markdown('<div class="subtext">Find the best books based on your preferences!</div>', unsafe_allow_html=True)

query = st.text_input("🔍 Enter a book description (e.g., 'A dark fantasy with drama')", max_chars=200, help="Use keywords to describe your ideal book!")
button_disabled = not (3 <= len(query) <= 200)  # Disable button if query length is invalid

button_placeholder = st.empty()

# A single button is rendered per run. Swapping other buttons into the
# placeholder mid-run would leave a different widget on screen than the one
# this script reads on the next run; the spinner signals progress instead.
if button_placeholder.button("✨ Recommend Books", disabled=button_disabled, help="Click to get personalized book recommendations!"):
    if rate_limit():
        with st.spinner("🔍 Searching for the best books..."):
            recommendations = recommend_books(query)

        # Nothing to list when validation failed or no candidate matched.
        if recommendations:
            st.markdown("## 📚 Recommended Books:")
            for rec in recommendations:
                # The card is injected as raw HTML, so dataset text is escaped
                # to keep stray markup in a title or summary from being rendered.
                title = html.escape(str(rec["title"]))
                summary = html.escape(str(rec["summary"]))
                st.markdown(
                    '<div class="book-container">'
                    f'<div class="book-title">📖 {title}</div>'
                    f'<div class="book-summary">{summary}</div>'
                    '</div>',
                    unsafe_allow_html=True,
                )