FridayMaster committed on
Commit
0bdc9aa
·
verified ·
1 Parent(s): 25ba997

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -26
app.py CHANGED
@@ -8,13 +8,14 @@ import nltk
8
 
9
  # Download the required NLTK data
10
  nltk.download('punkt')
11
- nltk.download('punkt_tab')
12
-
13
-
14
 
15
  # Load the Ubuntu manual from a .txt file
 
 
16
  with open("ubuntu_manual.txt", "r", encoding="utf-8") as file:
17
  full_text = file.read()
 
 
18
 
19
  # Function to chunk the text into smaller pieces
20
  def chunk_text(text, chunk_size=500): # Larger chunks
@@ -38,7 +39,11 @@ def chunk_text(text, chunk_size=500): # Larger chunks
38
  manual_chunks = chunk_text(full_text, chunk_size=500)
39
 
40
  # Load your FAISS index
41
- index = faiss.read_index("manual_chunked_faiss_index_500.bin")
 
 
 
 
42
 
43
  # Load your embedding model
44
  embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
@@ -46,6 +51,7 @@ embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
46
  # OpenAI API key
47
  openai.api_key = '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'
48
 
 
49
  # Function to create embeddings
50
  def embed_text(text_list):
51
  return np.array(embedding_model.encode(text_list), dtype=np.float32)
@@ -55,11 +61,10 @@ def retrieve_chunks(query, k=5):
55
  query_embedding = embed_text([query])
56
 
57
  # Search the FAISS index
58
- distances, indices = index.search(query_embedding, k=k)
59
-
60
- # Debugging: Print out the distances and indices
61
- print("Distances:", distances)
62
- print("Indices:", indices)
63
 
64
  # Check if indices are valid
65
  if len(indices[0]) == 0:
@@ -81,23 +86,26 @@ def truncate_input(text, max_length=512):
81
 
82
  # Function to perform RAG: Retrieve chunks and generate a response
83
  def rag_response(query, k=5, max_new_tokens=150):
84
- # Step 1: Retrieve relevant chunks
85
- relevant_chunks = retrieve_chunks(query, k=k)
86
-
87
- if not relevant_chunks:
88
- return "Sorry, I couldn't find relevant information."
89
-
90
- # Step 2: Combine the query with retrieved chunks
91
- augmented_input = query + "\n" + "\n".join(relevant_chunks)
92
-
93
- # Truncate and encode the input
94
- inputs = truncate_input(augmented_input)
95
-
96
- # Generate response
97
- outputs = generator_model.generate(inputs, max_new_tokens=max_new_tokens)
98
- generated_text = generator_tokenizer.decode(outputs[0], skip_special_tokens=True)
99
-
100
- return generated_text
 
 
 
101
 
102
  # Gradio Interface
103
  iface = gr.Interface(
@@ -111,3 +119,10 @@ iface = gr.Interface(
111
  if __name__ == "__main__":
112
  iface.launch()
113
 
 
 
 
 
 
 
 
 
8
 
9
  # Download the required NLTK data
10
  nltk.download('punkt')
 
 
 
11
 
12
  # Load the Ubuntu manual from a .txt file
13
+ try:
14
+ # Load the Ubuntu manual from a .txt file
15
  with open("ubuntu_manual.txt", "r", encoding="utf-8") as file:
16
  full_text = file.read()
17
+ except FileNotFoundError:
18
+ raise FileNotFoundError("The file /content/ubuntu_manual.txt was not found.")
19
 
20
  # Function to chunk the text into smaller pieces
21
  def chunk_text(text, chunk_size=500): # Larger chunks
 
39
  manual_chunks = chunk_text(full_text, chunk_size=500)
40
 
41
  # Load your FAISS index
42
+ try:
43
+ # Load your FAISS index
44
+ index = faiss.read_index("manual_chunked_faiss_index_500.bin")
45
+ except Exception as e:
46
+ raise RuntimeError(f"Failed to load FAISS index: {e}")
47
 
48
  # Load your embedding model
49
  embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
 
51
  # OpenAI API key
52
  openai.api_key = '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'
53
 
54
+
55
  # Function to create embeddings
56
  def embed_text(text_list):
57
  return np.array(embedding_model.encode(text_list), dtype=np.float32)
 
61
  query_embedding = embed_text([query])
62
 
63
  # Search the FAISS index
64
+ try:
65
+ distances, indices = index.search(query_embedding, k=k)
66
+ except Exception as e:
67
+ raise RuntimeError(f"FAISS search failed: {e}")
 
68
 
69
  # Check if indices are valid
70
  if len(indices[0]) == 0:
 
86
 
87
  # Function to perform RAG: Retrieve chunks and generate a response
88
  def rag_response(query, k=5, max_new_tokens=150):
89
+ try:
90
+ # Step 1: Retrieve relevant chunks
91
+ relevant_chunks = retrieve_chunks(query, k=k)
92
+
93
+ if not relevant_chunks:
94
+ return "Sorry, I couldn't find relevant information."
95
+
96
+ # Step 2: Combine the query with retrieved chunks
97
+ augmented_input = query + "\n" + "\n".join(relevant_chunks)
98
+
99
+ # Truncate and encode the input
100
+ inputs = truncate_input(augmented_input)
101
+
102
+ # Generate response
103
+ outputs = generator_model.generate(inputs, max_new_tokens=max_new_tokens)
104
+ generated_text = generator_tokenizer.decode(outputs[0], skip_special_tokens=True)
105
+
106
+ return generated_text
107
+ except Exception as e:
108
+ return f"An error occurred: {e}"
109
 
110
  # Gradio Interface
111
  iface = gr.Interface(
 
119
  if __name__ == "__main__":
120
  iface.launch()
121
 
122
+
123
+
124
+
125
+
126
+
127
+
128
+