bardicreels committed on
Commit
b300879
·
verified ·
1 Parent(s): c994860

Upload 2 files

Browse files
Files changed (2) hide show
  1. ammons_muse.txt +0 -0
  2. app.py +81 -0
ammons_muse.txt ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentence_transformers import SentenceTransformer
3
+ import faiss
4
+ from transformers import pipeline
5
+ import numpy as np
6
+ import os
7
# File paths for the on-disk caches and the source corpus.
INDEX_FILE = 'ammons_muse_index.faiss'        # persisted FAISS L2 index
EMBEDDINGS_FILE = 'ammons_muse_embeddings.npy'  # cached chunk embeddings
CHUNKS_FILE = 'ammons_muse_chunks.npy'        # cached text chunks (object array)
TEXT_FILE = 'ammons_muse.txt'                 # raw corpus the chunks are built from
13
+
14
# Load and prepare the text
def prepare_text(chunk_size=1000):
    """Read TEXT_FILE and split it into fixed-size character chunks.

    Args:
        chunk_size: Characters per chunk (default 1000, matching the
            original hard-coded value; now a parameter for reuse).

    Returns:
        list[str]: Consecutive, non-overlapping slices of the corpus;
        the final chunk may be shorter than ``chunk_size``.
    """
    with open(TEXT_FILE, 'r', encoding='utf-8') as file:
        text = file.read()
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
20
+
21
# Create or load embeddings and index
def get_embeddings_and_index(chunks):
    """Return (embeddings, index) for the corpus chunks, using disk caches.

    If both INDEX_FILE and EMBEDDINGS_FILE exist, loads them; otherwise
    encodes `chunks` with a fresh SentenceTransformer, builds a flat L2
    FAISS index, and writes both caches.

    Args:
        chunks: List of text chunks to embed (ignored when caches exist).

    Returns:
        Tuple of (embeddings ndarray, faiss index).

    NOTE(review): there is no consistency check between a cached index and
    the current `chunks` list — if the corpus changes, stale caches must be
    deleted by hand; verify this is acceptable.
    """
    if os.path.exists(INDEX_FILE) and os.path.exists(EMBEDDINGS_FILE):
        print("Loading existing index and embeddings...")
        index = faiss.read_index(INDEX_FILE)
        embeddings = np.load(EMBEDDINGS_FILE)
    else:
        print("Creating new index and embeddings...")
        model = SentenceTransformer('all-MiniLM-L6-v2')
        embeddings = model.encode(chunks)
        dimension = embeddings.shape[1]
        index = faiss.IndexFlatL2(dimension)
        # FAISS requires float32 input vectors.
        index.add(embeddings.astype('float32'))

        # Save index and embeddings
        faiss.write_index(index, INDEX_FILE)
        np.save(EMBEDDINGS_FILE, embeddings)

    return embeddings, index
40
+
41
# Load or create chunks
# Module-level setup: prefer the cached chunk list; otherwise chunk the
# corpus from TEXT_FILE and cache it as an object array for next startup.
if os.path.exists(CHUNKS_FILE):
    chunks = np.load(CHUNKS_FILE, allow_pickle=True).tolist()
else:
    chunks = prepare_text()
    np.save(CHUNKS_FILE, np.array(chunks, dtype=object))

# Get embeddings and index
# Globals used by the retrieval function below.
embeddings, index = get_embeddings_and_index(chunks)
50
+
51
# Set up text generation pipeline
# NOTE(review): GPT-2 has a 1024-token context window; prompts built from
# several 1000-char chunks can approach or exceed it — confirm prompt
# sizing where this pipeline is called.
generator = pipeline('text-generation', model='gpt2')
53
+
54
# Retrieval function
def retrieve_relevant_chunks(query, top_k=3):
    """Return the `top_k` corpus chunks nearest to `query` by L2 distance.

    Args:
        query: Free-text user query.
        top_k: Number of chunks to retrieve (default 3).

    Returns:
        list[str]: Matching chunks, best first.
    """
    # Fix: the original constructed a new SentenceTransformer on every
    # call, reloading model weights per query. Cache it on the function
    # object so it is loaded once and reused.
    model = getattr(retrieve_relevant_chunks, '_model', None)
    if model is None:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        retrieve_relevant_chunks._model = model
    query_vector = model.encode([query])
    _, indices = index.search(query_vector.astype('float32'), top_k)
    # Fix: FAISS pads missing results with -1; the original would then
    # silently return chunks[-1]. Skip unfilled slots instead.
    return [chunks[i] for i in indices[0] if i != -1]
60
+
61
# Character response generation
def generate_character_response(query):
    """Generate an in-character "Muse" reply to `query` using retrieved context.

    Retrieves the most relevant corpus chunks, builds a role-play prompt,
    and asks the GPT-2 pipeline to continue it.

    Args:
        query: The user's question.

    Returns:
        str: The text generated after the final "Muse:" marker.
    """
    relevant_chunks = retrieve_relevant_chunks(query)
    # Fix: cap the context so prompt + completion fit GPT-2's 1024-token
    # window; three full 1000-char chunks would overflow it.
    context = ' '.join(relevant_chunks)[:1500]
    prompt = f"""As the Muse from A.R. Ammons' poetry, respond to this query:
Context: {context}
User: {query}
Muse:"""

    # Fix: the original passed max_length=150, which counts PROMPT tokens
    # too — the prompt alone exceeds 150 tokens, so generation would error
    # or truncate. max_new_tokens bounds only the completion.
    response = generator(prompt, max_new_tokens=150, num_return_sequences=1)[0]['generated_text']
    # Keep only the text after the last "Muse:" marker (the completion).
    return response.split('Muse:')[-1].strip()
71
+
72
# Gradio interface
# Wires generate_character_response into a simple text-in / text-out web UI
# and starts the server (blocking call) when the module is run.
iface = gr.Interface(
    fn=generate_character_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="A.R. Ammons' Muse Chatbot",
    description="Ask a question and get a response from the Muse of A.R. Ammons' poetry."
)

iface.launch()