Michaeldavidstein committed on
Commit 6aedbd0 · verified · 1 Parent(s): e6e49ef

Delete app.py

Files changed (1)
  1. app.py +0 -176
app.py DELETED
@@ -1,176 +0,0 @@
import gradio as gr
from sentence_transformers import SentenceTransformer
import chromadb
import pandas as pd
import os
import json
import ast  # for safely parsing the stringified metadata/embedding columns
from pathlib import Path
# Anyscale LLM wrapper; the llama-index client is assumed here, since it
# matches the client.complete(...).text usage further down
from llama_index.llms.anyscale import Anyscale

# Load the sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize the ChromaDB client
chroma_client = chromadb.Client()
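# NOTE: chromadb.Client() keeps data in memory only, which is why the
# collection is rebuilt from the CSV on every app start;
# chromadb.PersistentClient would persist it across restarts if needed.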

# Function to build the database from CSV
def build_database():
    # Read the CSV file
    df = pd.read_csv('collection_data.csv')
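    # One row per chunk, with the columns used below: 'documents', 'ids',
    # 'metadatas' (stringified dicts), and 'embeddings' (stringified lists)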

    # Name of the collection
    collection_name = 'Dataset-10k-companies'

    # Delete the existing collection if it exists
    # chroma_client.delete_collection(name=collection_name)

    # Create a new collection
    collection = chroma_client.create_collection(name=collection_name)
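    # NOTE: create_collection raises if the name already exists; on a fresh
    # in-memory client that cannot happen, but get_or_create_collection would
    # be the safer call if this module were ever re-run in the same process.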

    # Add the data from the DataFrame to the collection;
    # ast.literal_eval parses the stringified columns without the risks of eval
    collection.add(
        documents=df['documents'].tolist(),
        ids=df['ids'].tolist(),
        metadatas=df['metadatas'].apply(ast.literal_eval).tolist(),
        embeddings=df['embeddings'].apply(lambda x: ast.literal_eval(x.replace(',,', ','))).tolist()
    )

    return collection

# Build the database when the app starts
collection = build_database()

# Instantiate the Anyscale client; the API key is assumed to come from the
# ANYSCALE_API_KEY environment variable
anyscale_api_key = os.environ.get("ANYSCALE_API_KEY")
client = Anyscale(api_key=anyscale_api_key, model="meta-llama/Llama-2-70b-chat-hf")

# Function to get relevant chunks
def get_relevant_chunks(query, collection, top_n=3):
    query_embedding = model.encode(query).tolist()
    results = collection.query(query_embeddings=[query_embedding], n_results=top_n)
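    # Query results are nested per input query, hence the [0] indexing below;
    # each metadata dict is assumed to carry 'source' and 'page' keys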

    relevant_chunks = []
    for i in range(len(results['documents'][0])):
        chunk = results['documents'][0][i]
        source = results['metadatas'][0][i]['source']
        page = results['metadatas'][0][i]['page']
        relevant_chunks.append((chunk, source, page))

    return relevant_chunks

# Define system message for LLM
qna_system_message = """
You are an assistant to Finsights analysts. Your task is to provide relevant information about the financial performance of the companies followed by Finsights.

User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context.
The context contains references to specific portions of documents relevant to the user's query, along with source links.
The source for a context will begin with the token: ###Source.

When crafting your response:
1. Select only the context relevant to answering the question.
2. Include the source links in your response.
3. User questions will begin with the token: ###Question.
4. If the question is irrelevant to Finsights, respond with: "I am an assistant for Finsight Docs. I can only help you with questions related to Finsights."

Adhere to the following guidelines:
- Your response should only address the question asked and nothing else.
- Answer only using the context provided.
- Do not mention anything about the context in your final answer.
- If the answer is not found in the context, respond with: "I don't know."
- Always quote the source when you use the context. Cite the relevant source at the end of your response under the section "Source:".
- Do not make up sources. Use only the links provided in the sources section of the context. You are prohibited from providing other links/sources.

Here is an example of how to structure your response:

Answer:
[Answer]

Source:
[Source]
"""

# Create a user message template
qna_user_message_template = """
###Context
Here are some documents and their source links that are relevant to the question mentioned below.
{context}

###Question
{question}
"""

# Function to get LLM response
def get_llm_response(prompt, max_attempts=3):
    full_response = ""
    for attempt in range(max_attempts):
        try:
            response = client.complete(prompt, max_tokens=1000)  # Increase max_tokens if possible
            chunk = response.text.strip()
            full_response += chunk
            if chunk.endswith((".", "!", "?")):  # Check if response seems complete
                break
            else:
                prompt = "Please continue from where you left off:\n" + chunk[-100:]  # Use the last 100 chars as context
        except Exception as e:
            print(f"Attempt {attempt + 1} failed with error: {e}")
    return full_response
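# NOTE: the punctuation check above is only a heuristic for completeness; a
# reply that happens to be cut off right after a "." is still treated as done.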

# Prediction function
def predict(company, user_query):
    try:
        # Modify the query to include the company name
        modified_query = f"{user_query} for {company}"

        # Get relevant chunks
        relevant_chunks = get_relevant_chunks(modified_query, collection)

        # Prepare the context string
        context = ""
        for chunk, source, page in relevant_chunks:
            context += chunk + "\n"
            context += f"###Source {source}, Page {page}\n"

        # Prepare the user message
        user_message = qna_user_message_template.format(context=context, question=user_query)

        # Craft the prompt to pass to the Llama model
        prompt = f"{qna_system_message}\n\n{user_message}"

        # Generate the response using the Llama model through Anyscale
        answer = get_llm_response(prompt)

        # Log the interaction
        log_interaction(company, user_query, context, answer)

        return answer
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Function to log interactions
def log_interaction(company, user_query, context, answer):
    log_file = Path("interaction_log.jsonl")
    with log_file.open("a") as f:
        json.dump({
            'company': company,
            'user_query': user_query,
            'context': context,
            'answer': answer
        }, f)
        f.write("\n")
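# NOTE: writing one JSON object followed by "\n" per call keeps the log in
# JSON Lines format, which pandas.read_json(..., lines=True) can reload.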

# Create Gradio interface
company_list = ["MSFT", "AWS", "Meta", "Google", "IBM"]
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Radio(company_list, label="Select Company"),
        gr.Textbox(lines=2, placeholder="Enter your query here...", label="User Query")
    ],
    outputs=gr.Textbox(label="Generated Answer"),
    title="Company Reports Q&A",
    description="Query the vector database and get an LLM response based on the documents in the collection."
)

# Launch the interface
iface.launch()
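
For reference, a minimal sketch of the CSV shape that build_database() parses. The column names come from the code above; the row values are invented for illustration:

import pandas as pd

rows = [{
    'ids': 'chunk-001',                                     # unique chunk id
    'documents': 'Revenue grew 12% year over year.',        # chunk text
    'metadatas': "{'source': 'msft-10k.pdf', 'page': 12}",  # stringified dict
    'embeddings': str([0.0] * 384),                         # all-MiniLM-L6-v2 vectors are 384-dim
}]
pd.DataFrame(rows).to_csv('collection_data.csv', index=False)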