Izza-shahzad-13 committed on
Commit
f6359f9
·
verified ·
1 Parent(s): c8f019e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -30
app.py CHANGED
@@ -20,36 +20,52 @@ st.title("Economics & Population Advisor")
20
  uploaded_file = st.file_uploader("Upload your CSV file with economic documents", type=["csv"])
21
 
22
  if uploaded_file is not None:
23
- # Load CSV
24
- df = pd.read_csv(uploaded_file, error_bad_lines=False, engine='python')
25
  st.write("Dataset Preview:", df.head())
26
 
27
- # Assume 'text' column contains the document text; replace with actual column name
28
- documents = df['Country Name'].tolist() if 'text' in df.columns else st.text_input("Specify the text column name:")
29
-
30
- # Create embeddings for FAISS indexing
31
- st.write("Indexing documents...")
32
- embeddings = embedding_model.encode(documents)
33
- dimension = embeddings.shape[1]
34
- index = faiss.IndexFlatL2(dimension)
35
- index.add(np.array(embeddings))
36
- st.write("Indexing complete.")
37
-
38
- # Function to generate response
39
- def generate_summary(context):
40
- inputs = tokenizer("summarize: " + context, return_tensors="pt", max_length=512, truncation=True)
41
- outputs = qa_model.generate(inputs["input_ids"], max_length=150, min_length=50, length_penalty=2.0)
42
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
43
-
44
- # RAG functionality: Ask a question, retrieve documents, and generate an answer
45
- st.subheader("Ask a Question about Economic Data")
46
- question = st.text_input("Enter your question:")
47
-
48
- if st.button("Get Answer") and question:
49
- question_embedding = embedding_model.encode([question])
50
- D, I = index.search(np.array(question_embedding), k=3)
51
- retrieved_docs = [documents[i] for i in I[0]]
52
- context = " ".join(retrieved_docs)
53
- answer = generate_summary(context)
54
 
55
- st.write("Answer:", answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  uploaded_file = st.file_uploader("Upload your CSV file with economic documents", type=["csv"])
21
 
22
  if uploaded_file is not None:
23
+ # Load CSV with error handling
24
+ df = pd.read_csv(uploaded_file, on_bad_lines='skip', engine='python')
25
  st.write("Dataset Preview:", df.head())
26
 
27
+ # Allow user to specify the column containing the text (economic documents)
28
+ text_column = st.text_input("Specify the column containing the document text:", value="Country Name")
29
+
30
+ if text_column not in df.columns:
31
+ st.error(f"The column '{text_column}' was not found in the dataset.")
32
+ else:
33
+ # Extract documents from the specified column
34
+ documents = df[text_column].tolist()
35
+
36
+ # Create embeddings for FAISS indexing
37
+ st.write("Indexing documents...")
38
+ embeddings = embedding_model.encode(documents, convert_to_numpy=True)
39
+ dimension = embeddings.shape[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ # Create a FAISS index and add embeddings
42
+ index = faiss.IndexFlatL2(dimension)
43
+ index.add(np.array(embeddings, dtype=np.float32))
44
+ st.write("Indexing complete.")
45
+
46
+ # Function to generate summary using T5 model
47
+ def generate_summary(context):
48
+ inputs = tokenizer("summarize: " + context, return_tensors="pt", max_length=512, truncation=True)
49
+ outputs = qa_model.generate(inputs["input_ids"], max_length=150, min_length=50, length_penalty=2.0)
50
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
51
+
52
+ # RAG functionality: Ask a question, retrieve documents, and generate an answer
53
+ st.subheader("Ask a Question about Economic Data")
54
+ question = st.text_input("Enter your question:")
55
+
56
+ if st.button("Get Answer") and question:
57
+ # Embed the question
58
+ question_embedding = embedding_model.encode([question], convert_to_numpy=True)
59
+
60
+ # Search for the most relevant documents
61
+ D, I = index.search(np.array(question_embedding, dtype=np.float32), k=3)
62
+ retrieved_docs = [documents[i] for i in I[0]]
63
+
64
+ # Combine retrieved documents into context
65
+ context = " ".join(retrieved_docs[:5]) # Limit to 5 documents to avoid long input
66
+ if len(context) > 1000: # Truncate context if too long
67
+ context = context[:1000]
68
+
69
+ # Generate summary using the context
70
+ answer = generate_summary(context)
71
+ st.write("Answer:", answer)