import streamlit as st
import PyPDF2
import openai
import faiss
import os
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
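
# Note: the OpenAI calls below (openai.Embedding.create / openai.ChatCompletion.create)
# assume the legacy pre-1.0 `openai` Python SDK; these methods were removed in
# openai>=1.0, so pin `openai<1` or port the calls to the newer client interface.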


def extract_text_from_pdf(pdf_file):
    """Extract and concatenate the text of every page of an uploaded PDF."""
    reader = PyPDF2.PdfReader(pdf_file)
    text = ""
    for page in reader.pages:
        # extract_text() may return None for pages with no extractable text
        text += page.extract_text() or ""
    return text


def get_embeddings(text, model="text-embedding-ada-002"):
    """Return the embedding vector for `text` via the legacy OpenAI embeddings endpoint."""
    response = openai.Embedding.create(input=[text], model=model)
    return response['data'][0]['embedding']


def search_similar(query_embedding, index, stored_texts, top_k=3):
    """Return the top_k stored chunks closest to the query embedding, with L2 distances."""
    # FAISS expects a 2-D float32 array of query vectors
    query_np = np.array([query_embedding], dtype="float32")
    distances, indices = index.search(query_np, top_k)
    results = [(stored_texts[i], distances[0][idx]) for idx, i in enumerate(indices[0])]
    return results


def generate_html(response_content):
    html_template = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Course Query Response</title>
        <style>
            body {{
                font-family: Arial, sans-serif;
                margin: 0;
                padding: 0;
                background-color: #f4f4f9;
                color: #333;
            }}
            .container {{
                width: 80%;
                margin: 30px auto;
                background-color: white;
                padding: 20px;
                border-radius: 8px;
                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            }}
            h1 {{
                color: #2C3E50;
                font-size: 2em;
                text-align: center;
            }}
            .response {{
                background-color: #ecf0f1;
                border-left: 5px solid #3498db;
                padding: 20px;
                font-size: 1.2em;
                margin-top: 20px;
                border-radius: 5px;
            }}
            footer {{
                text-align: center;
                margin-top: 30px;
                font-size: 0.9em;
                color: #7f8c8d;
            }}
        </style>
    </head>
    <body>
        <div class="container">
            <h1>Course Query Response</h1>
            <div class="response">
                <h3>Answer:</h3>
                <p>{response_content}</p>
            </div>
            <footer>
                <p>Generated by Course Query Assistant</p>
            </footer>
        </div>
    </body>
    </html>
    """
    return html_template
st.title("Course Query Assistant") |
|
|
|
|
|
openai_api_key = st.text_input("Enter your OpenAI API key:", type="password") |
|
|
|
if openai_api_key: |
|
openai.api_key = openai_api_key |
|
|
|
|
|
uploaded_files = st.file_uploader("Upload Course Materials (PDFs)", type=["pdf"], accept_multiple_files=True) |
|
|
|
if uploaded_files: |
|
st.write("Processing uploaded course materials...") |
|
|
|
|
|
course_texts = [] |
|
for uploaded_file in uploaded_files: |
|
text = extract_text_from_pdf(uploaded_file) |
|
course_texts.append(text) |
|
|
|
|
|
combined_text = " ".join(course_texts) |
|
|
|
|
|
chunks = [combined_text[i:i+1000] for i in range(0, len(combined_text), 1000)] |
|
|
|
|
|
embeddings = [get_embeddings(chunk) for chunk in chunks] |
|
|
|
|
|
embeddings_np = np.array(embeddings).astype("float32") |
|
|
|
|
|
index = faiss.IndexFlatL2(len(embeddings_np[0])) |
|
index.add(embeddings_np) |
|
|
|
st.write("Course materials have been processed and indexed.") |
|
|
|
|
|

        query = st.text_input("Enter your question about the course materials:")

        if query:
            # Embed the query and retrieve the most similar chunks as context
            query_embedding = get_embeddings(query)
            results = search_similar(query_embedding, index, chunks)

            # Build a prompt that grounds the answer in the retrieved chunks
            context = "\n".join([result[0] for result in results])
            modified_prompt = f"Context: {context}\n\nQuestion: {query}\n\nProvide a detailed answer based on the context."

            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": modified_prompt}]
            )

            response_content = response['choices'][0]['message']['content']

            st.write("### Intelligent Reply:")
            st.write(response_content)

            # Offer the answer as a styled, downloadable HTML page
            html_content = generate_html(response_content)

            st.download_button(
                label="Download Response as HTML",
                data=html_content,
                file_name="course_query_response.html",
                mime="text/html"
            )
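
# To try this locally (assuming the script is saved as app.py):
#   pip install streamlit PyPDF2 "openai<1" faiss-cpu numpy scikit-learn
#   streamlit run app.py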