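# S.H.E.R.L.O.C.K. Observation: a Streamlit app that analyzes a topic or an
# uploaded document in the style of Sherlock Holmes, using the Falcon-180B
# chat model served through the AI71 API, with optional document retrieval
# via a FAISS-backed RetrievalQA chain.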
import streamlit as st
import random
from langchain_community.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
import os
from dotenv import load_dotenv
import tempfile
# Load environment variables
load_dotenv()

AI71_BASE_URL = "https://api.ai71.ai/v1/"
AI71_API_KEY = os.getenv('AI71_API_KEY')

# Initialize the Falcon model
chat = ChatOpenAI(
    model="tiiuae/falcon-180B-chat",
    api_key=AI71_API_KEY,
    base_url=AI71_BASE_URL,
    streaming=True,
)

# Initialize embeddings
embeddings = HuggingFaceEmbeddings()
# Expanded list of predefined topics
PREDEFINED_TOPICS = [
    "Quantum Computing", "Artificial Intelligence Ethics", "Blockchain Technology",
    "Neuroscience", "Climate Change Mitigation", "Space Exploration",
    "Renewable Energy", "Genetic Engineering", "Cybersecurity",
    "Machine Learning", "Nanotechnology", "Robotics",
    "Virtual Reality", "Augmented Reality", "Internet of Things",
    "5G Technology", "Autonomous Vehicles", "Bioinformatics",
    "Cloud Computing", "Data Science", "Artificial General Intelligence",
    "Quantum Cryptography", "3D Printing", "Smart Cities",
    "Biotechnology", "Fusion Energy", "Sustainable Agriculture",
    "Space Tourism", "Quantum Sensors", "Brain-Computer Interfaces",
    "Personalized Medicine", "Synthetic Biology", "Exoplanets",
    "Dark Matter", "CRISPR Technology", "Quantum Internet",
    "Deep Learning", "Edge Computing", "Humanoid Robots",
    "Drone Technology", "Quantum Supremacy", "Neuromorphic Computing",
    "Asteroid Mining", "Bionic Implants", "Smart Materials",
    "Quantum Dots", "Lab-grown Meat", "Vertical Farming",
    "Hyperloop Transportation", "Molecular Nanotechnology", "Quantum Metrology",
    "Artificial Photosynthesis", "Cognitive Computing", "Swarm Robotics",
    "Metamaterials", "Neuroplasticity", "Quantum Machine Learning",
    "Green Hydrogen", "Organ-on-a-Chip", "Bioprinting",
    "Plasma Physics", "Quantum Simulation", "Soft Robotics",
    "Geoengineering", "Exoskeletons", "Programmable Matter",
    "Graphene Applications", "Quantum Sensing", "Neuralink",
    "Holographic Displays", "Quantum Error Correction", "Synthetic Genomes",
    "Carbon Capture and Storage", "Quantum Memory", "Organoids",
    "Artificial Synapses", "Quantum Imaging", "Biosensors",
    "Memristors", "Quantum Annealing", "DNA Data Storage",
    "Cultured Meat", "Quantum Radar", "Neuromorphic Hardware",
    "Quantum Entanglement", "Phytomining", "Biohacking",
    "Topological Quantum Computing", "Neuroprosthetics", "Optogenetics",
    "Quantum Gravity", "Molecular Machines", "Biomimicry",
    "Quantum Teleportation", "Neurogenesis", "Bioelectronics",
    "Quantum Tunneling", "Tissue Engineering", "Bioremediation",
    "Quantum Photonics", "Synthetic Neurobiology", "Nanomedicine",
    "Quantum Biology", "Biogeochemistry", "Molecular Gastronomy",
    "Quantum Thermodynamics", "Nutrigenomics", "Biomechatronics",
    "Quantum Chemistry", "Psychoneuroimmunology", "Nanophotonics",
    "Quantum Optics", "Neuroeconomics", "Bionanotechnology"
]
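
# Build a RetrievalQA chain from an uploaded PDF or TXT file: the upload is written
# to a temporary file, loaded, split into overlapping 1000-character chunks, embedded,
# and indexed in FAISS; the top 5 matching chunks are stuffed into the prompt.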
def process_document(file):
    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.name)[1]) as temp_file:
        temp_file.write(file.getvalue())
        temp_file_path = temp_file.name

    if file.name.endswith('.pdf'):
        loader = PyPDFLoader(temp_file_path)
    else:
        loader = TextLoader(temp_file_path)

    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_documents(documents)

    vectorstore = FAISS.from_documents(texts, embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

    qa_chain = RetrievalQA.from_chain_type(
        llm=chat,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True
    )

    os.unlink(temp_file_path)
    return qa_chain
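
# Generate the Holmes-style analysis: route the query through the document QA chain
# when one is supplied, otherwise ask the chat model directly with the system prompt.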
def get_sherlock_analysis(topic, qa_chain=None):
    system_prompt = """
    You are Sherlock Holmes, the world's greatest detective and master of observation and deduction.
    Your task is to provide an in-depth analysis of the given topic, offering unique insights on how to approach learning it from the ground up.
    Your analysis should:
    1. Break down the topic into its fundamental components.
    2. Identify key concepts and their relationships.
    3. Suggest a structured approach to learning, starting from first principles.
    4. Highlight potential challenges and how to overcome them.
    5. Provide a unique point of view that encourages critical thinking.
    Your response should be detailed, insightful, and encourage a deep understanding of the subject.
    """

    if qa_chain:
        result = qa_chain({"query": f"Provide a Sherlock Holmes style analysis of the topic: {topic}"})
        response = result['result']
    else:
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=f"Analyze the following topic: {topic}")
        ]
        response = chat.invoke(messages).content

    return response
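
# Split long responses into roughly 4000-character chunks at sentence boundaries
# so Streamlit can render them as separate markdown blocks.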
def chunk_text(text, max_chunk_size=4000):
    chunks = []
    current_chunk = ""
    for sentence in text.split(". "):
        if len(current_chunk) + len(sentence) < max_chunk_size:
            current_chunk += sentence + ". "
        else:
            chunks.append(current_chunk)
            current_chunk = sentence + ". "
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
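
# Streamlit UI: topic entry, document upload, or predefined-topic selection in the
# right column; Sherlock's analysis is rendered in the wider left column.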
def main():
    st.set_page_config(page_title="S.H.E.R.L.O.C.K. Observation", page_icon="🔍", layout="wide")

    st.title("🕵️ S.H.E.R.L.O.C.K. Observation")
    st.markdown("*Uncover the depths of any subject with the keen insight of Sherlock Holmes*")

    col1, col2 = st.columns([2, 1])

    with col2:
        st.subheader("Choose Your Method")
        method = st.radio("Select input method:", ["Enter Topic", "Upload Document", "Choose from List"])

        # Initialize so the button handler below never hits an undefined name
        topic = None
        uploaded_file = None

        if method == "Enter Topic":
            topic = st.text_input("Enter your topic of interest:")
        elif method == "Upload Document":
            uploaded_file = st.file_uploader("Upload a document (PDF or TXT)", type=["pdf", "txt"])
            if uploaded_file:
                topic = uploaded_file.name
        else:
            topic = st.selectbox("Choose a topic:", PREDEFINED_TOPICS)

        if st.button("Analyze", key="analyze_button"):
            if method == "Upload Document" and uploaded_file:
                qa_chain = process_document(uploaded_file)
                analysis = get_sherlock_analysis(topic, qa_chain)
            elif topic:
                analysis = get_sherlock_analysis(topic)
            else:
                st.warning("Please provide a topic or upload a document.")
                return

            col1.markdown("## Sherlock's Analysis")
            chunks = chunk_text(analysis)
            for chunk in chunks:
                col1.markdown(chunk)

    st.sidebar.image("https://upload.wikimedia.org/wikipedia/commons/c/cd/Sherlock_Holmes_Portrait_Paget.jpg", use_column_width=True)
    st.sidebar.title("About S.H.E.R.L.O.C.K. Observation")
    st.sidebar.markdown("""
    S.H.E.R.L.O.C.K. Observation is your personal detective for any subject.
    It provides:
    - In-depth analysis of topics
    - Unique perspectives on learning approaches
    - First principles breakdown of subjects
    - Critical thinking encouragement

    Let Sherlock guide you through the intricacies of any field of study!
    """)

    st.sidebar.markdown("---")
    st.sidebar.markdown("Powered by Falcon-180B and Streamlit")
if __name__ == "__main__":
    main()
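
# To run locally (assuming this file is saved as app.py and AI71_API_KEY is set in .env):
#   streamlit run app.py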