In [4]:
from langchain_community.document_loaders import PyPDFLoader


In [5]:
import getpass
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# The OpenAI client reads OPENAI_API_KEY from the environment. Assigning
# os.getenv(...) straight back into os.environ raises TypeError when the
# variable is missing (environment values must be str, not None), so only
# fill it in when absent and ask for it interactively without echoing it.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

from langchain_openai import ChatOpenAI

# Chat model shared by the chains built later in the notebook.
llm = ChatOpenAI(model="gpt-4o")

In [6]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Location of the PDF to index. This matches the `source` metadata that
# appears in the recorded retrieval output further down the notebook.
PDF_PATH = "../data/main.pdf"

# Load the PDF into LangChain Document objects.
docs = PyPDFLoader(PDF_PATH).load()

# Break the documents into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed the chunks into an in-memory vector store and expose it as the
# `retriever` that the RAG chain cell below expects. Without this step the
# notebook fails with NameError on a fresh kernel (Restart & Run All).
vectorstore = InMemoryVectorStore.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()


In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Instructions for the answering model. "{context}" and "{input}" are
# template placeholders filled in when the prompt is rendered.
system_prompt = "".join(
    [
        "You are an assistant for question-answering tasks. ",
        "Use the following pieces of retrieved context to answer the question. ",
        "If you don't know the answer, say that you don't know. ",
        "Use three sentences maximum and keep the answer concise.",
        "\n\n{context}",
    ]
)

# Two-message chat template: the system instructions, then the user question.
chat_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(chat_messages)

# Combine the LLM + prompt into a document-answering chain, then put the
# retriever in front of it.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Smoke-test the pipeline with a single broad question.
first_query = {"input": "What is this paper about?"}
results = rag_chain.invoke(first_query)



{'input': 'What is this paper about?',
 'context': [Document(id='de54a167-b052-4340-8c9d-c96f3b20b1c8', metadata={'source': '../data/main.pdf', 'page': 12}, page_content='Dunnett’s post hoc multiple comparisons test. A 5% sig-\nnificance level was used for all statistical tests. All statis-\ntical analysis was performed using Prism software\n(GraphPad software, LLC).\nData availability\nAll data are contained within the manuscript.\nSupporting information—This article contains supporting informa-\ntion (62).\nAcknowledgments—Recombinant protein for biophysical studies\nreported in this publication was generated with supported from the\nUniversity of Michigan Center for Structural Biology (CSB). The\nCSB acknowledges support from the U-M Life Sciences Institute,\nthe U-M Rogel Cancer Center, the U-M Medical School Endow-\nment for Basic Sciences, and grants from the NIH. We thank the\nUniversity of Michigan BioNMR Core fand for assistance per-\nforming, analyzing, and interpreting NMR s

In [9]:
# Show only the generated answer text from the chain result.
print(results["answer"])

This paper investigates the regulation of CLEC16A stability by RNF41 through an intrinsically disordered protein region (IDPR). It explores the mechanisms by which protein disorder contributes to substrate selection and degradation processes in the ubiquitin-proteasome system. The study involves biophysical and structural analyses supported by various University of Michigan resources.


In [11]:
# Test questions derived from the PDF content.
questions = [
    "What role does CLEC16A play in mitochondrial quality control, and why is this important for cellular health?",
    "How does the intrinsically disordered protein region (IDPR) within CLEC16A impact its stability and interaction with RNF41?",
    "What is the significance of the CLEC16A-RNF41 complex in the regulation of mitophagy?",
    "How does RNF41 influence the turnover of CLEC16A, and what are the molecular mechanisms involved?",
    "Which diseases are associated with dysregulation of CLEC16A, and what implications does this have for potential treatments?",
    "What techniques were used in this study to confirm the presence and function of the IDPR in CLEC16A?",
    "How does the disruption of CLEC16A’s IDPR affect its ubiquitination and degradation?",
    "Why might the IDPR in CLEC16A be considered a therapeutic target for diseases related to mitochondrial dysfunction?",
    "How do mutations within CLEC16A’s IDPR affect the protein's ability to form complexes with RNF41 and USP8?",
    "What did biophysical analyses reveal about the structural properties of CLEC16A’s IDPR, and how do these properties contribute to its function?",
]

# Ask each question through the RAG chain and print the question/answer pair.
for question in questions:
    result = rag_chain.invoke({"input": question})
    print(f"Question: {question}")
    # Single quotes inside the f-string: reusing the enclosing double quotes
    # is a SyntaxError on Python versions before 3.12.
    print(f"Answer: {result['answer']}\n")


Question: What role does CLEC16A play in mitochondrial quality control, and why is this important for cellular health?
Answer: CLEC16A is an E3 ubiquitin ligase that regulates mitochondrial quality control by facilitating mitophagy, a process that eliminates damaged mitochondria. It forms a complex with RNF41 and USP8 to control the activity of the mitophagy regulator PRKN/Parkin. This function is crucial for cellular health as it maintains mitochondrial integrity, which is vital for energy production and preventing cellular damage, especially in cell types like pancreatic β-cells, sensory neurons, and immune cells.

Question: How does the intrinsically disordered protein region (IDPR) within CLEC16A impact its stability and interaction with RNF41?
Answer: The intrinsically disordered protein region (IDPR) within CLEC16A is crucial for its stability and interaction with RNF41, as it regulates CLEC16A turnover and is the site where RNF41 acts to destabilize CLEC16A. The IDPR is essentia