In [1]:
from langchain.document_loaders import TextLoader
import pinecone
from langchain.vectorstores import Pinecone
import os
from transformers import AutoTokenizer, AutoModel
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.chat_models import ChatOpenAI
import torch
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (AgentTokenBufferMemory)
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.schema.messages import SystemMessage
from langchain.prompts import MessagesPlaceholder
import gradio as gr
import time

# loader = TextLoader("../../../../../docs/docs/modules/state_of_the_union.txt")

  from tqdm.autonotebook import tqdm


In [23]:
import gradio as gr
from conversation import make_conversation, auth_function
import random



In [24]:
demo = gr.ChatInterface(make_conversation).queue()

In [25]:
demo.launch(share=True, auth=auth_function)

Running on local URL:  http://127.0.0.1:7863


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Running on public URL: https://36c80e8ac1f1026dec.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




## Retrieval

In [2]:
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),  # find at app.pinecone.io
    environment=os.getenv("PINECONE_ENV"),  # next to api key in console
)

index_name = "ophtal-knowledge-base"

In [3]:
model = AutoModel.from_pretrained("models/models--BAAI--bge-base-en-v1.5/snapshots/617ca489d9e86b49b8167676d8220688b99db36e")
tokenizer = AutoTokenizer.from_pretrained("models/models--BAAI--bge-base-en-v1.5/snapshots/617ca489d9e86b49b8167676d8220688b99db36e")

In [4]:
prompt = open("prompts/version_2.txt", "r").read()

In [5]:
def get_bert_embeddings(sentence):
    embeddings = []
    input_ids = tokenizer.encode(sentence, return_tensors="pt")
    with torch.no_grad():
        output = model(input_ids)
    embedding = output.last_hidden_state[:,0,:].numpy().tolist()
    return embedding

In [6]:
index = pinecone.Index(index_name)
vectorstore = Pinecone(index, get_bert_embeddings, "text")



In [7]:
retriever = vectorstore.as_retriever()

In [8]:
tool = create_retriever_tool(
    retriever,
    "search_ophtal-knowledge-base",
    "Searches and returns documents regarding the ophtal-knowledge-base.",
)
tools = [tool]

In [9]:
llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), openai_organization=os.getenv("ORGANIZATION_KEY"), model="gpt-4", temperature=0.2)

In [10]:
system_message = SystemMessage(
    content=prompt,
)

memory_key='history'

In [11]:
prompt = OpenAIFunctionsAgent.create_prompt(
    system_message=system_message,
    extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
)

In [12]:
agent_executor = create_conversational_retrieval_agent(llm, tools, verbose=True, prompt=prompt)

In [14]:
result = agent_executor({"input": "Transport of ascorbic acid to lens is done by:\nA. Myoinositol\nB. Choline\nC. Taurine\nD. Na/K ATPase\n\n"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_ophtal-knowledge-base` with `Transport of ascorbic acid to lens`


[0m[36;1m[1;3m[Document(page_content='The essential process is probably anaerobic and in the   \nlens there are a number of enzymes which break down \npyruvate to lactic acid and water. Lactic acid is found in \nconsiderable quantity in the aqueous humour when the lens \nis present; this is not so in the aphakic eye. Agents which \nappear to participate in this process are glutathione and \nascorbic acid (vitamin C) which, reacting together, proba-\nbly participate in an internal auto-oxidative system. The \nformer, both as reduced and oxidized glutathione, occurs \nin very high concentration in the lens, particularly in the \ncortex; the latter is specially secreted by the ciliary body. \nNeither is present in a cataract. The metabolic activity of \nthe lens is largely confined to the cortex; the older nucleus \nis relatively inert.\nThe

In [22]:
result["intermediate_steps"][0][1][0].page_content

'The essential process is probably anaerobic and in the   \nlens there are a number of enzymes which break down \npyruvate to lactic acid and water. Lactic acid is found in \nconsiderable quantity in the aqueous humour when the lens \nis present; this is not so in the aphakic eye. Agents which \nappear to participate in this process are glutathione and \nascorbic acid (vitamin C) which, reacting together, proba-\nbly participate in an internal auto-oxidative system. The \nformer, both as reduced and oxidized glutathione, occurs \nin very high concentration in the lens, particularly in the \ncortex; the latter is specially secreted by the ciliary body. \nNeither is present in a cataract. The metabolic activity of \nthe lens is largely confined to the cortex; the older nucleus \nis relatively inert.\nThe metabolism of the lens is fairly complex but   \ncan be simplified for an overview ( Fig. 2.3 ). The energy \nrequirements are met by various pathways of glucose   \nmetabolism, namely t

In [18]:
def run(input_):
    output = agent_executor({"input": input_})
    output_text = output["output"]
    if len(output["intermediate_steps"])>0:
        documents = output["intermediate_steps"][0][1]
        sources = []
        for doc in documents:
            print(doc.metadata)
            sources.append(doc.metadata["source"])
        
        source_text = ""
        for i in range(len(sources)):
            source_text += f"{i+1}. {sources[i]}\n"

        return f"{output_text} \n\nSources: \n{source_text}"
    return output_text

In [19]:
def slow_echo(message, history):
    text_ = run(message)
    for i in range(len(text_)):
        time.sleep(0.001)
        yield text_[: i+1]

In [20]:
def auth_function(username, password):
    return username == password

In [21]:
ques = run("Transport of ascorbic acid to lens is done by:\nA. Myoinositol\nB. Choline\nC. Taurine\nD. Na/K ATPase\n\nReturn options")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe transport of ascorbic acid to the lens is done by:

A. Myoinositol
B. Choline
C. Taurine
D. Na/K ATPase[0m

[1m> Finished chain.[0m


In [17]:
ques

'The transport of ascorbic acid to the lens is done by:\n\nA. Myoinositol\nB. Choline\nC. Taurine\nD. Na/K ATPase'

In [35]:
demo = gr.ChatInterface(slow_echo).queue()

In [36]:
demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://c73fe7008b858e476a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [16]:
# gr.ChatInterface(
#     run,
#     ).launch(share=True)

In [11]:
result = agent_executor({"input": "whats my name?"})




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYour name is Bob.[0m

[1m> Finished chain.[0m


In [14]:
print("Transport of ascorbic acid to lens is done by:\nA. Myoinositol\nB. Choline\nC. Taurine\nD. Na/K ATPase\n\nReturn options")

Transport of ascorbic acid to lens is done by:
A. Myoinositol
B. Choline
C. Taurine
D. Na/K ATPase

Return options




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_ophtal-knowledge-base` with `Transport of ascorbic acid to lens`


[0m[36;1m[1;3m[Document(page_content='The essential process is probably anaerobic and in the   \nlens there are a number of enzymes which break down \npyruvate to lactic acid and water. Lactic acid is found in \nconsiderable quantity in the aqueous humour when the lens \nis present; this is not so in the aphakic eye. Agents which \nappear to participate in this process are glutathione and \nascorbic acid (vitamin C) which, reacting together, proba-\nbly participate in an internal auto-oxidative system. The \nformer, both as reduced and oxidized glutathione, occurs \nin very high concentration in the lens, particularly in the \ncortex; the latter is specially secreted by the ciliary body. \nNeither is present in a cataract. The metabolic activity of \nthe lens is largely confined to the cortex; the older nucleus \nis relatively inert.\nThe

In [15]:
result = agent_executor(
    {
        "input": "Transport of ascorbic acid to lens is done by:\nA. Myoinositol\nB. Choline\nC. Taurine\nD. Na/K ATPase\n\nReturn options, and explanation in json format. Output should look like: {\"options\": \"a\", \"explanation\": \"This is the explanation\"}"
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_ophtal-knowledge-base` with `Transport of ascorbic acid to lens`


[0m[36;1m[1;3m[Document(page_content='The essential process is probably anaerobic and in the   \nlens there are a number of enzymes which break down \npyruvate to lactic acid and water. Lactic acid is found in \nconsiderable quantity in the aqueous humour when the lens \nis present; this is not so in the aphakic eye. Agents which \nappear to participate in this process are glutathione and \nascorbic acid (vitamin C) which, reacting together, proba-\nbly participate in an internal auto-oxidative system. The \nformer, both as reduced and oxidized glutathione, occurs \nin very high concentration in the lens, particularly in the \ncortex; the latter is specially secreted by the ciliary body. \nNeither is present in a cataract. The metabolic activity of \nthe lens is largely confined to the cortex; the older nucleus \nis relatively inert.\nThe

In [28]:
result["intermediate_steps"][0][1][0].metadata['source']

'data/book/text/Parsons_ Diseases of the Eye 22nd edition.pdf.txt'

In [19]:
result

{'input': 'Transport of ascorbic acid to lens is done by:\nA. Myoinositol\nB. Choline\nC. Taurine\nD. Na/K ATPase\n\nReturn options, and explanation in json format. Output should look like: {"options": "a", "explanation": "This is the explanation"}',
 'chat_history': [HumanMessage(content='hi, im bob'),
  AIMessage(content='Hello Bob! How can I assist you today?'),
  HumanMessage(content='Transport of ascorbic acid to lens is done by:\nA. Myoinositol\nB. Choline\nC. Taurine\nD. Na/K ATPase\n\nReturn options, and explanation in json format. Output should look like: {"options": "a", "explanation": "This is the explanation"}'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'search_ophtal-knowledge-base', 'arguments': '{\n  "__arg1": "Transport of ascorbic acid to lens"\n}'}}),
  FunctionMessage(content="[Document(page_content='The essential process is probably anaerobic and in the   \\nlens there are a number of enzymes which break down \\npyruvate to lactic acid an

In [37]:
result = agent_executor(
    {
        "input": "Transport of ascorbic acid to lens is done by:\nA. Myoinositol\nB. Choline\nC. Taurine\nD. Na/K ATPase\n\n"
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe transport of ascorbic acid to the lens is done by Na/K ATPase. This enzyme helps to maintain the balance of sodium and potassium ions in the cells, which is necessary for the absorption of ascorbic acid.[0m

[1m> Finished chain.[0m


In [38]:
result

{'input': 'Transport of ascorbic acid to lens is done by:\nA. Myoinositol\nB. Choline\nC. Taurine\nD. Na/K ATPase\n\n',
 'chat_history': [HumanMessage(content='Transport of ascorbic acid to lens is done by:\nA. Myoinositol\nB. Choline\nC. Taurine\nD. Na/K ATPase\n\n'),
  AIMessage(content='The transport of ascorbic acid to the lens is done by Na/K ATPase. This enzyme helps to maintain the balance of sodium and potassium ions in the cells, which is necessary for the absorption of ascorbic acid.')],
 'output': 'The transport of ascorbic acid to the lens is done by Na/K ATPase. This enzyme helps to maintain the balance of sodium and potassium ions in the cells, which is necessary for the absorption of ascorbic acid.',
 'intermediate_steps': []}



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_ophtal-knowledge-base` with `Transport of ascorbic acid to lens`


[0m[36;1m[1;3m[Document(page_content='The essential process is probably anaerobic and in the   \nlens there are a number of enzymes which break down \npyruvate to lactic acid and water. Lactic acid is found in \nconsiderable quantity in the aqueous humour when the lens \nis present; this is not so in the aphakic eye. Agents which \nappear to participate in this process are glutathione and \nascorbic acid (vitamin C) which, reacting together, proba-\nbly participate in an internal auto-oxidative system. The \nformer, both as reduced and oxidized glutathione, occurs \nin very high concentration in the lens, particularly in the \ncortex; the latter is specially secreted by the ciliary body. \nNeither is present in a cataract. The metabolic activity of \nthe lens is largely confined to the cortex; the older nucleus \nis relatively inert.\nThe

In [13]:
result = agent_executor(
    {
        "input": "I belive the answer is A. Myoinositol"
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_ophtal-knowledge-base` with `Myoinositol transport of ascorbic acid to lens`


[0m[36;1m[1;3m[Document(page_content='. Anterior and Posterior. Its levels are reduced in patients of cataract. Most common cause of acquired cataract is senile cataract. \uf077 Myoinositol is actively transported into the lens by sodium dependent carrier mediated CONGENITAL AND DEVELOPMENTAL mechanism. Its levels are markedly CATARACT reduced in cataract. It is a precursor of membrane phosphoinositides which are Etiology involved in Na/Ka ATPase function. It also participates in ascorbic acid transport in a. Heredity: Usually dominant. the lens. b. Maternal factors: Lens 1. Malnutrition. Water 99% 66% 2. TORCHS infections, i.e., Toxoplas- Na+ 144 20 mosis, rubella, cytomegalovirus, Herpes and syphilis. Na + K + AT Pase K+ 4.5 125 3. Drug: Thalidomide, Corticosteroid. Glucose 6 1 4. Radiation. Lactic acid 7.4 14 c. Foetal or I

In [16]:
result = agent_executor(
    {
        "input": "Question: Lens develops from?\nA. Endoderm\nB. Mesoderm\nC. Surface ectoderm\nD. Neuroectoderm" 
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_ophtal-knowledge-base` with `Lens develops from`


[0m[36;1m[1;3m[Document(page_content='The mechanisms responsible for the precise formation and changing organ ization of the \nsuture pattern remain obscure.\nThe  human lens weighs approximately 90\xa0 mg at birth, and it increases in mass \nby approximately 2\xa0 mg per year throughout life as new fibers form. With increas -\ning age, the central, or oldest, lens fibers gradually become less malleable, and the \nlens nucleus becomes more rigid. This pro cess progressively reduces the amplitude o f \naccommodation.CHAPTER 4: Embryology and Developmental\xa0Defects ● 33 \nTunica Vasculosa Lentis\nAround 1 month of gestation, the hyaloid artery, which enters the eye at the optic nerve head \n(also called the optic disc), branches to form a network of capillaries, the tunica vasculosa \nlentis, on the posterior surface of the lens capsule ( Fig\xa04-3 ).  

In [8]:
memory_key = "history"

In [10]:
llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), openai_organization=os.getenv("ORGANIZATION_KEY"), model="gpt-4", temperature=0.2)

In [11]:
memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm)

In [12]:
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.schema.messages import SystemMessage
from langchain.prompts import MessagesPlaceholder

In [13]:
prompt = open("prompts/version_2.txt", "r").read()

In [14]:
system_message = SystemMessage(
    content=prompt,
)

In [15]:
system_message = SystemMessage(
    content="You are an assistant to Ophthamologists and that's your only job. You will be interacting with doctors. Keep your language medical and professional at all times. You can help doctors in answering their questions and also lookup the knowledge base to find something.",
)

In [16]:
prompt = OpenAIFunctionsAgent.create_prompt(
    system_message=system_message,
    extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
)

In [33]:
agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)

In [19]:
agent_executor = create_conversational_retrieval_agent(llm, tools, verbose=True)

In [21]:
result = agent_executor({"input": "can you answer questions about ophtalmology?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYes, I can answer questions about ophthalmology to the best of my knowledge and abilities. However, please note that while I strive to provide accurate and up-to-date information, my responses should not be used as a replacement for professional medical advice.[0m

[1m> Finished chain.[0m


In [22]:
result = agent_executor(
    {
        "input": "Transport of ascorbic acid to lens is done by:\nA. Myoinositol\nB. Choline\nC. Taurine\nD. Na/K ATPase\n\n"
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_ophtal-knowledge-base` with `Transport of ascorbic acid to lens`


[0m[36;1m[1;3m[Document(page_content='The essential process is probably anaerobic and in the   \nlens there are a number of enzymes which break down \npyruvate to lactic acid and water. Lactic acid is found in \nconsiderable quantity in the aqueous humour when the lens \nis present; this is not so in the aphakic eye. Agents which \nappear to participate in this process are glutathione and \nascorbic acid (vitamin C) which, reacting together, proba-\nbly participate in an internal auto-oxidative system. The \nformer, both as reduced and oxidized glutathione, occurs \nin very high concentration in the lens, particularly in the \ncortex; the latter is specially secreted by the ciliary body. \nNeither is present in a cataract. The metabolic activity of \nthe lens is largely confined to the cortex; the older nucleus \nis relatively inert.\nThe

In [23]:
result['output']

'The transport of ascorbic acid (vitamin C) to the lens is not directly facilitated by any of the options provided (Myoinositol, Choline, Taurine, Na/K ATPase). Ascorbic acid is present in high concentrations in the aqueous humor, which nourishes the lens. This high concentration of ascorbic acid in the aqueous humor helps protect intraocular structures by blocking UV light. However, the specific transport mechanism of ascorbic acid to the lens is not mentioned in the options provided.'

In [35]:
result = agent_executor(
    {
        "input": "what all can you do?"
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAs an AI, I can assist with a variety of tasks, including but not limited to:

1. Answering questions: I can provide information on a wide range of topics. If I don't know the answer, I can look it up for you.

2. Setting reminders and alarms: I can help you remember important dates and tasks.

3. Sending messages: I can send emails or text messages on your behalf.

4. Making calls: I can dial numbers and make calls for you.

5. Providing news and weather updates: I can give you the latest news headlines and weather forecasts.

6. Managing your schedule: I can add events to your calendar and notify you of upcoming appointments.

7. Providing entertainment: I can tell jokes, play games, and more.

8. Conducting online searches: I can find and present information from the web.

9. Providing recommendations: I can suggest movies, books, restaurants, and more based on your preferences.

10. Learning and adapting: I can learn from

## ReAct

In [35]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI
import os
from transformers import AutoTokenizer, AutoModel
import torch
from langchain.vectorstores import Pinecone
import pinecone
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.chat_models import ChatOpenAI
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.schema.messages import SystemMessage
from langchain.prompts import MessagesPlaceholder

In [27]:
llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), openai_organization=os.getenv("ORGANIZATION_KEY"), model="gpt-4", temperature=0.2)

In [28]:
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),  # find at app.pinecone.io
    environment=os.getenv("PINECONE_ENV"),  # next to api key in console
)

index_name = "ophtal-knowledge-base"

In [29]:
model = AutoModel.from_pretrained("models/models--BAAI--bge-base-en-v1.5/snapshots/617ca489d9e86b49b8167676d8220688b99db36e")
tokenizer = AutoTokenizer.from_pretrained("models/models--BAAI--bge-base-en-v1.5/snapshots/617ca489d9e86b49b8167676d8220688b99db36e")

In [30]:
def get_bert_embeddings(sentence):
    embeddings = []
    input_ids = tokenizer.encode(sentence, return_tensors="pt")
    with torch.no_grad():
        output = model(input_ids)
    embedding = output.last_hidden_state[:,0,:].numpy().tolist()
    return embedding

In [31]:
index = pinecone.Index(index_name)

In [32]:
vectorstore = Pinecone(index, get_bert_embeddings, "text")
retriever = vectorstore.as_retriever()
tool = create_retriever_tool(
    retriever,
    "search_ophtal-knowledge-base",
    "Searches and returns documents regarding the ophtal-knowledge-base.",
)
tools = [tool]



In [36]:
system_message = SystemMessage(
    content="You are an assistant to Ophthamologists and that's your only job. You will be interacting with doctors. Keep your language medical and professional at all times. You can help doctors in answering their questions and also lookup the knowledge base to find something.",
)

In [38]:
memory_key = "history"

In [41]:
prompt = OpenAIFunctionsAgent.create_prompt(
    system_message=system_message,
    extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
)

In [42]:
agent = AgentType.ZERO_SHOT_REACT_DESCRIPTION(llm=llm, tools=tools, prompt=prompt)

TypeError: 'AgentType' object is not callable

In [33]:
agent_executor = initialize_agent(
    tools, llm, agent=, verbose=True
)

In [34]:
agent_executor.invoke(
    {
        "input": "Question: Lens develops from?\nA. Endoderm\nB. Mesoderm\nC. Surface ectoderm\nD. Neuroectoderm" 
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find out which layer of embryonic tissue the lens of the eye develops from.
Action: search_ophtal-knowledge-base
Action Input: Lens development embryonic tissue[0m
Observation: [36;1m[1;3m[Document(page_content='115\nFigure 2-29 Organization of the lens. At areas where lens cells converge and meet, sutures are\nformed. A, Cutaway view of the adult lens showing an embryonic lens inside. The embryonal\nnucleus has a Y-shaped suture at both the anterior and posterior poles; in the adult lens cortex, the\norganization of the sutures is more complex. At the equator, the lens epithelium can divide, and the\ncells become highly elongated and ribbonlike, sending processes anteriorly and posteriorly. As new\nlens cells are formed, older cells come to lie in the deeper parts of the cortex. B, Cross section and\ncorresponding surface view showing the difference in lens fibers at the anterior (A), intermediate\n(B), and equa

{'input': 'Question: Lens develops from?\nA. Endoderm\nB. Mesoderm\nC. Surface ectoderm\nD. Neuroectoderm',
 'output': 'C. Surface ectoderm'}

In [43]:
agent_executor.invoke(
    {
        "input": "What question did I ask before?" 
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThis question is asking about a previous question, but there is no context provided. I can't answer this question accurately without knowing the previous question.
Final Answer: I'm sorry, but I can't provide the information you're looking for.[0m

[1m> Finished chain.[0m


{'input': 'What question did I ask before?',
 'output': "I'm sorry, but I can't provide the information you're looking for."}