# chatpdf / app.py
import shutil
from pathlib import Path
from tempfile import NamedTemporaryFile

import anthropic
import gradio as gr

from util import pdf_to_text, text_to_chunks, SemanticSearch

# Alternative backend: an OpenAI-compatible client pointed at a Hugging Face
# inference endpoint (needs `import os` and `from openai import OpenAI`):
# client = OpenAI(
#     base_url='https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1/v1/',
#     api_key=os.getenv('openai_key'),
# )

# Reads the API key from the ANTHROPIC_API_KEY environment variable.
client = anthropic.Anthropic()
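# Assumed interface of the util helpers, inferred from how they are used below:
#   pdf_to_text(path, start_page)      -> extracted page texts
#   text_to_chunks(texts, start_page)  -> text chunks tagged with page numbers
#   SemanticSearch().fit(chunks)       -> builds the embedding index
#   SemanticSearch()(query)            -> returns the top-n matching chunks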
recommender = SemanticSearch()
def load_recommender(path, start_page=1):
    """Extract text from the PDF at `path`, chunk it, and index the chunks."""
    global recommender
    texts = pdf_to_text(path, start_page=start_page)
    chunks = text_to_chunks(texts, start_page=start_page)
    recommender.fit(chunks)
    return 'Corpus Loaded.'
# def openai_generate_text(prompt, model="gpt-3.5-turbo-16k-0613"):
#     model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
#     max_tokens = 1024
#     message = client.chat.completions.create(
#         model=model,
#         messages=[
#             {"role": "user", "content": prompt}
#         ],
#         max_tokens=max_tokens,
#     ).choices[0].message.content
#     return message
def claude_generate_text(prompt, model="claude-3-haiku-20240307"):
    """Send a single-turn prompt to Claude and return the text of the reply."""
    message = client.messages.create(
        model=model,
        max_tokens=1000,
        temperature=0.0,
        # system="Respond only in Mandarin",
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    # The response content is a list of blocks; the first block holds the text.
    return message.content[0].text
def generate_answer(question):
    """Retrieve the most relevant chunks and ask Claude to answer from them."""
    topn_chunks = recommender(question)
    prompt = 'search results:\n\n'
    for c in topn_chunks:
        prompt += c + '\n\n'
    prompt += "Instructions: Compose a comprehensive reply to the query using the search results given. "\
              "Cite each reference using [Page Number] notation. "\
              "Only answer what is asked. The answer should be short and concise. "\
              "If asked in Chinese, respond in Chinese; if asked in English, respond "\
              "in English.\n\nQuery: "
    prompt += f"{question}\nAnswer:"
    answer = claude_generate_text(prompt)
    return answer
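# Example (with a hypothetical local file ./paper.pdf):
#   load_recommender("./paper.pdf")
#   print(generate_answer("What problem does this paper address?"))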
def question_answer(chat_history, file, question):
    """Gradio callback: index the uploaded PDF, answer the question, update the chat."""
    if file is None or not question.strip():
        chat_history.append([question, 'Please upload a PDF and enter a question.'])
        return chat_history
    # Copy the upload to a temp file so the recommender reads a stable path.
    suffix = Path(file.name).suffix
    with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        shutil.copyfile(file.name, tmp.name)
        tmp_path = Path(tmp.name)
    load_recommender(str(tmp_path))
    answer = generate_answer(question)
    chat_history.append([question, answer])
    return chat_history
title = 'PDF GPT'
description = """Upload a PDF and ask questions about its contents; answers cite page numbers from the document."""
with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200px; }""") as demo:
    gr.Markdown(f'<center><h3>{title}</h3></center>')
    gr.Markdown(description)
    with gr.Row():
        with gr.Group():
            with gr.Accordion("PDF file"):
                file = gr.File(label='Upload your PDF / research paper / book here', file_types=['.pdf'])
            question = gr.Textbox(label='Enter your question here')
            btn = gr.Button(value='Submit')
        with gr.Group():
            chatbot = gr.Chatbot(label="Chat History", elem_id="chatbot")
    btn.click(
        question_answer,
        inputs=[chatbot, file, question],
        outputs=[chatbot],
        api_name="predict",
    )

# Bind to all interfaces so the app is reachable when run in a container/Space.
demo.launch(server_name="0.0.0.0")