Spaces:

Condense-AI
/

Fast-PDF-Chat

Sleeping

App Files Files Community

Fast-PDF-Chat / app.py

toilaluan

update

37fc80f 3 months ago

raw

history blame contribute delete

2.13 kB

	import gradio as gr
	from gradio_pdf import PDF
	from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache
	from pathlib import Path
	from markitdown import MarkItDown
	from utils import generate_answer, get_condense_kv_cache
	import spaces
	import torch


	# Number of leading document characters that `inference` turns into a KV
	# cache; any text beyond this is sent as part of the regular prompt.
	MAX_CHARS_TO_COMPRESS = 15000
	# Checkpoint identifier shared by the model and the tokenizer.
	MODEL_ID = "unsloth/Mistral-7B-Instruct-v0.2"

	# Converter used to extract text from the uploaded document.
	MID = MarkItDown()
	TOKENIZER = AutoTokenizer.from_pretrained(MODEL_ID)
	# Weights are loaded once at import time in bfloat16.
	MODEL = AutoModelForCausalLM.from_pretrained(
	    MODEL_ID,
	    torch_dtype=torch.bfloat16,
	)

	@torch.no_grad()
	def get_model_kv_cache(context_ids):
	    """Prefill the local model on ``context_ids`` and return its KV cache.

	    Args:
	        context_ids: Tensor of token ids for the context. It is moved to
	            the device the model currently lives on before the forward
	            pass.

	    Returns:
	        A ``DynamicCache`` holding the ``past_key_values`` produced by the
	        forward pass.
	    """
	    # Follow the model's device rather than hard-coding "cuda", so the ids
	    # always end up next to the weights wherever the model was placed.
	    context_ids = context_ids.to(MODEL.device)
	    # Only the cache is used here, so logits are requested for a single
	    # position (num_logits_to_keep=1).
	    past_key_values = MODEL(context_ids, num_logits_to_keep=1).past_key_values
	    # The model may already hand back a DynamicCache; only convert when it
	    # returned something else (e.g. a legacy tuple-of-tuples cache).
	    if isinstance(past_key_values, DynamicCache):
	        return past_key_values
	    return DynamicCache.from_legacy_cache(past_key_values)

	@spaces.GPU
	def inference(question: str, doc_path: str, use_turbo=True) -> str:
	    """Answer ``question`` using the document stored at ``doc_path``.

	    The document is converted to text and truncated. Its first
	    ``MAX_CHARS_TO_COMPRESS`` characters are turned into a KV cache, while
	    the remaining text plus the question are tokenized as the prompt that
	    is handed to ``generate_answer`` together with that cache.

	    Args:
	        question: The user's question.
	        doc_path: Path of the document to read.
	        use_turbo: When true the cache comes from ``get_condense_kv_cache``;
	            otherwise the local model is prefilled via
	            ``get_model_kv_cache``.

	    Returns:
	        Whatever ``generate_answer`` returns for the prompt.

	    Raises:
	        gr.Error: If ``doc_path`` is missing or empty.
	    """
	    # Fail with a readable message when no document path was supplied,
	    # instead of crashing somewhere inside the converter.
	    if not doc_path:
	        raise gr.Error("Please upload a document before asking a question.")

	    MODEL.to("cuda")

	    # Hard cap on how much of the converted document is used at all.
	    max_doc_chars = 20000
	    doc_text = MID.convert(doc_path).text_content[:max_doc_chars]

	    # The "<s>" token is written literally in the prompt, which is why both
	    # encode calls below pass add_special_tokens=False.
	    to_compress_doc = "<s> [INST] " + doc_text[:MAX_CHARS_TO_COMPRESS]
	    question = "\n\nBased on above informations, answer this question: " + question
	    remaining_doc_and_question_prompt = (
	        doc_text[MAX_CHARS_TO_COMPRESS:] + question + " [/INST] "
	    )

	    prompt_ids = TOKENIZER.encode(
	        remaining_doc_and_question_prompt,
	        add_special_tokens=False,
	        return_tensors="pt",
	    )
	    context_ids = TOKENIZER.encode(
	        to_compress_doc,
	        add_special_tokens=False,
	        return_tensors="pt",
	    )
	    # Token length of the context is needed by generate_answer in both
	    # modes, so the context is tokenized even when turbo mode is on.
	    context_length = context_ids.shape[1]

	    if use_turbo:
	        print("turbo-mode-on")
	        kv_cache = get_condense_kv_cache(to_compress_doc).to("cuda")
	    else:
	        print("turbo-mode-off")
	        kv_cache = get_model_kv_cache(context_ids)

	    print("kv-length", kv_cache.get_seq_length())

	    # NOTE(review): the trailing 128 is presumably the max number of new
	    # tokens; confirm against utils.generate_answer.
	    answer = generate_answer(
	        MODEL, TOKENIZER, prompt_ids, kv_cache, context_length, 128
	    )
	    print(answer)
	    return answer




	# Gradio UI: question text, PDF upload and a turbo toggle go in; the answer
	# text comes out. The three inputs map positionally onto `inference`.
	demo = gr.Interface(
	    fn=inference,
	    inputs=[
	        gr.Textbox(label="Question"),
	        PDF(label="Document"),
	        gr.Checkbox(
	            label="Turbo Bittensor",
	            info="Use Subnet 47 API for Prefilling",
	        ),
	    ],
	    outputs=gr.Textbox(),
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(share=True)