|
# Third-party dependencies: PyMuPDF for PDF text extraction, Hugging Face
# transformers for the language model, and Gradio for the web UI.
import fitz

import gradio as gr
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
|
|
|
|
|
def extract_text_from_pdf(pdf_path):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        str: The text of all pages concatenated in page order.
    """
    # Context manager guarantees the document handle is closed even if
    # extraction raises (the original left the document open).
    with fitz.open(pdf_path) as doc:
        # join() builds the result in one pass instead of quadratic +=.
        return "".join(page.get_text() for page in doc)
|
|
|
|
|
def _load_model(model_name="THUDM/glm-4-9b"):
    """Return a cached ``(tokenizer, model)`` pair for *model_name*.

    The original code re-downloaded and re-instantiated the 9B-parameter
    model on every single request; memoizing on the function object makes
    repeat requests essentially free.
    """
    cache = getattr(_load_model, "_cache", {})
    if model_name not in cache:
        cache[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModelForCausalLM.from_pretrained(model_name),
        )
        _load_model._cache = cache
    return cache[model_name]


def analyze_document(file, prompt):
    """Analyze an uploaded TXT or PDF document with the language model.

    Args:
        file: Upload from ``gr.File`` — a path-like object exposing ``.name``
            (or ``None`` when nothing was uploaded).
        prompt: Free-form instruction describing the desired analysis.

    Returns:
        str: The model's response, or a human-readable error message for
        missing/unsupported uploads.
    """
    if file is None:
        return "Unsupported file format. Please upload a PDF or TXT file."

    path = file.name
    # Case-insensitive extension check — "report.PDF" was rejected before.
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        text = extract_text_from_pdf(path)
    elif lowered.endswith(".txt"):
        # gr.File provides a path to a temp file, not an open binary
        # handle, so read it from disk rather than calling file.read().
        with open(path, encoding="utf-8") as fh:
            text = fh.read()
    else:
        return "Unsupported file format. Please upload a PDF or TXT file."

    tokenizer, model = _load_model()

    input_text = f"Document content:\n{text}\n\nPrompt:\n{prompt}"
    inputs = tokenizer(input_text, return_tensors="pt")
    # Bound the generation explicitly; without max_new_tokens, generate()
    # falls back to the model config's default length.
    outputs = model.generate(**inputs, max_new_tokens=512)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return response
|
|
|
|
|
# --- Gradio UI -------------------------------------------------------------
file_input = gr.File(label="Upload TXT or PDF Document", file_count="single")
prompt_input = gr.Textbox(label="Prompt", placeholder="Enter your structured prompt here")
output_text = gr.Textbox(label="Analysis Result")

iface = gr.Interface(
    fn=analyze_document,
    inputs=[file_input, prompt_input],
    outputs=output_text,
    title="Document Analysis with GPT Model",
    description="Upload a TXT or PDF document and enter a prompt to get an analysis.",
)

# Guard the launch so importing this module (e.g. from tests or another
# app) does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()