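# Gradio demo: upload a PDF, extract the text of its first page, and summarise it
# in French with a local Mistral 7B Instruct (GGUF) model served through GPT4All.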
import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
from pypdf import PdfReader


# Quantized Mistral 7B Instruct weights (GGUF) are downloaded once into ./models
model_path = "models"
model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"

hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)

print("Start the model init process")
model = model = GPT4All(model_name, model_path, allow_download = False, device="cpu")


model.config["promptTemplate"] = "[INST] {0} [/INST]"
model.config["systemPrompt"] = "Tu es un assitant et ta tâche est de résumer des texte en français"
model._is_chat_session_activated = False

max_new_tokens = 2048

def extract_text(file):
    """Extract text from the uploaded PDF; only the first page is returned to keep the prompt short."""
    reader = PdfReader(file)

    # Extract the text of every page
    text = [page.extract_text() for page in reader.pages]

    return text[0]
    
def summarise(text):
    """Generate a summary of the extracted text with the local Mistral model."""
    outputs = model.generate(prompt=text, temp=0.5, top_k=40, top_p=1, max_tokens=max_new_tokens)

    return outputs

with gr.Blocks() as demo:
    file_input = gr.File(label="Upload a PDF file")
    text_output = gr.Textbox(label="Extracted Text")
    summary_output = gr.Textbox(label="Summary")
    # Extract text on upload, then summarise whenever the extracted text changes
    file_input.upload(extract_text, inputs=file_input, outputs=text_output)
    text_output.change(summarise, inputs=text_output, outputs=summary_output)

demo.launch()