xavierbarbier committed
Commit c34df49 · verified · 1 Parent(s): 1961a98

Update app.py: replace the LayoutLM document-QA pipeline with Mistral-7B-Instruct (4-bit GGUF via GPT4All), set up HuggingFace embeddings, and rebuild the UI as a Gradio Blocks app that extracts text from an uploaded PDF.

Files changed (1)
  1. app.py +67 -19
app.py CHANGED
@@ -1,37 +1,85 @@
  import gradio as gr
  from gradio_pdf import PDF
  from pdf2image import convert_from_path
  from transformers import pipeline
  from pathlib import Path
- from pypdf import PdfReader
- import numpy as np

- dir_ = Path(__file__).parent

- p = pipeline(
-     "document-question-answering",
-     model="impira/layoutlm-document-qa",
  )

- def qa(question: str, doc: str) -> str:
-     reader = PdfReader(doc)

      text = []
      for p in np.arange(0, len(reader.pages), 1):
-         page = reader.pages[int(p)]
-
-         # extracting text from page
-         text.append(page.extract_text())
      text = ' '.join(text)
-     #output = p(img, question)
      return text


- demo = gr.Interface(
-     qa,
-     [gr.Textbox(label="Question"), PDF(label="Document")],
-     gr.Textbox(),
  )
-
  if __name__ == "__main__":
-     demo.launch()
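For context: the removed version wires up a LayoutLM document-QA pipeline but never calls it; the #output = p(img, question) line is commented out and qa returns the raw pypdf text instead. A minimal sketch of how that pipeline is typically invoked, assuming the first page is rendered to an image with pdf2image (which needs poppler installed, plus pytesseract for the OCR word boxes LayoutLM expects):

from pdf2image import convert_from_path
from transformers import pipeline

p = pipeline(
    "document-question-answering",
    model="impira/layoutlm-document-qa",
)

def qa(question: str, doc: str) -> str:
    # Render the first PDF page to a PIL image.
    img = convert_from_path(doc)[0]
    # The pipeline returns answer dicts sorted by score.
    output = p(img, question)
    return output[0]["answer"]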
 
  import gradio as gr
+ from gpt4all import GPT4All
+ from huggingface_hub import hf_hub_download
+ import faiss
+ #from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_huggingface import HuggingFaceEmbeddings
+ import numpy as np
+ from pypdf import PdfReader
  from gradio_pdf import PDF
  from pdf2image import convert_from_path
  from transformers import pipeline
  from pathlib import Path
+ from langchain_chroma import Chroma
+
+ title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"
+
+ description = """
+ 🔎 [Mistral AI's Mistral 7B Instruct v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1), a 4-bit quantized [GGUF model](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF) balancing quality and size, running on CPU. English only (other languages are supported, but quality is lower). Built with [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) and [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all).
+ 🔨 Running on CPU-Basic free hardware. Consider duplicating this Space to run without a queue.
+ Mistral does not currently support a system-prompt token (such as `<<SYS>>`); put your system prompt in the first message if you need one. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing).
+ """
+
+ """
+ [Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
+ [Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
+ """

+ model_path = "models"
+ model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"

+ hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
+
+ print("Start the model init process")
+ model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
+
+ model.config["promptTemplate"] = "[INST] {0} [/INST]"
+ model.config["systemPrompt"] = "Tu es un assistant et tu dois répondre en français"
+ model._is_chat_session_activated = False
+
+ max_new_tokens = 2048
+
+ model_kwargs = {'device': 'cpu'}
+ encode_kwargs = {'normalize_embeddings': False}
+ embeddings = HuggingFaceEmbeddings(
+     model_kwargs=model_kwargs,
+     encode_kwargs=encode_kwargs
  )

+ chunk_size = 2048
+
+ print("Finish the model init process")
+
+ def extract_text(file):
+     # creating a pdf reader object for the uploaded file
+     reader = PdfReader(file)

      text = []
      for p in np.arange(0, len(reader.pages), 1):
+         page = reader.pages[int(p)]
+
+         # extracting text from page
+         text.append(page.extract_text())
+
      text = ' '.join(text)
+
      return text
+


+ with gr.Blocks() as demo:
+     file_input = gr.File(label="Upload a PDF file")
+     text_output = gr.Textbox(label="Extracted Text")
+
+     file_input.upload(extract_text, inputs=file_input, outputs=text_output)

  if __name__ == "__main__":
+     demo.queue(max_size=3).launch()
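The new version initializes the GGUF model and the sentence embeddings, and imports faiss and Chroma, but the Blocks UI only extracts text so far; nothing is embedded, retrieved, or generated yet. A minimal sketch of how those pieces could be combined into retrieval-augmented QA over the extracted text, reusing the globals defined above (answer_question and its chunking are hypothetical, not part of this commit):

import faiss
import numpy as np

def answer_question(question: str, doc_text: str) -> str:
    # Hypothetical helper: split the extracted text into fixed-size chunks.
    chunks = [doc_text[i:i + chunk_size] for i in range(0, len(doc_text), chunk_size)]

    # Embed the chunks and index them in FAISS (L2 distance).
    vectors = np.array(embeddings.embed_documents(chunks), dtype="float32")
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)

    # Retrieve the chunks closest to the question embedding.
    query = np.array([embeddings.embed_query(question)], dtype="float32")
    _, ids = index.search(query, min(3, len(chunks)))
    context = "\n".join(chunks[i] for i in ids[0])

    # Ask Mistral, using the [INST] template the commit configures.
    prompt = f"[INST] {context}\n\nQuestion: {question} [/INST]"
    return model.generate(prompt, max_tokens=max_new_tokens)

Wiring this into the UI would only need a question Textbox plus, e.g., a question_box.submit(...) handler alongside the existing file_input.upload one.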