ramalMr committed on
Commit
a26857e
·
verified ·
1 Parent(s): beb08e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -12
app.py CHANGED
@@ -1,6 +1,6 @@
1
  from huggingface_hub import InferenceClient
2
  import gradio as gr
3
- import re
4
 
5
  client = InferenceClient(
6
  "mistralai/Mixtral-8x7B-Instruct-v0.1"
@@ -14,7 +14,9 @@ def format_prompt(message, history):
14
  prompt += f"[INST] {message} [/INST]"
15
  return prompt
16
 
17
- def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, file=None):
 
 
18
  temperature = float(temperature)
19
  if temperature < 1e-2:
20
  temperature = 1e-2
@@ -30,18 +32,24 @@ def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=256
30
  )
31
 
32
  if file:
33
- sentences = process_file(file)
34
- prompt = "\n".join(sentences)
35
 
36
  formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
37
- response = client.text_generation(formatted_prompt, **generate_kwargs, details=True, return_full_text=True)
38
- return response.text
39
 
40
- def process_file(file):
41
- text = file.decode("utf-8")
42
- sentences = re.split(r'[.!?]+', text)
43
- sentences = [s.strip() for s in sentences if s.strip()]
44
- return sentences
 
 
 
 
 
 
45
 
46
  additional_inputs=[
47
  gr.Textbox(
@@ -85,7 +93,7 @@ additional_inputs=[
85
  interactive=True,
86
  info="Penalize repeated tokens",
87
  ),
88
- gr.File(label="Upload File", file_count="single"),
89
  ]
90
 
91
  gr.ChatInterface(
 
1
  from huggingface_hub import InferenceClient
2
  import gradio as gr
3
+ import PyPDF2
4
 
5
  client = InferenceClient(
6
  "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
14
  prompt += f"[INST] {message} [/INST]"
15
  return prompt
16
 
17
+ def generate(
18
+ prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, file=None
19
+ ):
20
  temperature = float(temperature)
21
  if temperature < 1e-2:
22
  temperature = 1e-2
 
32
  )
33
 
34
  if file:
35
+ text = extract_text_from_pdf(file)
36
+ prompt = text
37
 
38
  formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
39
+ stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
40
+ output = ""
41
 
42
+ for response in stream:
43
+ output += response.token.text
44
+ yield output
45
+ return output
46
+
47
def extract_text_from_pdf(file):
    """Extract and concatenate the text of every page in a PDF.

    Parameters:
        file: a path or binary file-like object accepted by
            ``PyPDF2.PdfReader`` (here, the Gradio file upload).

    Returns:
        str: the text of all pages joined in order; empty string for a
        PDF with no pages or no extractable text.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # Iterate pages directly instead of indexing by range(len(...)).
    # extract_text() may return None for image-only/odd pages — guard
    # with `or ""` so concatenation never raises TypeError, and join
    # once rather than building the string with quadratic `+=`.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
53
 
54
  additional_inputs=[
55
  gr.Textbox(
 
93
  interactive=True,
94
  info="Penalize repeated tokens",
95
  ),
96
+ gr.File(label="Upload PDF File", file_count="single", file_types=[".pdf"]),
97
  ]
98
 
99
  gr.ChatInterface(