thepowerfuldeez committed on
Commit
72a3819
·
verified ·
1 Parent(s): 4d1d8eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -8
app.py CHANGED
@@ -4,14 +4,22 @@ from arxiv2text import arxiv_to_text
4
  import torch
5
  from transformers import AutoTokenizer, AutoModelForCausalLM
6
 
7
- def get_model(model_url="thepowerfuldeez/Qwen2-1.5B-Summarize"):
8
  tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
9
- model = AutoModelForCausalLM.from_pretrained(
10
- model_url,
11
- bnb_4bit_compute_dtype=torch.bfloat16,
12
- load_in_4bit=True,
13
- attn_implementation="flash_attention_2",
14
- )
 
 
 
 
 
 
 
 
15
  return model, tokenizer
16
 
17
 
@@ -27,7 +35,7 @@ def call_llm(model, tokenizer, text):
27
  output = tokenizer.decode(new_tokens, skip_special_tokens=True)
28
  return output
29
 
30
- model, tokenizer = get_model()
31
 
32
  def summarize_pdf(pdf_url):
33
  extracted_text = arxiv_to_text(pdf_url)
 
4
  import torch
5
  from transformers import AutoTokenizer, AutoModelForCausalLM
6
 
7
+ def get_model(model_url="thepowerfuldeez/Qwen2-1.5B-Summarize", use_cpu=False):
8
  tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
9
+ if use_cpu:
10
+ model = OVModelForCausalLM.from_pretrained(
11
+ model_url,
12
+ device_map="cpu",
13
+ load_in_4bit=True,
14
+ attn_implementation="flash_attention_2",
15
+ )
16
+ else:
17
+ model = AutoModelForCausalLM.from_pretrained(
18
+ model_url,
19
+ bnb_4bit_compute_dtype=torch.bfloat16,
20
+ load_in_4bit=True,
21
+ attn_implementation="flash_attention_2",
22
+ )
23
  return model, tokenizer
24
 
25
 
 
35
  output = tokenizer.decode(new_tokens, skip_special_tokens=True)
36
  return output
37
 
38
+ model, tokenizer = get_model(use_cpu=True)
39
 
40
  def summarize_pdf(pdf_url):
41
  extracted_text = arxiv_to_text(pdf_url)