andgrt committed on
Commit
0dd3755
·
1 Parent(s): 15151eb
Files changed (1) hide show
  1. app.py +23 -8
app.py CHANGED
@@ -5,6 +5,7 @@ from transformers import (
5
  AutoProcessor,
6
  AutoModelForDocumentQuestionAnswering,
7
  )
 
8
  import torch
9
 
10
  tokenizer_ru2en = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-ru-en")
@@ -21,6 +22,14 @@ git_model_base = AutoModelForDocumentQuestionAnswering.from_pretrained(
21
 
22
  device = "cuda" if torch.cuda.is_available() else "cpu"
23
  git_model_base.to(device)
 
 
 
 
 
 
 
 
24
 
25
 
26
  def translate_ru2en(text):
@@ -38,16 +47,22 @@ def translate_en2ru(text):
38
 
39
 
40
  def generate_answer_git(processor, model, image, question):
41
- pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
42
- input_ids = processor(text=question, add_special_tokens=False).input_ids
43
- input_ids = [processor.tokenizer.cls_token_id] + input_ids
44
- input_ids = torch.tensor(input_ids).unsqueeze(0).to(device)
45
 
46
- generated_ids = model.generate(
47
- pixel_values=pixel_values, input_ids=input_ids, max_length=50
 
48
  )
49
- generated_answer = processor.batch_decode(generated_ids, skip_special_tokens=True)
50
- return generated_answer[0]
 
 
 
 
 
 
 
 
 
51
 
52
 
53
  def generate_answer(image, question):
 
5
  AutoProcessor,
6
  AutoModelForDocumentQuestionAnswering,
7
  )
8
+ from transformers import pipeline
9
  import torch
10
 
11
  tokenizer_ru2en = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-ru-en")
 
22
 
23
  device = "cuda" if torch.cuda.is_available() else "cpu"
24
  git_model_base.to(device)
25
+ image_processor = git_processor_base.image_processor
26
+
27
+
28
def preprocess_image(image):
    """Prepare a document image for the QA model.

    Converts the image to RGB and runs it through the module-level
    ``image_processor``, returning the pixel tensor (moved to ``device``)
    together with the OCR words and bounding boxes.

    Args:
        image: Input image (a PIL Image, or anything with ``convert``).

    Returns:
        Tuple ``(pixel_values, words, boxes)`` where ``pixel_values`` is a
        tensor on ``device``.
    """
    image_rgb = image.convert("RGB")
    # Keep the whole BatchFeature here. The original code assigned
    # ``image_processor(...).pixel_values.to(device)`` (a bare tensor) and
    # then read ``.pixel_values`` / ``.words`` / ``.boxes`` off that tensor,
    # which raises AttributeError on every call.
    processed = image_processor(image_rgb, return_tensors="pt")
    # NOTE(review): ``words`` and ``boxes`` are only present when the
    # processor runs OCR (apply_ocr=True) — confirm the processor config.
    return processed.pixel_values.to(device), processed.words, processed.boxes
33
 
34
 
35
  def translate_ru2en(text):
 
47
 
48
 
49
def generate_answer_git(processor, model, image, question):
    """Answer *question* about the document *image* via a DocVQA pipeline.

    The ``processor`` and ``model`` arguments are unused by the current
    implementation; they are kept so existing callers keep working.

    Args:
        processor: Unused (retained for backward compatibility).
        model: Unused (retained for backward compatibility).
        image: Input document image (PIL Image or path accepted by the
            pipeline).
        question: Question text (expected in English).

    Returns:
        The pipeline output: a list of answer dicts (score, answer, spans).
    """
    # Build the pipeline once and cache it on the function object; the
    # original version constructed it on every call, reloading the model
    # weights each time. (Dead commented-out GIT-generation code removed.)
    if not hasattr(generate_answer_git, "_qa_pipeline"):
        generate_answer_git._qa_pipeline = pipeline(
            "document-question-answering",
            model="andgrt/layoutlmv2-base-uncased_finetuned_docvqa",
        )
    return generate_answer_git._qa_pipeline(image, question)
66
 
67
 
68
  def generate_answer(image, question):