bambadij committed on
Commit
5e06f87
·
1 Parent(s): b9dc5a8
Files changed (1) hide show
  1. app.py +27 -12
app.py CHANGED
@@ -4,10 +4,11 @@ from fastapi import FastAPI,HTTPException,status,UploadFile,File
4
  from pydantic import BaseModel
5
  import uvicorn
6
  import logging
7
- from PIL import Image
8
  import pytesseract
9
  from io import BytesIO
10
  import os
 
11
 
12
  # Configurer les répertoires de cache
13
  os.environ['TRANSFORMERS_CACHE'] = '/app/.cache'
@@ -32,8 +33,7 @@ app =FastAPI(
32
  logging.basicConfig(level=logging.INFO)
33
  logger =logging.getLogger(__name__)
34
  summarize =pipeline('summarization', model="facebook/bart-large-cnn")
35
- generated_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
36
- # translation_pieline =pipeline("translation_en_to_fr",model="Helsinki-NLP/opus-mt-en-fr")
37
  classify_zero_shot = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
38
 
39
  class TextSummary(BaseModel):
@@ -50,7 +50,9 @@ async def home():
50
  async def summary_text_bart(input:TextSummary):
51
  "add text to summarize"
52
  try:
53
- summary = summarize(input.text,do_sample=False)
 
 
54
  summary_text =summary[0].get('summary_text')
55
  result = classify_zero_shot(
56
  summary_text,
@@ -66,7 +68,7 @@ async def summary_text_bart(input:TextSummary):
66
  return {
67
  "summary_text" :summary_text,
68
  "Statut":formatted_result,
69
- # "format":formatted_result,
70
  "len_input" : len(input.text),
71
  "len_output" :len(summary_text)
72
 
@@ -84,17 +86,30 @@ async def summary_text_bart(input:TextSummary):
84
  async def upload_image(image: UploadFile = File(...)):
85
  contents = await image.read()
86
  try:
87
- image_open = Image.open(BytesIO(contents))
 
 
 
 
 
88
  raw_text = pytesseract.image_to_string(image_open,lang='fra')
89
- summary = summarize(raw_text,do_sample=False)
90
- summary_text =summary[0].get('summary_text')
 
 
 
 
91
  return {
92
- # "text": raw_text,
93
- "summary":summary_text}
 
 
 
94
  except Exception as e:
95
- return {"error": str(e)}
 
96
  if __name__ == "__main__":
97
- uvicorn.run("main:app",host="0.0.0.0",port=8000,reload=True)
98
 
99
 
100
 
 
4
  from pydantic import BaseModel
5
  import uvicorn
6
  import logging
7
+ from PIL import Image,UnidentifiedImageError
8
  import pytesseract
9
  from io import BytesIO
10
  import os
11
+ import re
12
 
13
  # Configurer les répertoires de cache
14
  os.environ['TRANSFORMERS_CACHE'] = '/app/.cache'
 
33
  logging.basicConfig(level=logging.INFO)
34
  logger =logging.getLogger(__name__)
35
  summarize =pipeline('summarization', model="facebook/bart-large-cnn")
36
+ pipe = pipeline("summarization", model="plguillou/t5-base-fr-sum-cnndm")
 
37
  classify_zero_shot = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
38
 
39
  class TextSummary(BaseModel):
 
50
  async def summary_text_bart(input:TextSummary):
51
  "add text to summarize"
52
  try:
53
+
54
+ preprocessed_text_summary =re.sub(r'\s+',' ',input.text).strip()
55
+ summary = summarize(preprocessed_text_summary,do_sample=False)
56
  summary_text =summary[0].get('summary_text')
57
  result = classify_zero_shot(
58
  summary_text,
 
68
  return {
69
  "summary_text" :summary_text,
70
  "Statut":formatted_result,
71
+ # "preprocessed_text_summary":preprocessed_text_summary,
72
  "len_input" : len(input.text),
73
  "len_output" :len(summary_text)
74
 
 
86
  async def upload_image(image: UploadFile = File(...)):
87
  contents = await image.read()
88
  try:
89
+
90
+ try:
91
+ image_open = Image.open(BytesIO(contents))
92
+ except UnidentifiedImageError:
93
+ raise HTTPException(status_code=400, detail="Unsupported or corrupted image file.")
94
+
95
  raw_text = pytesseract.image_to_string(image_open,lang='fra')
96
+ logger.info(f"Extract text:{raw_text}")
97
+ #preprocessing the text
98
+ preprocessed_text =re.sub(r'\s+',' ',raw_text).strip()
99
+ logger.info(f"Preprocessing text:{preprocessed_text}")
100
+ text_summary = pipe(raw_text,do_sample=False)
101
+ summary_text_image =text_summary[0].get('summary_text')
102
  return {
103
+ # "text": preprocessed_text,
104
+ # "len":len(preprocessed_text),
105
+ "summary":summary_text_image,
106
+ "len summary":len(summary_text_image)
107
+ }
108
  except Exception as e:
109
+ logger.error(f"Error processing image:{e}")
110
+ raise HTTPException(status_code=500, detail=str(e))
111
  if __name__ == "__main__":
112
+ uvicorn.run("app:app",host="0.0.0.0",port=8000,reload=True)
113
 
114
 
115