pragnakalp
committed on
Commit
·
98c7b0e
1
Parent(s):
0cc7d4a
Update app.py
Browse files
app.py
CHANGED
@@ -29,8 +29,8 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
29 |
DATASET_REPO_URL = "https://huggingface.co/datasets/pragnakalp/OCR-img-to-text"
|
30 |
DATA_FILENAME = "ocr_data.csv"
|
31 |
DATA_FILE = os.path.join("ocr_data", DATA_FILENAME)
|
32 |
-
|
33 |
-
|
34 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
35 |
DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
|
36 |
print("is none?", HF_TOKEN is None)
|
@@ -117,6 +117,23 @@ def ocr_with_easy(img):
|
|
117 |
bounds = reader.readtext('image.png',paragraph="False",detail = 0)
|
118 |
bounds = ''.join(bounds)
|
119 |
return bounds
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
"""
|
121 |
Generate OCR
|
122 |
"""
|
@@ -124,7 +141,7 @@ def generate_ocr(Method,img):
|
|
124 |
try:
|
125 |
text_output = ''
|
126 |
add_csv = []
|
127 |
-
|
128 |
print("Method___________________",Method)
|
129 |
if Method == 'EasyOCR':
|
130 |
text_output = ocr_with_easy(img)
|
@@ -143,6 +160,15 @@ def generate_ocr(Method,img):
|
|
143 |
writer.writerow(add_csv)
|
144 |
commit_url = repo.push_to_hub()
|
145 |
print(commit_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
# try:
|
147 |
# dataset = load_dataset("pragnakalp/OCR-img-to-text", streaming=True)
|
148 |
# print(dataset.features)
|
|
|
29 |
DATASET_REPO_URL = "https://huggingface.co/datasets/pragnakalp/OCR-img-to-text"
|
30 |
DATA_FILENAME = "ocr_data.csv"
|
31 |
DATA_FILE = os.path.join("ocr_data", DATA_FILENAME)
|
32 |
+
DATA_FILENAME2 = "ocr_image.csv"
|
33 |
+
DATA_FILE2 = os.path.join("ocr_image", DATA_FILENAME2)
|
34 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
35 |
DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
|
36 |
print("is none?", HF_TOKEN is None)
|
|
|
117 |
bounds = reader.readtext('image.png',paragraph="False",detail = 0)
|
118 |
bounds = ''.join(bounds)
|
119 |
return bounds
|
120 |
+
|
121 |
+
# def store_single_disk(image, image_id, label):
|
122 |
+
# """ Stores a single image as a .png file on disk.
|
123 |
+
# Parameters:
|
124 |
+
# ---------------
|
125 |
+
# image image array, (32, 32, 3) to be stored
|
126 |
+
# image_id integer unique ID for image
|
127 |
+
# label image label
|
128 |
+
# """
|
129 |
+
# Image.fromarray(image).save(disk_dir / f"{image_id}.png")
|
130 |
+
|
131 |
+
# with open(disk_dir / f"{image_id}.csv", "wt") as csvfile:
|
132 |
+
# writer = csv.writer(
|
133 |
+
# csvfile, delimiter=" ", quotechar="|", quoting=csv.QUOTE_MINIMAL
|
134 |
+
# )
|
135 |
+
# writer.writerow([label])
|
136 |
+
|
137 |
"""
|
138 |
Generate OCR
|
139 |
"""
|
|
|
141 |
try:
|
142 |
text_output = ''
|
143 |
add_csv = []
|
144 |
+
image_id = 1
|
145 |
print("Method___________________",Method)
|
146 |
if Method == 'EasyOCR':
|
147 |
text_output = ocr_with_easy(img)
|
|
|
160 |
writer.writerow(add_csv)
|
161 |
commit_url = repo.push_to_hub()
|
162 |
print(commit_url)
|
163 |
+
|
164 |
+
|
165 |
+
Image.fromarray(image).save(DATA_FILE2 / f"{image_id}.png")
|
166 |
+
with open(DATA_FILE2, "wt") as csvfile:
|
167 |
+
writer = csv.writer(
|
168 |
+
csvfile, delimiter=" ", quotechar="|", quoting=csv.QUOTE_MINIMAL
|
169 |
+
)
|
170 |
+
writer.writerow([0])
|
171 |
+
|
172 |
# try:
|
173 |
# dataset = load_dataset("pragnakalp/OCR-img-to-text", streaming=True)
|
174 |
# print(dataset.features)
|