Shabana committed on
Commit
b877ba2
1 Parent(s): a0270cc

has been committed

Browse files
Files changed (4) hide show
  1. Makefile +27 -0
  2. README.md +9 -1
  3. app.py +118 -0
  4. requirements.txt +167 -0
Makefile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Developer task runner: dependency install, tests, debugging helpers,
# formatting and linting. `make all` runs install, lint, test and format.

# None of these targets produce a file of the same name, so mark them phony;
# otherwise a stray file or directory called e.g. `test` or `format` would
# make the target look up to date and silently skip it.
.PHONY: install test debug one-test debugthree format lint all

install:
	pip install --upgrade pip &&\
		pip install -r requirements.txt

test:
	python -m pytest -vvv --cov=hello --cov=greeting \
		--cov=smath --cov=web tests
	python -m pytest --nbval notebook.ipynb #tests our jupyter notebook
	#python -m pytest -v tests/test_web.py #if you just want to test web

debug:
	python -m pytest -vv --pdb #Debugger is invoked

one-test:
	python -m pytest -vv tests/test_greeting.py::test_my_name4

debugthree:
	# --maxfail=N stops the run after N failures, so N must be 3 (not 4)
	# for the debugger to be entered on the first three failures only.
	python -m pytest -vv --pdb --maxfail=3 # drop to PDB for first three failures

format:
	black *.py

lint:
	pylint --disable=R,C *.py

all: install lint test format
README.md CHANGED
@@ -1 +1,9 @@
1
- # hface
 
 
 
 
 
 
 
 
 
1
---
title: OCR Image To Text
emoji: 📸
colorFrom: green
colorTo: blue
sdk: gradio
sdk_version: 3.11.0
app_file: app.py
pinned: false
---

# hface
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tensorflow as tf
3
+ import keras_ocr
4
+ import requests
5
+ import cv2
6
+ import os
7
+ import csv
8
+ import numpy as np
9
+ import pandas as pd
10
+ import huggingface_hub
11
+ from huggingface_hub import Repository
12
+ from datetime import datetime
13
+ import scipy.ndimage.interpolation as inter
14
+ import easyocr
15
+ import datasets
16
+ from datasets import load_dataset, Image
17
+ from PIL import Image
18
+ from paddleocr import PaddleOCR
19
+ from save_data import flag
20
+
21
+ """
22
+ Paddle OCR
23
+ """
24
def ocr_with_paddle(img):
    """Run PaddleOCR (English, with angle classification) on ``img``.

    Args:
        img: The image to recognise; it is passed unchanged to
            ``PaddleOCR.ocr``.

    Returns:
        The recognised text fragments of the first result entry, each one
        prefixed with a single space, concatenated in the order PaddleOCR
        returned them. ``''`` when nothing was recognised.
    """
    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)

    # NOTE(review): depending on the PaddleOCR release, an image without any
    # detected text appears to yield ``[None]``, ``[[]]`` or ``[]``; treat all
    # of them as "no text" instead of crashing on ``len(None)`` / ``result[0]``.
    detections = result[0] if result else None
    if not detections:
        return ''

    # Each detection is indexed as [box, (text, score)]; only the text is kept.
    return ''.join(' ' + detection[1][0] for detection in detections)
34
+
35
+ """
36
+ Keras OCR
37
+ """
38
def ocr_with_keras(img):
    """Recognise text in ``img`` with a freshly built keras-ocr pipeline.

    Args:
        img: The image to recognise; it is handed to ``keras_ocr.tools.read``
            before being given to the pipeline.

    Returns:
        Every recognised word of the single submitted image, each prefixed
        with one space and concatenated in prediction order.
    """
    recognizer = keras_ocr.pipeline.Pipeline()
    batch = [keras_ocr.tools.read(img)]
    # The pipeline returns one prediction list per input image; only one
    # image is submitted, so take the first list of (word, box) pairs.
    word_boxes = recognizer.recognize(batch)[0]
    return ''.join(' ' + word for word, _box in word_boxes)
47
+
48
+ """
49
+ easy OCR
50
+ """
51
+ # gray scale image
52
def get_grayscale(image):
    """Return ``image`` converted with OpenCV's ``COLOR_BGR2GRAY`` conversion."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray
54
+
55
+ # Thresholding or Binarization
56
def thresholding(src):
    """Return ``src`` after ``cv2.THRESH_TOZERO`` thresholding at 127.

    ``cv2.threshold`` returns a ``(retval, image)`` pair; only the image is
    returned to the caller. ``src`` itself is not modified here.
    """
    _retval, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
    return thresholded
58
def ocr_with_easy(img):
    """Recognise Thai/English text in ``img`` with EasyOCR.

    The image is converted to grayscale, thresholded, written to
    ``image.png`` in the current working directory and then read back by
    EasyOCR from that path.

    Args:
        img: The colour image to recognise, as accepted by ``get_grayscale``.

    Returns:
        The text fragments returned by EasyOCR joined without a separator.
    """
    gray_scale_image = get_grayscale(img)
    # ``thresholding`` returns a new image rather than modifying its input;
    # the result was previously discarded, so OCR never saw the thresholded
    # image. Keep it and use it from here on.
    preprocessed = thresholding(gray_scale_image)
    cv2.imwrite('image.png', preprocessed)

    reader = easyocr.Reader(['th', 'en'])
    # The original passed the string "False", which is truthy, so paragraph
    # mode has effectively always been enabled. Spell that out as ``True`` to
    # keep the output unchanged while removing the misleading literal.
    fragments = reader.readtext('image.png', paragraph=True, detail=0)
    return ''.join(fragments)
66
+
67
+ """
68
+ Generate OCR
69
+ """
70
def generate_ocr(Method, img):
    """Run the selected OCR engine on ``img`` and return the recognised text.

    Args:
        Method: Engine name; one of ``'EasyOCR'``, ``'KerasOCR'`` or
            ``'PaddleOCR'``. Any other value yields an empty result.
        img: The uploaded image as an array, or ``None`` when nothing was
            uploaded.

    Returns:
        The text produced by the chosen engine (``''`` for an unknown
        ``Method``).

    Raises:
        gr.Error: If ``img`` is ``None`` or contains only zero values.
    """
    # ``img`` is None when the image input is left empty; check that before
    # calling ``.any()`` so the user sees the intended message instead of an
    # AttributeError.
    if img is None or not img.any():
        raise gr.Error("Please upload an image!!!!")

    print("Method___________________", Method)

    text_output = ''
    if Method == 'EasyOCR':
        text_output = ocr_with_easy(img)
    elif Method == 'KerasOCR':
        text_output = ocr_with_keras(img)
    elif Method == 'PaddleOCR':
        text_output = ocr_with_paddle(img)

    # Recording the request via ``flag`` is best-effort: a failure there is
    # printed but must not prevent the OCR result from being returned.
    try:
        flag(Method, text_output, img)
    except Exception as e:
        print(e)
    return text_output
91
+
92
+ # except Exception as e:
93
+ # print("Error in ocr generation ==>",e)
94
+ # text_output = "Something went wrong"
95
+ # return text_output
96
+
97
+
98
+ """
99
+ Create user interface for OCR demo
100
+ """
101
+
102
# Input widgets: the image to recognise and the OCR engine selector
# (PaddleOCR is preselected); the result is shown in a text box.
image = gr.Image(shape=(300, 300))
method = gr.Radio(
    ["PaddleOCR", "EasyOCR", "KerasOCR"],
    value="PaddleOCR",
)
output = gr.Textbox(label="Output")

# Wire ``generate_ocr`` to the widgets; the input order matches its
# ``(Method, img)`` parameters.
demo = gr.Interface(
    fn=generate_ocr,
    inputs=[method, image],
    outputs=output,
    title="Optical Character Recognition",
    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
    article="""<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
<a href="mailto:[email protected]" target="_blank">[email protected]</a>
<p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com" target="_blank">Pragnakalp Techlabs</a></p>""",
)
demo.launch(enable_queue=False)
requirements.txt ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.3.0
2
+ aiohttp==3.8.3
3
+ aiosignal==1.3.1
4
+ analytics-python==1.4.0
5
+ anyio==3.6.2
6
+ astor==0.8.1
7
+ astunparse==1.6.3
8
+ async-timeout==4.0.2
9
+ attrdict==2.0.1
10
+ attrs==22.1.0
11
+ Babel==2.11.0
12
+ backoff==1.10.0
13
+ bce-python-sdk==0.8.74
14
+ bcrypt==4.0.1
15
+ beautifulsoup4==4.11.1
16
+ cachetools==5.2.0
17
+ certifi==2022.9.24
18
+ cffi==1.15.1
19
+ charset-normalizer==2.1.1
20
+ click==8.1.3
21
+ cloudpickle==2.2.0
22
+ commonmark==0.9.1
23
+ contourpy==1.0.6
24
+ cryptography==38.0.3
25
+ cssselect==1.2.0
26
+ cssutils==2.6.0
27
+ cycler==0.11.0
28
+ Cython==0.29.32
29
+ decorator==5.1.1
30
+ dill==0.3.6
31
+ easyocr==1.6.2
32
+ editdistance==0.6.1
33
+ efficientnet==1.0.0
34
+ essential-generators==1.0
35
+ et-xmlfile==1.1.0
36
+ fastapi==0.87.0
37
+ ffmpy==0.3.0
38
+ fire==0.4.0
39
+ Flask==2.2.2
40
+ Flask-Babel==2.0.0
41
+ flatbuffers==22.10.26
42
+ fonttools==4.38.0
43
+ frozenlist==1.3.3
44
+ future==0.18.2
45
+ gast==0.4.0
46
+ google-auth==2.14.1
47
+ google-auth-oauthlib==0.4.6
48
+ google-pasta==0.2.0
49
+ gradio==3.0
50
+ grpcio==1.50.0
51
+ h11==0.14.0
52
+ h5py==3.7.0
53
+ idna==3.4
54
+ imageio==2.22.4
55
+ imgaug==0.4.0
56
+ importlib-metadata==5.0.0
57
+ itsdangerous==2.1.2
58
+ Jinja2==3.1.2
59
+ joblib==1.2.0
60
+ keras==2.11.0
61
+ Keras-Applications==1.0.8
62
+ keras-ocr==0.8.9
63
+ kiwisolver==1.4.4
64
+ libclang==14.0.6
65
+ linkify-it-py==1.0.3
66
+ llvmlite==0.39.1
67
+ lmdb==1.3.0
68
+ lxml==4.9.1
69
+ Markdown==3.4.1
70
+ markdown-it-py==2.1.0
71
+ MarkupSafe==2.1.1
72
+ matplotlib==3.3.0
73
+ mdit-py-plugins==0.3.1
74
+ mdurl==0.1.2
75
+ memory-profiler==0.61.0
76
+ monotonic==1.6
77
+ multidict==6.0.2
78
+ multiprocess==0.70.14
79
+ networkx==2.8.8
80
+ ninja==1.11.1
81
+ numba==0.56.4
82
+ numpy==1.23.5
83
+ nvidia-cublas-cu11==11.10.3.66
84
+ nvidia-cuda-nvrtc-cu11==11.7.99
85
+ nvidia-cuda-runtime-cu11==11.7.99
86
+ nvidia-cudnn-cu11==8.5.0.96
87
+ oauthlib==3.2.2
88
+ openai==0.25.0
89
+ opencv-contrib-python==4.5.5.62
90
+ opencv-python==4.5.5.64
91
+ opencv-python-headless==4.5.4.60
92
+ openpyxl==3.0.10
93
+ opt-einsum==3.3.0
94
+ orjson==3.8.2
95
+ packaging==21.3
96
+ paddle-bfloat==0.1.7
97
+ paddleocr==2.6.1.1
98
+ paddlepaddle==2.4.0
99
+ pandas==1.5.1
100
+ pandas-stubs==1.5.1.221024
101
+ paramiko==2.12.0
102
+ pdf2docx==0.5.6
103
+ Pillow==9.3.0
104
+ premailer==3.10.0
105
+ protobuf==3.19.6
106
+ psutil==5.9.4
107
+ pyasn1==0.4.8
108
+ pyasn1-modules==0.2.8
109
+ pyclipper==1.3.0.post4
110
+ pycparser==2.21
111
+ pycryptodome==3.15.0
112
+ pydantic==1.10.2
113
+ pydub==0.25.1
114
+ Pygments==2.13.0
115
+ PyMuPDF==1.20.2
116
+ PyNaCl==1.5.0
117
+ pynvml==11.4.1
118
+ pyparsing==3.0.9
119
+ python-bidi==0.4.2
120
+ python-dateutil==2.8.2
121
+ python-docx==0.8.11
122
+ python-http-client==3.3.7
123
+ python-multipart==0.0.5
124
+ pytz==2022.6
125
+ PyWavelets==1.4.1
126
+ PyYAML==6.0
127
+ rapidfuzz==2.13.2
128
+ requests==2.28.1
129
+ requests-oauthlib==1.3.1
130
+ rich==12.6.0
131
+ rsa==4.9
132
+ scalene==1.5.15
133
+ scikit-image==0.19.3
134
+ scikit-learn==1.1.3
135
+ scipy==1.9.3
136
+ sendgrid==6.9.7
137
+ shap==0.41.0
138
+ Shapely==1.8.5.post1
139
+ six==1.16.0
140
+ slicer==0.0.7
141
+ sniffio==1.3.0
142
+ soupsieve==2.3.2.post1
143
+ starkbank-ecdsa==2.2.0
144
+ starlette==0.21.0
145
+ tensorboard==2.11.0
146
+ tensorboard-data-server==0.6.1
147
+ tensorboard-plugin-wit==1.8.1
148
+ tensorflow==2.11.0
149
+ tensorflow-estimator==2.11.0
150
+ tensorflow-io-gcs-filesystem==0.28.0
151
+ termcolor==2.1.1
152
+ threadpoolctl==3.1.0
153
+ tifffile==2022.10.10
154
+ torch==1.13.0
155
+ torchvision==0.14.0
156
+ tqdm==4.64.1
157
+ types-pytz==2022.6.0.1
158
+ typing-extensions==4.4.0
159
+ uc-micro-py==1.0.1
160
+ urllib3==1.26.12
161
+ uvicorn==0.20.0
162
+ validators==0.20.0
163
+ visualdl==2.4.1
164
+ Werkzeug==2.2.2
165
+ wrapt==1.14.1
166
+ yarl==1.8.1
167
+ zipp==3.10.0