Shabana committed on
Commit 153d03d
1 Parent(s): f8e85d7

Update app.py

Files changed (1)
  1. app.py +59 -108
app.py CHANGED
@@ -1,118 +1,69 @@
+import torch
+import re
 import gradio as gr
-import tensorflow as tf
-import keras_ocr
-import requests
-import cv2
-import os
-import csv
-import numpy as np
-import pandas as pd
-import huggingface_hub
-from huggingface_hub import Repository
-from datetime import datetime
-import scipy.ndimage.interpolation as inter
-import easyocr
-import datasets
-from datasets import load_dataset, Image
 from PIL import Image
-from paddleocr import PaddleOCR
-from save_data import flag
-
-"""
-Paddle OCR
-"""
-def ocr_with_paddle(img):
-    finaltext = ''
-    ocr = PaddleOCR(lang='en', use_angle_cls=True)
-    # img_path = 'exp.jpeg'
-    result = ocr.ocr(img)
-
-    for i in range(len(result[0])):
-        text = result[0][i][1][0]
-        finaltext += ' '+ text
-    return finaltext
-
-"""
-Keras OCR
-"""
-def ocr_with_keras(img):
-    output_text = ''
-    pipeline=keras_ocr.pipeline.Pipeline()
-    images=[keras_ocr.tools.read(img)]
-    predictions=pipeline.recognize(images)
-    first=predictions[0]
-    for text,box in first:
-        output_text += ' '+ text
-    return output_text
-
-"""
-easy OCR
-"""
-# gray scale image
-def get_grayscale(image):
-    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-
-# Thresholding or Binarization
-def thresholding(src):
-    return cv2.threshold(src,127,255, cv2.THRESH_TOZERO)[1]
-
-def ocr_with_easy(img):
-    gray_scale_image=get_grayscale(img)
-    thresholding(gray_scale_image)
-    cv2.imwrite('image.png',gray_scale_image)
-    reader = easyocr.Reader(['th','en'])
-    bounds = reader.readtext('image.png',paragraph="False",detail = 0)
-    bounds = ''.join(bounds)
-    return bounds
-
-"""
-Generate OCR
-"""
-def generate_ocr(Method,img):
-    text_output = ''
-    if (img).any():
-        add_csv = []
-        image_id = 1
-        print("Method___________________",Method)
-        if Method == 'EasyOCR':
-            text_output = ocr_with_easy(img)
-        if Method == 'KerasOCR':
-            text_output = ocr_with_keras(img)
-        if Method == 'PaddleOCR':
-            text_output = ocr_with_paddle(img)
-
-        try:
-            flag(Method,text_output,img)
-        except Exception as e:
-            print(e)
-        return text_output
-    else:
-        raise gr.Error("Please upload an image!!!!")
-
-    # except Exception as e:
-    #     print("Error in ocr generation ==>",e)
-    #     text_output = "Something went wrong"
-    #     return text_output
-
-"""
-Create user interface for OCR demo
-"""
-image = gr.Image(shape=(300, 300))
-method = gr.Radio(["PaddleOCR","EasyOCR", "KerasOCR"],value="PaddleOCR")
-output = gr.Textbox(label="Output")
-
-demo = gr.Interface(
-    generate_ocr,
-    [method,image],
-    output,
-    title="Optical Character Recognition",
-    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
-    article = """<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
-    <a href="mailto:[email protected]" target="_blank">[email protected]</a>
-    <p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com" target="_blank">Pragnakalp Techlabs</a></p>"""
-)
-demo.launch(enable_queue = False)
+
+from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
+import os
+# Set this before importing TensorFlow, otherwise the flag has no effect.
+os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
+import tensorflow as tf
+
+device='cpu'
+
+model_id = "nttdataspain/vit-gpt2-stablediffusion2-lora"
+model = VisionEncoderDecoderModel.from_pretrained(model_id)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)
+
+# Predict function
+def predict(image):
+    img = image.convert('RGB')
+    model.eval()
+    pixel_values = feature_extractor(images=[img], return_tensors="pt").pixel_values
+    with torch.no_grad():
+        output_ids = model.generate(pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True).sequences
+
+    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
+    preds = [pred.strip() for pred in preds]
+    return preds[0]
+
+input = gr.inputs.Image(label="Upload any Image", type = 'pil', optional=True)
+output = gr.outputs.Textbox(type="text",label="Captions")
+examples_folder = os.path.join(os.path.dirname(__file__), "examples")
+examples = [os.path.join(examples_folder, file) for file in os.listdir(examples_folder)]
+
+with gr.Blocks() as demo:
+
+    gr.HTML(
+        """
+        <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
+            <h2 style="font-weight: 900; font-size: 3rem; margin: 0rem">
+                📸 Image-to-Text with Awais Nayyar 📝
+            </h2>
+            <br>
+        </div>
+        """)
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            # img = gr.inputs.Image(label="Upload any Image", type = 'pil', optional=True)
+            img = gr.Image(label="Upload any Image", type = 'pil', optional=True)
+
+            # img = gr.inputs.Image(type="pil", label="Upload any Image", optional=True)
+
+            button = gr.Button(value="Convert")
+        with gr.Column(scale=1):
+            # out = gr.outputs.Textbox(type="text",label="Captions")
+            out = gr.Label(type="text", label="Captions")
+
+    button.click(predict, inputs=[img], outputs=[out])
+
+    gr.Examples(
+        examples=examples,
+        inputs=img,
+        outputs=out,
+        fn=predict,
+        cache_examples=True,
+    )
+demo.launch(debug=True)
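
For reference, the captioning path this commit introduces can be exercised outside Gradio. Below is a minimal sketch under the diff's own assumptions (torch, transformers, and Pillow installed; the model id is taken from the commit); the image path "example.jpg" is a hypothetical placeholder, not a file from this repo.

import torch
from PIL import Image
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel

model_id = "nttdataspain/vit-gpt2-stablediffusion2-lora"
model = VisionEncoderDecoderModel.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)
model.eval()

# "example.jpg" is a placeholder; substitute any local image.
img = Image.open("example.jpg").convert("RGB")
pixel_values = feature_extractor(images=[img], return_tensors="pt").pixel_values
with torch.no_grad():
    # Without return_dict_in_generate, generate() returns the token ids directly.
    output_ids = model.generate(pixel_values, max_length=16, num_beams=4)
caption = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
print(caption)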