Shabana committed on
Commit
0ff6692
1 Parent(s): b877ba2
Files changed (2) hide show
  1. .gitattributes +34 -0
  2. save_data.py +140 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
save_data.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import json
4
+ import shutil
5
+ import requests
6
+ import re as r
7
+ from urllib.request import urlopen
8
+ from datetime import datetime
9
+ from datasets import Image
10
+ from PIL import Image
11
+ from huggingface_hub import Repository, upload_file
12
+
13
# Hugging Face access token, read from the environment so it never lives in source.
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "OCR-img-to-text"
DATASET_REPO_URL = "https://huggingface.co/datasets/pragnakalp/OCR-img-to-text"
DATA_FILENAME = "ocr_data.csv"
DATA_FILE = os.path.join("ocr_data", DATA_FILENAME)
DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
# Startup diagnostic: reports only whether the token is missing, never its value.
print("is none?", HF_TOKEN is None)
# Folder inside the dataset repo under which flag() places uploaded files.
REPOSITORY_DIR = "data"
# Local scratch folder where flag() writes each sample before uploading it.
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR, exist_ok=True)

# NOTE(review): neither `hf_hub_download` nor `DATA_DIRNAME` is imported or
# defined in the visible part of this file, so this call looks like it always
# raises NameError and falls through to the handler below. Confirm whether the
# download is still wanted before wiring it up; the clone further down already
# fetches the dataset repo.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir=DATA_DIRNAME,
        force_filename=DATA_FILENAME
    )
except Exception:
    # Best-effort step: a failure here must not stop start-up. `Exception`
    # (rather than a bare `except:`) lets KeyboardInterrupt/SystemExit through.
    print("file not found")

# Clone the dataset repo into ./ocr_data and bring the checkout up to date.
repo = Repository(
    local_dir="ocr_data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
)
repo.git_pull()
39
+
40
def getIP(url='http://checkip.dyndns.com/', timeout=5):
    """Return the IPv4 address reported by an IP-echo page.

    The page at *url* is fetched and searched for the text
    ``Address: <dotted quad>``.

    Args:
        url: Address of the page to query. Defaults to the checkip.dyndns.com
            endpoint this function has always used.
        timeout: Seconds to wait on the request before giving up, so an
            unresponsive service cannot block the caller indefinitely.

    Returns:
        The dotted-quad address as a string, or ``''`` when the page cannot
        be fetched or does not contain an address. Failures are printed,
        never raised.
    """
    ip_address = ''
    try:
        # The context manager closes the connection even when read() fails.
        with urlopen(url, timeout=timeout) as response:
            # Decode to text instead of matching against the repr of a bytes
            # object; undecodable bytes are replaced rather than raising.
            page = response.read().decode('utf-8', errors='replace')

        match = r.search(r'Address: (\d+\.\d+\.\d+\.\d+)', page)
        if match is None:
            print("Error while getting IP address -->", "no address found in response")
            return ip_address
        return match.group(1)
    except Exception as e:
        # Best-effort lookup: report the problem and fall back to ''.
        print("Error while getting IP address -->", e)
    return ip_address
50
+
51
def get_location(ip_addr, timeout=10):
    """Look up location details for an IP address via the Pragnakalp demo API.

    Sends a JSON POST containing the address and a fixed token to
    ``https://demos.pragnakalp.com/get-ip-location``.

    Args:
        ip_addr: The IP address to look up; sent as the ``ip`` field.
        timeout: Seconds to wait for the service before giving up, so a
            stalled request cannot hang the caller.

    Returns:
        The decoded JSON body of the reply (its schema is defined by the
        remote service), or ``{}`` when the request or decoding fails.
        Failures are printed, never raised.
    """
    location = {}
    # NOTE(review): this token is committed in source; consider reading it
    # from the environment the way HF_TOKEN is.
    req_data = {
        "ip": ip_addr,
        "token": "pkml123",
    }
    url = "https://demos.pragnakalp.com/get-ip-location"
    headers = {'Content-Type': 'application/json'}
    try:
        reply = requests.request(
            "POST", url, headers=headers, data=json.dumps(req_data), timeout=timeout
        )
        decoded = reply.json()
        print("response======>>", decoded)
        return decoded
    except Exception as e:
        # Best-effort lookup: report the problem and fall back to {}.
        print("Error while getting location -->", e)
    return location
73
+
74
+ """
75
+ Save generated details
76
+ """
77
def dump_json(thing, file):
    """Write *thing* as JSON to the path *file*, replacing any existing content.

    Args:
        thing: A JSON-serializable object.
        file: Path of the file to create or truncate; written as UTF-8.
    """
    with open(file, mode='w+', encoding="utf8") as sink:
        json.dump(thing, fp=sink)
80
+
81
def flag(Method, text_output, input_image):
    """Save one OCR result locally, upload it to the dataset repo, and notify.

    Steps, in order:
      1. Look up the public IP address (getIP) and its location (get_location).
      2. Write the image and a metadata JSON file into a timestamped folder
         under LOCAL_DIR.
      3. Upload both files to DATASET_REPO_ID under
         ``REPOSITORY_DIR/<timestamp>/``.
      4. Pull the local dataset clone.
      5. POST the same details to the HF_space_image_to_text endpoint.

    Args:
        Method: Identifier of the OCR method used; stored and sent as-is.
        text_output: Text produced for the image; stored and sent as-is.
        input_image: Image data. It is passed to ``Image.fromarray`` and
            ``.tolist()`` is called on it, so a NumPy array is expected.

    Returns:
        The fixed status string ``"*****Logs save successfully!!!!"``.

    Raises:
        Exception: If the image cannot be written to disk; the underlying
            error is chained as the cause.
        Errors from the uploads, the git pull, or the final POST are not
        caught here and propagate to the caller.
    """
    print("saving data------------------------")

    ip_address = getIP()
    print("ip_address :", ip_address)
    location = get_location(ip_address)
    print("location :", location)

    # NOTE(review): the folder name has one-second resolution, so two calls
    # within the same second share a folder and the later one overwrites.
    metadata_name = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    save_dir = os.path.join(LOCAL_DIR, metadata_name)
    os.makedirs(save_dir, exist_ok=True)

    image_output_filename = os.path.join(save_dir, 'image.png')
    print("image_output_filename :", image_output_filename)
    print(input_image)
    try:
        Image.fromarray(input_image).save(image_output_filename)
    except Exception as err:
        # Keep the original error attached so the real cause stays visible.
        raise Exception("Had issues saving np array image to file") from err

    # Write the metadata describing this sample next to the image.
    json_file_path = os.path.join(save_dir, 'metadata.jsonl')
    metadata = {
        'id': metadata_name,
        'method': Method,
        'file_name': 'image.png',
        'generated_text': text_output,
        'ip': ip_address,
        'location': location,
    }
    dump_json(metadata, json_file_path)

    # Upload the image first, then its metadata. Repo paths are joined with
    # '/' explicitly because paths inside the repo are not OS paths;
    # os.path.join would insert backslashes on Windows.
    for local_path, file_name in (
        (image_output_filename, 'image.png'),
        (json_file_path, 'metadata.jsonl'),
    ):
        upload_file(
            path_or_fileobj=local_path,
            path_in_repo=f"{REPOSITORY_DIR}/{metadata_name}/{file_name}",
            repo_id=DATASET_REPO_ID,
            repo_type='dataset',
            token=HF_TOKEN,
        )

    repo.git_pull()

    url = 'http://pragnakalpdev35.pythonanywhere.com/HF_space_image_to_text'
    payload = {
        'Method': Method,
        'text_output': text_output,
        'img': input_image.tolist(),
        'ip_address': ip_address,
        'loc': location,
    }
    # Bounded wait so an unresponsive endpoint cannot hang the caller forever.
    reply = requests.post(url, json=payload, timeout=30)
    print("mail status code", reply.status_code)

    return "*****Logs save successfully!!!!"