Raphaël Bournhonesque committed on
Commit
4364eec
1 Parent(s): ae6ec77

improve demo

Browse files
Files changed (2) hide show
  1. app.py +18 -58
  2. requirements.txt +3 -2
app.py CHANGED
@@ -1,56 +1,7 @@
1
- import re
2
-
3
- from annotated_text import annotated_text
4
  import requests
5
  import streamlit as st
6
-
7
-
8
- BARCODE_PATH_REGEX = re.compile(r"^(...)(...)(...)(.*)$")
9
-
10
-
11
- def split_barcode(barcode: str) -> list[str]:
12
- """Split barcode in the same way as done by Product Opener to generate a
13
- product image folder.
14
-
15
- :param barcode: The barcode of the product. For the pro platform only,
16
- it must be prefixed with the org ID using the format
17
- `{ORG_ID}/{BARCODE}`
18
- :raises ValueError: raise a ValueError if `barcode` is invalid
19
- :return: a list containing the splitted barcode
20
- """
21
- org_id = None
22
- if "/" in barcode:
23
- # For the pro platform, `barcode` is expected to be in the format
24
- # `{ORG_ID}/{BARCODE}` (ex: `org-lea-nature/3307130803004`)
25
- org_id, barcode = barcode.split("/", maxsplit=1)
26
-
27
- if not barcode.isdigit():
28
- raise ValueError(f"unknown barcode format: {barcode}")
29
-
30
- match = BARCODE_PATH_REGEX.fullmatch(barcode)
31
-
32
- splits = [x for x in match.groups() if x] if match else [barcode]
33
-
34
- if org_id is not None:
35
- # For the pro platform only, images and OCRs belonging to an org
36
- # are stored in a folder named after the org for all its products, ex:
37
- # https://images.pro.openfoodfacts.org/images/products/org-lea-nature/330/713/080/3004/1.jpg
38
- splits.append(org_id)
39
-
40
- return splits
41
-
42
-
43
- def _generate_file_path(barcode: str, image_id: str, suffix: str):
44
- splitted_barcode = split_barcode(barcode)
45
- return f"/{'/'.join(splitted_barcode)}/{image_id}{suffix}"
46
-
47
-
48
- def generate_ocr_path(barcode: str, image_id: str) -> str:
49
- return _generate_file_path(barcode, image_id, ".json")
50
-
51
-
52
- def generate_image_path(barcode: str, image_id: str) -> str:
53
- return _generate_file_path(barcode, image_id, ".400.jpg")
54
 
55
 
56
  @st.cache_data
@@ -85,7 +36,11 @@ def display_ner_tags(text: str, entities: list[dict]):
85
  annotated_text(spans)
86
 
87
 
88
- def run(barcode: str, model_version: str, min_threshold: float = 0.5,):
 
 
 
 
89
  product = get_product(barcode)
90
  st.markdown(f"[Product page](https://world.openfoodfacts.org/product/{barcode})")
91
 
@@ -93,18 +48,21 @@ def run(barcode: str, model_version: str, min_threshold: float = 0.5,):
93
  st.error(f"Product {barcode} not found")
94
  return
95
 
96
- images = product["images"]
 
 
 
 
 
97
  for image_id, _ in images.items():
98
  if not image_id.isdigit():
99
  continue
100
 
101
- ocr_path = generate_ocr_path(barcode, image_id)
102
- ocr_url = f"https://static.openfoodfacts.org/images/products{ocr_path}"
103
  prediction = send_prediction_request(ocr_url, model_version)
104
 
105
  st.divider()
106
- image_path = generate_image_path(barcode, image_id)
107
- image_url = f"https://static.openfoodfacts.org/images/products{image_path}"
108
  st.markdown(f"[Image {image_id}]({image_url}), [OCR]({ocr_url})")
109
  st.image(image_url)
110
 
@@ -126,7 +84,9 @@ st.markdown(
126
  "This demo leverages the ingredient entity detection model, "
127
  "that takes the OCR text as input and predict ingredient lists."
128
  )
129
- barcode = st.text_input("barcode", help="Barcode of the product", value=default_barcode).strip()
 
 
130
  model_version = "1"
131
  st.experimental_set_query_params(barcode=barcode)
132
 
 
 
 
 
1
  import requests
2
  import streamlit as st
3
+ from annotated_text import annotated_text
4
+ from openfoodfacts.images import generate_image_url, generate_json_ocr_url
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
 
7
  @st.cache_data
 
36
  annotated_text(spans)
37
 
38
 
39
+ def run(
40
+ barcode: str,
41
+ model_version: str,
42
+ min_threshold: float = 0.5,
43
+ ):
44
  product = get_product(barcode)
45
  st.markdown(f"[Product page](https://world.openfoodfacts.org/product/{barcode})")
46
 
 
48
  st.error(f"Product {barcode} not found")
49
  return
50
 
51
+ images = product.get("images", [])
52
+
53
+ if not images:
54
+ st.error(f"No images found for product {barcode}")
55
+ return
56
+
57
  for image_id, _ in images.items():
58
  if not image_id.isdigit():
59
  continue
60
 
61
+ ocr_url = generate_json_ocr_url(barcode, image_id)
 
62
  prediction = send_prediction_request(ocr_url, model_version)
63
 
64
  st.divider()
65
+ image_url = generate_image_url(barcode, image_id)
 
66
  st.markdown(f"[Image {image_id}]({image_url}), [OCR]({ocr_url})")
67
  st.image(image_url)
68
 
 
84
  "This demo leverages the ingredient entity detection model, "
85
  "that takes the OCR text as input and predict ingredient lists."
86
  )
87
+ barcode = st.text_input(
88
+ "barcode", help="Barcode of the product", value=default_barcode
89
+ ).strip()
90
  model_version = "1"
91
  st.experimental_set_query_params(barcode=barcode)
92
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  requests==2.28.1
2
- streamlit==1.15.1
3
- st-annotated-text==4.0.0
 
 
1
  requests==2.28.1
2
+ streamlit==1.28.1
3
+ st-annotated-text==4.0.0
4
+ openfoodfacts==0.1.11