# Space: openfoodfacts/ingredient-extraction (status: Sleeping) - file size: 3,004 bytes

import requests
import streamlit as st
from annotated_text import annotated_text
from openfoodfacts.images import generate_image_url, generate_json_ocr_url


@st.cache_data
def send_prediction_request(ocr_url: str, model_version: str, timeout: float = 30.0):
    """Ask the Robotoff API to predict ingredient lists for one OCR result.

    The call is wrapped in ``st.cache_data``, so repeating it with the same
    arguments reuses the previously returned value.

    Args:
        ocr_url: URL of the OCR JSON file to run the model on.
        model_version: Version of the ingredient detection model to use.
        timeout: Maximum number of seconds to wait for the server before
            giving up, so a stalled request cannot hang the app forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    response = requests.get(
        "https://robotoff.openfoodfacts.net/api/v1/predict/ingredient_list",
        params={"ocr_url": ocr_url, "model_version": model_version},
        timeout=timeout,
    )
    # The HTTP status is deliberately not checked here: the caller inspects
    # the JSON payload itself for an "error" key.
    return response.json()


def get_product(barcode: str):
    """Fetch a product from the Open Food Facts API (v2).

    Args:
        barcode: Barcode of the product to look up.

    Returns:
        The ``"product"`` object of the JSON response, or ``None`` when the
        API answers with HTTP 404 or the response carries no ``"product"``
        key.

    Raises:
        requests.exceptions.HTTPError: If the API answers with an error
            status other than 404.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    r = requests.get(
        f"https://world.openfoodfacts.org/api/v2/product/{barcode}",
        timeout=30,
    )

    if r.status_code == 404:
        return None

    # Fail with an explicit HTTP error rather than an opaque JSON/key error
    # when the server reports a problem other than "not found".
    r.raise_for_status()

    # A successful response without a "product" key is treated like a missing
    # product instead of raising KeyError.
    return r.json().get("product")


def display_ner_tags(text: str, entities: list[dict]):
    """Render ``text`` with every entity shown as an annotated span.

    Entities are processed in the order given. For each one, the plain text
    between the end of the previous entity and its ``"start"`` offset is
    emitted, followed by a ``(substring, label)`` tuple where the label shows
    the entity's score and language. Whatever follows the last entity is
    emitted as plain text, and the resulting list is handed to
    ``annotated_text``.

    Args:
        text: Full text the entity offsets refer to.
        entities: Dicts providing ``"score"``, ``"lang"`` (itself a dict with
            a ``"lang"`` key), ``"start"`` and ``"end"``.
    """
    pieces = []
    cursor = 0  # offset in `text` up to which output has been produced
    for ent in entities:
        confidence, language = ent["score"], ent["lang"]["lang"]
        begin, stop = ent["start"], ent["end"]
        pieces.extend(
            [
                text[cursor:begin],
                (text[begin:stop], f"score={confidence:.3f} | lang={language}"),
            ]
        )
        cursor = stop
    pieces.append(text[cursor:])
    annotated_text(pieces)


def run(
    barcode: str,
    model_version: str,
    min_threshold: float = 0.5,
):
    """Show ingredient predictions for every numbered image of a product.

    A link to the product page is always displayed. If the product or its
    images cannot be found, an error is shown and the function returns.
    Otherwise, for each image whose id consists only of digits, the image is
    displayed together with links to it and to its OCR file, followed by the
    OCR text with the predicted entities highlighted.

    Args:
        barcode: Barcode of the product to analyse.
        model_version: Model version forwarded to the prediction request.
        min_threshold: Entities with a score below this value are hidden.
    """
    product = get_product(barcode)
    st.markdown(f"[Product page](https://world.openfoodfacts.org/product/{barcode})")

    if not product:
        st.error(f"Product {barcode} not found")
        return

    # "images" is used as a mapping keyed by image id; fall back to an empty
    # mapping when the key is absent or null.
    images = product.get("images") or {}

    if not images:
        st.error(f"No images found for product {barcode}")
        return

    for image_id in images:
        # Only ids made of digits are processed.
        # NOTE(review): presumably the other keys are aliases of these
        # numbered images - confirm against the API documentation.
        if not image_id.isdigit():
            continue

        ocr_url = generate_json_ocr_url(barcode, image_id)
        prediction = send_prediction_request(ocr_url, model_version)

        st.divider()
        image_url = generate_image_url(barcode, image_id)
        st.markdown(f"[Image {image_id}]({image_url}), [OCR]({ocr_url})")
        st.image(image_url)

        if "error" in prediction:
            # Fall back to the raw error value when no description is given,
            # instead of failing with KeyError.
            description = prediction.get("description", prediction["error"])
            st.warning(f"Error: {description}")
            continue

        entities = prediction.get("entities")
        text = prediction.get("text")
        if entities is None or text is None:
            # A payload without the expected keys should only skip this
            # image, not abort the rendering of the remaining ones.
            st.warning("Error: unexpected response from the prediction service")
            continue

        filtered_entities = [e for e in entities if e["score"] >= min_threshold]
        display_ner_tags(text, filtered_entities)


# --- Page script: runs top to bottom on every Streamlit rerun. ---

# Pre-fill the barcode field from the "barcode" URL query parameter, if any.
query_params = st.experimental_get_query_params()
default_barcode = query_params.get("barcode", [""])[0]

# Model version sent with every prediction request.
model_version = "1"

st.title("Ingredient extraction demo")
st.markdown(
    "This demo leverages the ingredient entity detection model, "
    "that takes the OCR text as input and predict ingredient lists."
)

barcode = st.text_input(
    label="barcode",
    value=default_barcode,
    help="Barcode of the product",
)
barcode = barcode.strip()

# Write the current barcode back into the URL query parameters.
st.experimental_set_query_params(barcode=barcode)

threshold = st.number_input(
    label="threshold",
    min_value=0.0,
    max_value=1.0,
    value=0.98,
    help="Minimum threshold for entity predictions",
)

# Nothing is fetched until a non-empty barcode has been entered.
if barcode:
    run(barcode, model_version=model_version, min_threshold=threshold)