Spaces:

aryadytm
/

remove-photo-background

Running

App Files Files Community

Lodor committed on May 10, 2022

Commit

206ce41

•

1 Parent(s): a33c8f4

Initial commit

Browse files

Files changed (17) hide show

.gitignore +124 -0
.streamlit/config.toml +6 -0
Dockerfile +9 -0
README.md +1 -0
app.py +80 -0
assets/demo.jpg +0 -0
docker-compose.yml +13 -0
requirements.txt +6 -0
src/__init__.py +0 -0
src/models/__init__.py +0 -0
src/models/backbones/__init__.py +10 -0
src/models/backbones/mobilenetv2.py +199 -0
src/models/backbones/wrapper.py +82 -0
src/models/modnet.py +255 -0
src/st_style.py +42 -0
src/trainer.py +299 -0
src/utils.py +107 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,124 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# celery beat schedule file
+celerybeat-schedule
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,6 @@

+[server]
+maxUploadSize = 10
+[theme]
+base="light"
+primaryColor="#0074ff"

Dockerfile ADDED Viewed

	@@ -0,0 +1,9 @@

+FROM pytorch/pytorch:latest
+WORKDIR /app
+COPY . .
+RUN pip install -r requirements.txt
+CMD [ "streamlit", "run", "app.py" ]

README.md CHANGED Viewed

@@ -5,6 +5,7 @@ colorFrom: green
 colorTo: indigo
 sdk: streamlit
 sdk_version: 1.2.0
 app_file: app.py
 pinned: false
 ---

 colorTo: indigo
 sdk: streamlit
 sdk_version: 1.2.0
+python_version: 3.9.5
 app_file: app.py
 pinned: false
 ---

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import streamlit as st
+import os
+from datetime import datetime
+from PIL import Image
+from io import BytesIO
+from src.utils import change_background, matte
+from src.st_style import apply_prod_style
+# apply_prod_style(st)  # NOTE: Uncomment this for production!
+def image_download_button(pil_image, filename: str, fmt: str, label="Download"):
+    if fmt not in ["jpg", "png"]:
+        raise Exception(f"Unknown image format (Available: {fmt} - case sensitive)")
+    pil_format = "JPEG" if fmt == "jpg" else "PNG"
+    file_format = "jpg" if fmt == "jpg" else "png"
+    mime = "image/jpeg" if fmt == "jpg" else "image/png"
+    buf = BytesIO()
+    pil_image.save(buf, format=pil_format)
+    return st.download_button(
+        label=label,
+        data=buf.getvalue(),
+        file_name=f'{filename}.{file_format}',
+        mime=mime,
+    )
+st.title("AI Photo Background Removal")
+st.image(Image.open("assets/demo.jpg"))
+st.write(
+    """
+    You want to remove your photo background, but don't have the time and effort to learn photo editing skills?
+    **This app will change or remove your photo background, in seconds.**
+    """
+)
+uploaded_file = st.file_uploader(
+    label="Upload your photo here",
+    accept_multiple_files=False, type=["png", "jpg", "jpeg"],
+)
+if uploaded_file is not None:
+    with st.expander("Original photo", expanded=True):
+        if uploaded_file is not None:
+            st.image(uploaded_file)
+        else:
+            st.warning("You haven't uploaded any photo yet")
+    in_mode = st.selectbox("Choose background color", ["Transparent (PNG)", "White", "Black", "Green", "Red", "Blue"])
+    in_submit = st.button("Submit")
+    if uploaded_file is not None and in_submit:
+        img_input = Image.open(uploaded_file)
+        with st.spinner("AI is doing magic to your photo. Please wait..."):
+            hexmap = {
+                "Transparent (PNG)": "#000000",
+                "Black": "#000000",
+                "White": "#FFFFFF",
+                "Green": "#22EE22",
+                "Red": "#EE2222",
+                "Blue": "#2222EE",
+            }
+            alpha = 0.0 if in_mode == "Transparent (PNG)" else 1.0
+            img_matte = matte(img_input)
+            img_output = change_background(img_input, img_matte, background_alpha=alpha, background_hex=hexmap[in_mode])
+        with st.expander("Success!", expanded=True):
+            st.image(img_output)
+            uploaded_name = os.path.splitext(uploaded_file.name)[0]
+            image_download_button(
+                pil_image=img_output,
+                filename=uploaded_name,
+                fmt="png"
+            )

assets/demo.jpg ADDED Viewed

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+version: '3'
+services:
+  st-remove-photo-background:
+    build: .
+    container_name: st-remove-photo-background
+    restart: unless-stopped
+    ports:
+    - 51001:8501
+    volumes:
+    - .:/app
+    environment:
+    - TZ=Asia/Jakarta

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+torch
+torchvision
+numpy
+opencv-python-headless
+matplotlib
+streamlit

src/__init__.py ADDED Viewed

File without changes

src/models/__init__.py ADDED Viewed

File without changes

src/models/backbones/__init__.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from .wrapper import *
+#------------------------------------------------------------------------------
+#  Replaceable Backbones
+#------------------------------------------------------------------------------
+# Registry mapping a backbone name to the backbone class that implements it.
+SUPPORTED_BACKBONES = {
+    'mobilenetv2': MobileNetV2Backbone,
+}

src/models/backbones/mobilenetv2.py ADDED Viewed

	@@ -0,0 +1,199 @@

+""" This file is adapted from https://github.com/thuyngch/Human-Segmentation-PyTorch"""
+import math
+import json
+from functools import reduce
+import torch
+from torch import nn
+#------------------------------------------------------------------------------
+#  Useful functions
+#------------------------------------------------------------------------------
+def _make_divisible(v, divisor, min_value=None):
+	if min_value is None:
+		min_value = divisor
+	new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+	# Make sure that round down does not go down by more than 10%.
+	if new_v < 0.9 * v:
+		new_v += divisor
+	return new_v
+def conv_bn(inp, oup, stride):
+	return nn.Sequential(
+		nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+		nn.BatchNorm2d(oup),
+		nn.ReLU6(inplace=True)
+	)
+def conv_1x1_bn(inp, oup):
+	return nn.Sequential(
+		nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
+		nn.BatchNorm2d(oup),
+		nn.ReLU6(inplace=True)
+	)
+#------------------------------------------------------------------------------
+#  Class of Inverted Residual block
+#------------------------------------------------------------------------------
+class InvertedResidual(nn.Module):
+	def __init__(self, inp, oup, stride, expansion, dilation=1):
+		super(InvertedResidual, self).__init__()
+		self.stride = stride
+		assert stride in [1, 2]
+		hidden_dim = round(inp * expansion)
+		self.use_res_connect = self.stride == 1 and inp == oup
+		if expansion == 1:
+			self.conv = nn.Sequential(
+				# dw
+				nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, dilation=dilation, bias=False),
+				nn.BatchNorm2d(hidden_dim),
+				nn.ReLU6(inplace=True),
+				# pw-linear
+				nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+				nn.BatchNorm2d(oup),
+			)
+		else:
+			self.conv = nn.Sequential(
+				# pw
+				nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
+				nn.BatchNorm2d(hidden_dim),
+				nn.ReLU6(inplace=True),
+				# dw
+				nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, dilation=dilation, bias=False),
+				nn.BatchNorm2d(hidden_dim),
+				nn.ReLU6(inplace=True),
+				# pw-linear
+				nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+				nn.BatchNorm2d(oup),
+			)
+	def forward(self, x):
+		if self.use_res_connect:
+			return x + self.conv(x)
+		else:
+			return self.conv(x)
+#------------------------------------------------------------------------------
+#  Class of MobileNetV2
+#------------------------------------------------------------------------------
+class MobileNetV2(nn.Module):
+	def __init__(self, in_channels, alpha=1.0, expansion=6, num_classes=1000):
+		super(MobileNetV2, self).__init__()
+		self.in_channels = in_channels
+		self.num_classes = num_classes
+		input_channel = 32
+		last_channel = 1280
+		interverted_residual_setting = [
+			# t, c, n, s
+			[1        , 16, 1, 1],
+			[expansion, 24, 2, 2],
+			[expansion, 32, 3, 2],
+			[expansion, 64, 4, 2],
+			[expansion, 96, 3, 1],
+			[expansion, 160, 3, 2],
+			[expansion, 320, 1, 1],
+		]
+		# building first layer
+		input_channel = _make_divisible(input_channel*alpha, 8)
+		self.last_channel = _make_divisible(last_channel*alpha, 8) if alpha > 1.0 else last_channel
+		self.features = [conv_bn(self.in_channels, input_channel, 2)]
+		# building inverted residual blocks
+		for t, c, n, s in interverted_residual_setting:
+			output_channel = _make_divisible(int(c*alpha), 8)
+			for i in range(n):
+				if i == 0:
+					self.features.append(InvertedResidual(input_channel, output_channel, s, expansion=t))
+				else:
+					self.features.append(InvertedResidual(input_channel, output_channel, 1, expansion=t))
+				input_channel = output_channel
+		# building last several layers
+		self.features.append(conv_1x1_bn(input_channel, self.last_channel))
+		# make it nn.Sequential
+		self.features = nn.Sequential(*self.features)
+		# building classifier
+		if self.num_classes is not None:
+			self.classifier = nn.Sequential(
+				nn.Dropout(0.2),
+				nn.Linear(self.last_channel, num_classes),
+			)
+		# Initialize weights
+		self._init_weights()
+	def forward(self, x):
+		# Stage1
+		x = self.features[0](x)
+		x = self.features[1](x)
+		# Stage2
+		x = self.features[2](x)
+		x = self.features[3](x)
+		# Stage3
+		x = self.features[4](x)
+		x = self.features[5](x)
+		x = self.features[6](x)
+		# Stage4
+		x = self.features[7](x)
+		x = self.features[8](x)
+		x = self.features[9](x)
+		x = self.features[10](x)
+		x = self.features[11](x)
+		x = self.features[12](x)
+		x = self.features[13](x)
+		# Stage5
+		x = self.features[14](x)
+		x = self.features[15](x)
+		x = self.features[16](x)
+		x = self.features[17](x)
+		x = self.features[18](x)
+		# Classification
+		if self.num_classes is not None:
+			x = x.mean(dim=(2,3))
+			x = self.classifier(x)
+		# Output
+		return x
+	def _load_pretrained_model(self, pretrained_file):
+		pretrain_dict = torch.load(pretrained_file, map_location='cpu')
+		model_dict = {}
+		state_dict = self.state_dict()
+		print("[MobileNetV2] Loading pretrained model...")
+		for k, v in pretrain_dict.items():
+			if k in state_dict:
+				model_dict[k] = v
+			else:
+				print(k, "is ignored")
+		state_dict.update(model_dict)
+		self.load_state_dict(state_dict)
+	def _init_weights(self):
+		for m in self.modules():
+			if isinstance(m, nn.Conv2d):
+				n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+				m.weight.data.normal_(0, math.sqrt(2. / n))
+				if m.bias is not None:
+					m.bias.data.zero_()
+			elif isinstance(m, nn.BatchNorm2d):
+				m.weight.data.fill_(1)
+				m.bias.data.zero_()
+			elif isinstance(m, nn.Linear):
+				n = m.weight.size(1)
+				m.weight.data.normal_(0, 0.01)
+				m.bias.data.zero_()

src/models/backbones/wrapper.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import os
+from functools import reduce
+import torch
+import torch.nn as nn
+from .mobilenetv2 import MobileNetV2
+class BaseBackbone(nn.Module):
+    """ Superclass of Replaceable Backbone Model for Semantic Estimation
+    """
+    def __init__(self, in_channels):
+        super(BaseBackbone, self).__init__()
+        self.in_channels = in_channels
+        self.model = None
+        self.enc_channels = []
+    def forward(self, x):
+        raise NotImplementedError
+    def load_pretrained_ckpt(self):
+        raise NotImplementedError
+class MobileNetV2Backbone(BaseBackbone):
+    """ MobileNetV2 Backbone
+    """
+    def __init__(self, in_channels):
+        super(MobileNetV2Backbone, self).__init__(in_channels)
+        self.model = MobileNetV2(self.in_channels, alpha=1.0, expansion=6, num_classes=None)
+        self.enc_channels = [16, 24, 32, 96, 1280]
+    def forward(self, x):
+        # x = reduce(lambda x, n: self.model.features[n](x), list(range(0, 2)), x)
+        x = self.model.features[0](x)
+        x = self.model.features[1](x)
+        enc2x = x
+        # x = reduce(lambda x, n: self.model.features[n](x), list(range(2, 4)), x)
+        x = self.model.features[2](x)
+        x = self.model.features[3](x)
+        enc4x = x
+        # x = reduce(lambda x, n: self.model.features[n](x), list(range(4, 7)), x)
+        x = self.model.features[4](x)
+        x = self.model.features[5](x)
+        x = self.model.features[6](x)
+        enc8x = x
+        # x = reduce(lambda x, n: self.model.features[n](x), list(range(7, 14)), x)
+        x = self.model.features[7](x)
+        x = self.model.features[8](x)
+        x = self.model.features[9](x)
+        x = self.model.features[10](x)
+        x = self.model.features[11](x)
+        x = self.model.features[12](x)
+        x = self.model.features[13](x)
+        enc16x = x
+        # x = reduce(lambda x, n: self.model.features[n](x), list(range(14, 19)), x)
+        x = self.model.features[14](x)
+        x = self.model.features[15](x)
+        x = self.model.features[16](x)
+        x = self.model.features[17](x)
+        x = self.model.features[18](x)
+        enc32x = x
+        return [enc2x, enc4x, enc8x, enc16x, enc32x]
+    def load_pretrained_ckpt(self):
+        # the pre-trained model is provided by https://github.com/thuyngch/Human-Segmentation-PyTorch
+        ckpt_path = './pretrained/mobilenetv2_human_seg.ckpt'
+        if not os.path.exists(ckpt_path):
+            print('cannot find the pretrained mobilenetv2 backbone')
+            exit()
+        ckpt = torch.load(ckpt_path)
+        self.model.load_state_dict(ckpt)

src/models/modnet.py ADDED Viewed

	@@ -0,0 +1,255 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from .backbones import SUPPORTED_BACKBONES
+#------------------------------------------------------------------------------
+#  MODNet Basic Modules
+#------------------------------------------------------------------------------
+class IBNorm(nn.Module):
+    """ Combine Instance Norm and Batch Norm into One Layer
+    """
+    def __init__(self, in_channels):
+        super(IBNorm, self).__init__()
+        in_channels = in_channels
+        self.bnorm_channels = int(in_channels / 2)
+        self.inorm_channels = in_channels - self.bnorm_channels
+        self.bnorm = nn.BatchNorm2d(self.bnorm_channels, affine=True)
+        self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False)
+    def forward(self, x):
+        bn_x = self.bnorm(x[:, :self.bnorm_channels, ...].contiguous())
+        in_x = self.inorm(x[:, self.bnorm_channels:, ...].contiguous())
+        return torch.cat((bn_x, in_x), 1)
+class Conv2dIBNormRelu(nn.Module):
+    """ Convolution + IBNorm + ReLu
+    """
+    def __init__(self, in_channels, out_channels, kernel_size,
+                 stride=1, padding=0, dilation=1, groups=1, bias=True,
+                 with_ibn=True, with_relu=True):
+        super(Conv2dIBNormRelu, self).__init__()
+        layers = [
+            nn.Conv2d(in_channels, out_channels, kernel_size,
+                      stride=stride, padding=padding, dilation=dilation,
+                      groups=groups, bias=bias)
+        ]
+        if with_ibn:
+            layers.append(IBNorm(out_channels))
+        if with_relu:
+            layers.append(nn.ReLU(inplace=True))
+        self.layers = nn.Sequential(*layers)
+    def forward(self, x):
+        return self.layers(x)
+class SEBlock(nn.Module):
+    """ SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf
+    """
+    def __init__(self, in_channels, out_channels, reduction=1):
+        super(SEBlock, self).__init__()
+        self.pool = nn.AdaptiveAvgPool2d(1)
+        self.fc = nn.Sequential(
+            nn.Linear(in_channels, int(in_channels // reduction), bias=False),
+            nn.ReLU(inplace=True),
+            nn.Linear(int(in_channels // reduction), out_channels, bias=False),
+            nn.Sigmoid()
+        )
+    def forward(self, x):
+        b, c, _, _ = x.size()
+        w = self.pool(x).view(b, c)
+        w = self.fc(w).view(b, c, 1, 1)
+        return x * w.expand_as(x)
+#------------------------------------------------------------------------------
+#  MODNet Branches
+#------------------------------------------------------------------------------
+class LRBranch(nn.Module):
+    """ Low Resolution Branch of MODNet
+    """
+    def __init__(self, backbone):
+        super(LRBranch, self).__init__()
+        enc_channels = backbone.enc_channels
+        self.backbone = backbone
+        self.se_block = SEBlock(enc_channels[4], enc_channels[4], reduction=4)
+        self.conv_lr16x = Conv2dIBNormRelu(enc_channels[4], enc_channels[3], 5, stride=1, padding=2)
+        self.conv_lr8x = Conv2dIBNormRelu(enc_channels[3], enc_channels[2], 5, stride=1, padding=2)
+        self.conv_lr = Conv2dIBNormRelu(enc_channels[2], 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False)
+    def forward(self, img, inference):
+        enc_features = self.backbone.forward(img)
+        enc2x, enc4x, enc32x = enc_features[0], enc_features[1], enc_features[4]
+        enc32x = self.se_block(enc32x)
+        lr16x = F.interpolate(enc32x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr16x = self.conv_lr16x(lr16x)
+        lr8x = F.interpolate(lr16x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr8x = self.conv_lr8x(lr8x)
+        pred_semantic = None
+        if not inference:
+            lr = self.conv_lr(lr8x)
+            pred_semantic = torch.sigmoid(lr)
+        return pred_semantic, lr8x, [enc2x, enc4x]
+class HRBranch(nn.Module):
+    """ High Resolution Branch of MODNet
+    Combines the two shallowest encoder feature maps, downsampled copies of
+    the input image and the upsampled low-resolution features. Outside
+    inference it also predicts a full-resolution detail map.
+    """
+    def __init__(self, hr_channels, enc_channels):
+        super(HRBranch, self).__init__()
+        # 1x1 projections of the encoder features to hr_channels.
+        self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0)
+        # The "+ 3" input widths below account for the image concatenated in
+        # forward() (presumably a 3-channel RGB image -- confirm with callers).
+        self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1)
+        self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0)
+        self.conv_enc4x = Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
+        # NOTE(review): the 3 * hr_channels + 3 input width only matches the
+        # concatenation in forward() if lr8x carries hr_channels channels --
+        # confirm before using a non-default hr_channels.
+        self.conv_hr4x = nn.Sequential(
+            Conv2dIBNormRelu(3 * hr_channels + 3, 2 * hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
+        )
+        self.conv_hr2x = nn.Sequential(
+            Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
+        )
+        # Detail head: a single-channel output without norm or ReLU.
+        self.conv_hr = nn.Sequential(
+            Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False),
+        )
+    def forward(self, img, enc2x, enc4x, lr8x, inference):
+        # Copies of the input image at 1/2 and 1/4 scale.
+        img2x = F.interpolate(img, scale_factor=1/2, mode='bilinear', align_corners=False)
+        img4x = F.interpolate(img, scale_factor=1/4, mode='bilinear', align_corners=False)
+        enc2x = self.tohr_enc2x(enc2x)
+        # The stride-2 conv halves the resolution of the image/enc2x concatenation.
+        hr4x = self.conv_enc2x(torch.cat((img2x, enc2x), dim=1))
+        enc4x = self.tohr_enc4x(enc4x)
+        hr4x = self.conv_enc4x(torch.cat((hr4x, enc4x), dim=1))
+        # Upsample the low-resolution features by 2 and merge them with the image.
+        lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
+        hr4x = self.conv_hr4x(torch.cat((hr4x, lr4x, img4x), dim=1))
+        hr2x = F.interpolate(hr4x, scale_factor=2, mode='bilinear', align_corners=False)
+        hr2x = self.conv_hr2x(torch.cat((hr2x, enc2x), dim=1))
+        # The detail prediction is only computed outside inference; otherwise None is returned for it.
+        pred_detail = None
+        if not inference:
+            hr = F.interpolate(hr2x, scale_factor=2, mode='bilinear', align_corners=False)
+            hr = self.conv_hr(torch.cat((hr, img), dim=1))
+            pred_detail = torch.sigmoid(hr)
+        return pred_detail, hr2x
+class FusionBranch(nn.Module):
+    """ Fusion Branch of MODNet
+    """
+    def __init__(self, hr_channels, enc_channels):
+        super(FusionBranch, self).__init__()
+        self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)
+        self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
+        self.conv_f = nn.Sequential(
+            Conv2dIBNormRelu(hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
+            Conv2dIBNormRelu(int(hr_channels / 2), 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False),
+        )
+    def forward(self, img, lr8x, hr2x):
+        lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr4x = self.conv_lr4x(lr4x)
+        lr2x = F.interpolate(lr4x, scale_factor=2, mode='bilinear', align_corners=False)
+        f2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1))
+        f = F.interpolate(f2x, scale_factor=2, mode='bilinear', align_corners=False)
+        f = self.conv_f(torch.cat((f, img), dim=1))
+        pred_matte = torch.sigmoid(f)
+        return pred_matte
+#------------------------------------------------------------------------------
+#  MODNet
+#------------------------------------------------------------------------------
+class MODNet(nn.Module):
+    """ Architecture of MODNet
+    """
+    def __init__(self, in_channels=3, hr_channels=32, backbone_arch='mobilenetv2', backbone_pretrained=True):
+        super(MODNet, self).__init__()
+        self.in_channels = in_channels
+        self.hr_channels = hr_channels
+        self.backbone_arch = backbone_arch
+        self.backbone_pretrained = backbone_pretrained
+        self.backbone = SUPPORTED_BACKBONES[self.backbone_arch](self.in_channels)
+        self.lr_branch = LRBranch(self.backbone)
+        self.hr_branch = HRBranch(self.hr_channels, self.backbone.enc_channels)
+        self.f_branch = FusionBranch(self.hr_channels, self.backbone.enc_channels)
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                self._init_conv(m)
+            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
+                self._init_norm(m)
+        if self.backbone_pretrained:
+            self.backbone.load_pretrained_ckpt()
+    def forward(self, img, inference):
+        pred_semantic, lr8x, [enc2x, enc4x] = self.lr_branch(img, inference)
+        pred_detail, hr2x = self.hr_branch(img, enc2x, enc4x, lr8x, inference)
+        pred_matte = self.f_branch(img, lr8x, hr2x)
+        return pred_semantic, pred_detail, pred_matte
+    def freeze_norm(self):
+        norm_types = [nn.BatchNorm2d, nn.InstanceNorm2d]
+        for m in self.modules():
+            for n in norm_types:
+                if isinstance(m, n):
+                    m.eval()
+                    continue
+    def _init_conv(self, conv):
+        nn.init.kaiming_uniform_(
+            conv.weight, a=0, mode='fan_in', nonlinearity='relu')
+        if conv.bias is not None:
+            nn.init.constant_(conv.bias, 0)
+    def _init_norm(self, norm):
+        if norm.weight is not None:
+            nn.init.constant_(norm.weight, 1)
+            nn.init.constant_(norm.bias, 0)

src/st_style.py ADDED Viewed

	@@ -0,0 +1,42 @@

+button_style = """
+<style>
+div.stButton > button:first-child {
+    background-color: rgb(255, 75, 75);
+    color: rgb(255, 255, 255);
+}
+div.stButton > button:hover {
+    background-color: rgb(255, 75, 75);
+    color: rgb(255, 255, 255);
+}
+div.stButton > button:active {
+    background-color: rgb(255, 75, 75);
+    color: rgb(255, 255, 255);
+}
+div.stButton > button:focus {
+    background-color: rgb(255, 75, 75);
+    color: rgb(255, 255, 255);
+}
+.css-1cpxqw2:focus:not(:active) {
+    background-color: rgb(255, 75, 75);
+    border-color: rgb(255, 75, 75);
+    color: rgb(255, 255, 255);
+}
+"""
+style = """
+<style>
+#MainMenu {
+    visibility: hidden;
+}
+footer {
+    visibility: hidden;
+}
+header {
+    visibility: hidden;
+}
+</style>
+"""
+def apply_prod_style(st):
+    return st.markdown(style, unsafe_allow_html=True)

src/trainer.py ADDED Viewed

	@@ -0,0 +1,299 @@

+import math
+import scipy
+import numpy as np
+from scipy.ndimage import grey_dilation, grey_erosion
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+__all__ = [
+    'supervised_training_iter',
+    'soc_adaptation_iter',
+]
+# ----------------------------------------------------------------------------------
+# Tool Classes/Functions
+# ----------------------------------------------------------------------------------
+class GaussianBlurLayer(nn.Module):
+    """ Add Gaussian Blur to a 4D tensors
+    This layer takes a 4D tensor of {N, C, H, W} as input.
+    The Gaussian blur will be performed in given channel number (C) splitly.
+    """
+    def __init__(self, channels, kernel_size):
+        """
+        Arguments:
+            channels (int): Channel for input tensor
+            kernel_size (int): Size of the kernel used in blurring
+        """
+        super(GaussianBlurLayer, self).__init__()
+        self.channels = channels
+        self.kernel_size = kernel_size
+        assert self.kernel_size % 2 != 0
+        self.op = nn.Sequential(
+            nn.ReflectionPad2d(math.floor(self.kernel_size / 2)),
+            nn.Conv2d(channels, channels, self.kernel_size,
+                      stride=1, padding=0, bias=None, groups=channels)
+        )
+        self._init_kernel()
+    def forward(self, x):
+        """
+        Arguments:
+            x (torch.Tensor): input 4D tensor
+        Returns:
+            torch.Tensor: Blurred version of the input
+        """
+        if not len(list(x.shape)) == 4:
+            print('\'GaussianBlurLayer\' requires a 4D tensor as input\n')
+            exit()
+        elif not x.shape[1] == self.channels:
+            print('In \'GaussianBlurLayer\', the required channel ({0}) is'
+                  'not the same as input ({1})\n'.format(self.channels, x.shape[1]))
+            exit()
+        return self.op(x)
+    def _init_kernel(self):
+        sigma = 0.3 * ((self.kernel_size - 1) * 0.5 - 1) + 0.8
+        n = np.zeros((self.kernel_size, self.kernel_size))
+        i = math.floor(self.kernel_size / 2)
+        n[i, i] = 1
+        kernel = scipy.ndimage.gaussian_filter(n, sigma)
+        for name, param in self.named_parameters():
+            param.data.copy_(torch.from_numpy(kernel))
+# ----------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------
+# MODNet Training Functions
+# ----------------------------------------------------------------------------------
+blurer = GaussianBlurLayer(1, 3).cuda()
+def supervised_training_iter(
+    modnet, optimizer, image, trimap, gt_matte,
+    semantic_scale=10.0, detail_scale=10.0, matte_scale=1.0):
+    """ Supervised training iteration of MODNet
+    This function trains MODNet for one iteration in a labeled dataset.
+    Arguments:
+        modnet (torch.nn.Module): instance of MODNet
+        optimizer (torch.optim.Optimizer): optimizer for supervised training
+        image (torch.autograd.Variable): input RGB image
+                                         its pixel values should be normalized
+        trimap (torch.autograd.Variable): trimap used to calculate the losses
+                                          its pixel values can be 0, 0.5, or 1
+                                          (foreground=1, background=0, unknown=0.5)
+        gt_matte (torch.autograd.Variable): ground truth alpha matte
+                                            its pixel values are between [0, 1]
+        semantic_scale (float): scale of the semantic loss
+                                NOTE: please adjust according to your dataset
+        detail_scale (float): scale of the detail loss
+                              NOTE: please adjust according to your dataset
+        matte_scale (float): scale of the matte loss
+                             NOTE: please adjust according to your dataset
+    Returns:
+        semantic_loss (torch.Tensor): loss of the semantic estimation [Low-Resolution (LR) Branch]
+        detail_loss (torch.Tensor): loss of the detail prediction [High-Resolution (HR) Branch]
+        matte_loss (torch.Tensor): loss of the semantic-detail fusion [Fusion Branch]
+    Example:
+        import torch
+        from src.models.modnet import MODNet
+        from src.trainer import supervised_training_iter
+        bs = 16         # batch size
+        lr = 0.01       # learn rate
+        epochs = 40     # total epochs
+        modnet = torch.nn.DataParallel(MODNet()).cuda()
+        optimizer = torch.optim.SGD(modnet.parameters(), lr=lr, momentum=0.9)
+        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=int(0.25 * epochs), gamma=0.1)
+        dataloader = CREATE_YOUR_DATALOADER(bs)     # NOTE: please finish this function
+        for epoch in range(0, epochs):
+            for idx, (image, trimap, gt_matte) in enumerate(dataloader):
+                semantic_loss, detail_loss, matte_loss = \
+                    supervised_training_iter(modnet, optimizer, image, trimap, gt_matte)
+            lr_scheduler.step()
+    """
+    global blurer
+    # set the model to train mode and clear the optimizer
+    modnet.train()
+    optimizer.zero_grad()
+    # forward the model
+    pred_semantic, pred_detail, pred_matte = modnet(image, False)
+    # calculate the boundary mask from the trimap
+    boundaries = (trimap < 0.5) + (trimap > 0.5)
+    # calculate the semantic loss
+    gt_semantic = F.interpolate(gt_matte, scale_factor=1/16, mode='bilinear')
+    gt_semantic = blurer(gt_semantic)
+    semantic_loss = torch.mean(F.mse_loss(pred_semantic, gt_semantic))
+    semantic_loss = semantic_scale * semantic_loss
+    # calculate the detail loss
+    pred_boundary_detail = torch.where(boundaries, trimap, pred_detail)
+    gt_detail = torch.where(boundaries, trimap, gt_matte)
+    detail_loss = torch.mean(F.l1_loss(pred_boundary_detail, gt_detail))
+    detail_loss = detail_scale * detail_loss
+    # calculate the matte loss
+    pred_boundary_matte = torch.where(boundaries, trimap, pred_matte)
+    matte_l1_loss = F.l1_loss(pred_matte, gt_matte) + 4.0 * F.l1_loss(pred_boundary_matte, gt_matte)
+    matte_compositional_loss = F.l1_loss(image * pred_matte, image * gt_matte) \
+        + 4.0 * F.l1_loss(image * pred_boundary_matte, image * gt_matte)
+    matte_loss = torch.mean(matte_l1_loss + matte_compositional_loss)
+    matte_loss = matte_scale * matte_loss
+    # calculate the final loss, backward the loss, and update the model
+    loss = semantic_loss + detail_loss + matte_loss
+    loss.backward()
+    optimizer.step()
+    # for test
+    return semantic_loss, detail_loss, matte_loss
+def soc_adaptation_iter(
+    modnet, backup_modnet, optimizer, image,
+    soc_semantic_scale=100.0, soc_detail_scale=1.0):
+    """ Self-Supervised sub-objective consistency (SOC) adaptation iteration of MODNet
+    This function fine-tunes MODNet for one iteration in an unlabeled dataset.
+    Note that SOC can only fine-tune a converged MODNet, i.e., MODNet that has been
+    trained in a labeled dataset.
+    Arguments:
+        modnet (torch.nn.Module): instance of MODNet
+        backup_modnet (torch.nn.Module): backup of the trained MODNet
+        optimizer (torch.optim.Optimizer): optimizer for self-supervised SOC
+        image (torch.autograd.Variable): input RGB image
+                                         its pixel values should be normalized
+        soc_semantic_scale (float): scale of the SOC semantic loss
+                                    NOTE: please adjust according to your dataset
+        soc_detail_scale (float): scale of the SOC detail loss
+                                  NOTE: please adjust according to your dataset
+    Returns:
+        soc_semantic_loss (torch.Tensor): loss of the semantic SOC
+        soc_detail_loss (torch.Tensor): loss of the detail SOC
+    Example:
+        import copy
+        import torch
+        from src.models.modnet import MODNet
+        from src.trainer import soc_adaptation_iter
+        bs = 1          # batch size
+        lr = 0.00001    # learn rate
+        epochs = 10     # total epochs
+        modnet = torch.nn.DataParallel(MODNet()).cuda()
+        modnet = LOAD_TRAINED_CKPT()    # NOTE: please finish this function
+        optimizer = torch.optim.Adam(modnet.parameters(), lr=lr, betas=(0.9, 0.99))
+        dataloader = CREATE_YOUR_DATALOADER(bs)     # NOTE: please finish this function
+        for epoch in range(0, epochs):
+            backup_modnet = copy.deepcopy(modnet)
+            for idx, (image) in enumerate(dataloader):
+                soc_semantic_loss, soc_detail_loss = \
+                    soc_adaptation_iter(modnet, backup_modnet, optimizer, image)
+    """
+    global blurer
+    # set the backup model to eval mode
+    backup_modnet.eval()
+    # set the main model to train mode and freeze its norm layers
+    modnet.train()
+    modnet.module.freeze_norm()
+    # clear the optimizer
+    optimizer.zero_grad()
+    # forward the main model
+    pred_semantic, pred_detail, pred_matte = modnet(image, False)
+    # forward the backup model
+    with torch.no_grad():
+        _, pred_backup_detail, pred_backup_matte = backup_modnet(image, False)
+    # calculate the boundary mask from `pred_matte` and `pred_semantic`
+    pred_matte_fg = (pred_matte.detach() > 0.1).float()
+    pred_semantic_fg = (pred_semantic.detach() > 0.1).float()
+    pred_semantic_fg = F.interpolate(pred_semantic_fg, scale_factor=16, mode='bilinear')
+    pred_fg = pred_matte_fg * pred_semantic_fg
+    n, c, h, w = pred_matte.shape
+    np_pred_fg = pred_fg.data.cpu().numpy()
+    np_boundaries = np.zeros([n, c, h, w])
+    for sdx in range(0, n):
+        sample_np_boundaries = np_boundaries[sdx, 0, ...]
+        sample_np_pred_fg = np_pred_fg[sdx, 0, ...]
+        side = int((h + w) / 2 * 0.05)
+        dilated = grey_dilation(sample_np_pred_fg, size=(side, side))
+        eroded = grey_erosion(sample_np_pred_fg, size=(side, side))
+        sample_np_boundaries[np.where(dilated - eroded != 0)] = 1
+        np_boundaries[sdx, 0, ...] = sample_np_boundaries
+    boundaries = torch.tensor(np_boundaries).float().cuda()
+    # sub-objectives consistency between `pred_semantic` and `pred_matte`
+    # generate pseudo ground truth for `pred_semantic`
+    downsampled_pred_matte = blurer(F.interpolate(pred_matte, scale_factor=1/16, mode='bilinear'))
+    pseudo_gt_semantic = downsampled_pred_matte.detach()
+    pseudo_gt_semantic = pseudo_gt_semantic * (pseudo_gt_semantic > 0.01).float()
+    # generate pseudo ground truth for `pred_matte`
+    pseudo_gt_matte = pred_semantic.detach()
+    pseudo_gt_matte = pseudo_gt_matte * (pseudo_gt_matte > 0.01).float()
+    # calculate the SOC semantic loss
+    soc_semantic_loss = F.mse_loss(pred_semantic, pseudo_gt_semantic) + F.mse_loss(downsampled_pred_matte, pseudo_gt_matte)
+    soc_semantic_loss = soc_semantic_scale * torch.mean(soc_semantic_loss)
+    # NOTE: using the formulas in our paper to calculate the following losses has similar results
+    # sub-objectives consistency between `pred_detail` and `pred_backup_detail` (on boundaries only)
+    backup_detail_loss = boundaries * F.l1_loss(pred_detail, pred_backup_detail, reduction='none')
+    backup_detail_loss = torch.sum(backup_detail_loss, dim=(1,2,3)) / torch.sum(boundaries, dim=(1,2,3))
+    backup_detail_loss = torch.mean(backup_detail_loss)
+    # sub-objectives consistency between pred_matte` and `pred_backup_matte` (on boundaries only)
+    backup_matte_loss = boundaries * F.l1_loss(pred_matte, pred_backup_matte, reduction='none')
+    backup_matte_loss = torch.sum(backup_matte_loss, dim=(1,2,3)) / torch.sum(boundaries, dim=(1,2,3))
+    backup_matte_loss = torch.mean(backup_matte_loss)
+    soc_detail_loss = soc_detail_scale * (backup_detail_loss + backup_matte_loss)
+    # calculate the final loss, backward the loss, and update the model
+    loss = soc_semantic_loss + soc_detail_loss
+    loss.backward()
+    optimizer.step()
+    return soc_semantic_loss, soc_detail_loss
+# ----------------------------------------------------------------------------------

src/utils.py ADDED Viewed

	@@ -0,0 +1,107 @@

+# Credits to https://github.com/ZHKKKe/MODNet for the model.
+import streamlit as st
+import numpy as np
+import matplotlib.pyplot as plt
+import time
+import os
+from PIL import Image, ImageColor
+from copy import deepcopy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision.transforms as transforms
+from src.models.modnet import MODNet
+from src.st_style import apply_prod_style
+# apply(st)
+MODEL = "./assets/modnet_photographic_portrait_matting.ckpt"  # checkpoint file that matte() reads with torch.load
+def change_background(image, matte, background_alpha: float=1.0, background_hex: str="#000000"):
+    """
+    image: PIL Image (RGBA)
+    matte: PIL Image (grayscale, if 255 it is foreground)
+    background_alpha: float
+    background_hex: string
+    """
+    img = deepcopy(image)
+    if image.mode != "RGBA":
+        img = img.convert("RGBA")
+    background_color = ImageColor.getrgb(background_hex)
+    background_alpha = int(255 * background_alpha)
+    background = Image.new("RGBA", img.size, color=background_color + (background_alpha,))
+    background.paste(img, mask=matte)
+    return background
+def matte(image):
+    # define hyper-parameters
+    ref_size = 512
+    # define image to tensor transform
+    im_transform = transforms.Compose(
+        [
+            transforms.ToTensor(),
+            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+        ]
+    )
+    # create MODNet and load the pre-trained ckpt
+    modnet = MODNet(backbone_pretrained=False)
+    modnet = nn.DataParallel(modnet)
+    if torch.cuda.is_available():
+        modnet = modnet.cuda()
+        weights = torch.load(MODEL)
+    else:
+        weights = torch.load(MODEL, map_location=torch.device('cpu'))
+    modnet.load_state_dict(weights)
+    modnet.eval()
+    # read image
+    im = deepcopy(image)
+    # unify image channels to 3
+    im = np.asarray(im)
+    if len(im.shape) == 2:
+        im = im[:, :, None]
+    if im.shape[2] == 1:
+        im = np.repeat(im, 3, axis=2)
+    elif im.shape[2] == 4:
+        im = im[:, :, 0:3]
+    # convert image to PyTorch tensor
+    im = Image.fromarray(im)
+    im = im_transform(im)
+    # add mini-batch dim
+    im = im[None, :, :, :]
+    # resize image for input
+    im_b, im_c, im_h, im_w = im.shape
+    if max(im_h, im_w) < ref_size or min(im_h, im_w) > ref_size:
+        if im_w >= im_h:
+            im_rh = ref_size
+            im_rw = int(im_w / im_h * ref_size)
+        elif im_w < im_h:
+            im_rw = ref_size
+            im_rh = int(im_h / im_w * ref_size)
+    else:
+        im_rh = im_h
+        im_rw = im_w
+    im_rw = im_rw - im_rw % 32
+    im_rh = im_rh - im_rh % 32
+    im = F.interpolate(im, size=(im_rh, im_rw), mode='area')
+    # inference
+    _, _, matte = modnet(im.cuda() if torch.cuda.is_available() else im, True)
+    # resize and save matte
+    matte = F.interpolate(matte, size=(im_h, im_w), mode='area')
+    matte = matte[0][0].data.cpu().numpy()
+    return Image.fromarray(((matte * 255).astype('uint8')), mode='L')