Spaces:

joaogante
/

tf_xla_generate_benchmarks

Running

App Files Files Community

tf_xla_generate_benchmarks / app.py

joaogante HF staff

update generation type names

7e13cda about 2 years ago

raw

history blame contribute delete

No virus

10.9 kB

	import matplotlib
	matplotlib.use('Agg')

	import functools

	import gradio as gr
	import matplotlib.pyplot as plt
	import seaborn as sns
	import pandas as pd


	# Benchmark results, indexed as BENCHMARK_DATA[generation type][model name][GPU name].
	# Each leaf is a 3-item list in the benchmark order: pytorch, tf eager, tf xla; units = ms
	# NOTE(review): several model/GPU pairs hold [0, 0, 0] -- presumably a placeholder for
	# "not benchmarked on this GPU" rather than a measured time; confirm. They are plotted as-is.
	BENCHMARK_DATA = {
	"Greedy Decoding": {
	"DistilGPT2": {
	"T4": [336.22, 3976.23, 115.84],
	"3090": [158.38, 1835.82, 46.56],
	"A100": [371.49, 4073.84, 60.94],
	},
	"GPT2": {
	"T4": [607.31, 7140.23, 185.12],
	"3090": [297.03, 3308.31, 76.68],
	"A100": [691.75, 7323.60, 110.72],
	},
	"OPT-1.3B": {
	"T4": [1303.41, 15939.07, 1488.15],
	"3090": [428.33, 7259.43, 468.37],
	"A100": [1125.00, 16713.63, 384.52],
	},
	"GPTJ-6B": {
	"T4": [0, 0, 0],
	"3090": [0, 0, 0],
	"A100": [2664.28, 32783.09, 1440.06],
	},
	"T5 Small": {
	"T4": [99.88, 1527.73, 18.78],
	"3090": [55.09, 665.70, 9.25],
	"A100": [124.91, 1642.07, 13.72],
	},
	"T5 Base": {
	"T4": [416.56, 6095.05, 106.12],
	"3090": [223.00, 2503.28, 46.67],
	"A100": [550.76, 6504.11, 64.57],
	},
	"T5 Large": {
	"T4": [645.05, 9587.67, 225.17],
	"3090": [377.74, 4216.41, 97.92],
	"A100": [944.17, 10572.43, 116.52],
	},
	"T5 3B": {
	"T4": [1493.61, 13629.80, 1494.80],
	"3090": [694.75, 6316.79, 489.33],
	"A100": [1801.68, 16707.71, 411.93],
	},
	},
	"Sampling": {
	"DistilGPT2": {
	"T4": [617.40, 6078.81, 221.65],
	"3090": [310.37, 2843.73, 85.44],
	"A100": [729.05, 7140.05, 121.83],
	},
	"GPT2": {
	"T4": [1205.34, 12256.98, 378.69],
	"3090": [577.12, 5637.11, 160.02],
	"A100": [1377.68, 15605.72, 234.47],
	},
	"OPT-1.3B": {
	"T4": [2166.72, 19126.25, 2341.32],
	"3090": [706.50, 9616.97, 731.58],
	"A100": [2019.70, 28621.09, 690.36],
	},
	"GPTJ-6B": {
	"T4": [0, 0, 0],
	"3090": [0, 0, 0],
	"A100": [5150.35, 70554.07, 2744.49],
	},
	"T5 Small": {
	"T4": [235.93, 3599.47, 41.07],
	"3090": [100.41, 1093.33, 23.24],
	"A100": [267.42, 3366.73, 28.53],
	},
	"T5 Base": {
	"T4": [812.59, 7966.73, 196.85],
	"3090": [407.81, 4904.54, 97.56],
	"A100": [1033.05, 11521.97, 123.93],
	},
	"T5 Large": {
	"T4": [1114.22, 16433.31, 424.91],
	"3090": [647.61, 7184.71, 160.97],
	"A100": [1668.73, 19962.78, 200.75],
	},
	"T5 3B": {
	"T4": [2282.56, 20891.22, 2196.02],
	"3090": [1011.32, 9735.97, 734.40],
	"A100": [2769.64, 26440.65, 612.98],
	},
	},
	"Beam Search": {
	"DistilGPT2": {
	"T4": [2407.89, 19442.60, 3313.92],
	"3090": [998.52, 8286.03, 900.28],
	"A100": [2237.41, 21771.40, 760.47],
	},
	"GPT2": {
	"T4": [3767.43, 34813.93, 5559.42],
	"3090": [1633.04, 14606.93, 1533.55],
	"A100": [3705.43, 34586.23, 1295.87],
	},
	"OPT-1.3B": {
	"T4": [16649.82, 78500.33, 21894.31],
	# NOTE(review): 508518 is the only integer timing in this table and is far larger than the
	# T4/A100 PyTorch timings for the same model -- possibly a dropped decimal point (5085.18?).
	# Confirm against the raw benchmark output before changing it.
	"3090": [508518, 32822.81, 5762.46],
	"A100": [5967.32, 78334.56, 4096.38],
	},
	"GPTJ-6B": {
	"T4": [0, 0, 0],
	"3090": [0, 0, 0],
	"A100": [15119.10, 134000.40, 10214.17],
	},
	"T5 Small": {
	"T4": [283.64, 25089.12, 1391.66],
	"3090": [137.38, 10680.28, 486.96],
	"A100": [329.28, 24747.38, 513.99],
	},
	"T5 Base": {
	"T4": [1383.21, 44809.14, 3920.40],
	"3090": [723.11, 18657.48, 1258.60],
	"A100": [2360.85, 45085.07, 1107.58],
	},
	"T5 Large": {
	"T4": [1663.50, 81902.41, 9551.29],
	"3090": [922.53, 35524.30, 2838.86],
	"A100": [2168.22, 86890.00, 2373.04],
	},
	"T5 3B": {
	"T4": [0, 0, 0],
	"3090": [1521.05, 35337.30, 8282.09],
	"A100": [3162.54, 88453.65, 5585.20],
	},
	},
	}
	# File that get_plot writes the rendered chart to (and returns); overwritten on every call.
	FIGURE_PATH = "plt.png"
	# Resolution (dots per inch) used when saving the chart image.
	FIG_DPI = 300


	def get_plot(model_name, plot_eager, generate_type):
	    """Render the benchmark bar chart for one model and return the image path.

	    Args:
	        model_name: Key of ``BENCHMARK_DATA[generate_type]`` selecting the model.
	        plot_eager: ``"No"`` drops the "TF (Eager Execution)" bars; any other
	            value keeps all three frameworks.
	        generate_type: Top-level key of ``BENCHMARK_DATA`` (the generation
	            strategy whose timings are plotted).

	    Returns:
	        ``FIGURE_PATH``, the file the chart was saved to. The same file is
	        overwritten on every call.

	    Raises:
	        KeyError: If ``generate_type`` or ``model_name`` is not in ``BENCHMARK_DATA``.
	    """
	    # Columns are the GPUs, rows follow the per-GPU list order of BENCHMARK_DATA.
	    df = pd.DataFrame(BENCHMARK_DATA[generate_type][model_name])
	    df["framework"] = ["PyTorch", "TF (Eager Execution)", "TF (XLA)"]
	    # Long format: one row per (framework, GPU) pair, as expected by seaborn.
	    df = pd.melt(df, id_vars=["framework"], value_vars=["T4", "3090", "A100"])
	    if plot_eager == "No":
	        df = df[df["framework"] != "TF (Eager Execution)"]

	    g = sns.catplot(
	        data=df,
	        kind="bar",
	        x="variable",
	        y="value",
	        hue="framework",
	        palette={"PyTorch": "blue", "TF (Eager Execution)": "orange", "TF (XLA)": "red"},
	        alpha=.9,
	    )
	    ax = g.facet_axis(0, 0)
	    fig = ax.figure
	    try:
	        g.despine(left=True)
	        g.set_axis_labels("GPU", "Generation time (ms)")
	        g.legend.set_title("Framework")

	        # Add the number to the top of each bar
	        for container in ax.containers:
	            ax.bar_label(container)

	        fig.savefig(FIGURE_PATH, dpi=FIG_DPI)
	    finally:
	        # catplot registers a new figure with pyplot on every call; without an
	        # explicit close they accumulate for the lifetime of the app process.
	        plt.close(fig)
	    return FIGURE_PATH

	# Model names offered by every tab's dropdown, and the one selected at start-up.
	_MODEL_CHOICES = ["DistilGPT2", "GPT2", "OPT-1.3B", "GPTJ-6B", "T5 Small", "T5 Base", "T5 Large", "T5 3B"]
	_DEFAULT_MODEL = "T5 Small"

	# Bullet shared by the parameter notes of every generation tab.
	_PADDING_NOTE = (
	    "- `pad_to_multiple_of = 64` for Tensorflow XLA models. "
	    "Others do not pad (input prompts between 2 and 33 tokens)."
	)

	# Tab title -> the tab-specific markdown bullets shown under the controls.
	# The title is also passed to get_plot as its generate_type. Dict order is tab order.
	_GENERATION_TABS = {
	    "Greedy Decoding": ["- `max_new_tokens = 64`;"],
	    "Sampling": ["- `max_new_tokens = 128`;", "- `temperature = 2.0`;", "- `top_k = 50`;"],
	    "Beam Search": ["- `max_new_tokens = 256`;", "- `num_beams = 16`;"],
	}


	def _build_generation_tab(generate_type, parameter_lines):
	    """Add one benchmark tab (controls, parameter notes, plot) to the open gr.Tabs context.

	    Args:
	        generate_type: Tab title; also forwarded to ``get_plot`` as ``generate_type``.
	        parameter_lines: Markdown bullet strings describing this tab's benchmark
	            parameters, listed before the shared padding note.
	    """
	    plot_fn = functools.partial(get_plot, generate_type=generate_type)
	    with gr.TabItem(generate_type):
	        with gr.Row():
	            with gr.Column():
	                model_selector = gr.Dropdown(
	                    choices=_MODEL_CHOICES,
	                    value=_DEFAULT_MODEL,
	                    label="Model",
	                    interactive=True,
	                )
	                eager_enabler = gr.Radio(
	                    ["Yes", "No"],
	                    value="Yes",
	                    label="Plot TF Eager Execution?",
	                    interactive=True,
	                )
	                gr.Markdown(
	                    "\n".join(
	                        [f"### {generate_type} benchmark parameters", *parameter_lines, _PADDING_NOTE]
	                    )
	                )
	            # Show plot when the gradio app is initialized
	            plot = gr.Image(value=plot_fn(_DEFAULT_MODEL, "Yes"))
	        # Either control changing redraws the plot from the current value of both.
	        for control in (model_selector, eager_enabler):
	            control.change(fn=plot_fn, inputs=[model_selector, eager_enabler], outputs=plot)


	with gr.Blocks() as demo:
	    gr.Markdown(
	        """
	        # TensorFlow XLA Text Generation Benchmark
	        Instructions:
	        1. Pick a tab for the type of generation (or for benchmark information);
	        2. Select a model from the dropdown menu;
	        3. Optionally omit results from TensorFlow Eager Execution, if you wish to better compare the performance of
	        PyTorch to TensorFlow with XLA.
	        """
	    )
	    with gr.Tabs():
	        for tab_title, tab_parameter_lines in _GENERATION_TABS.items():
	            _build_generation_tab(tab_title, tab_parameter_lines)
	        with gr.TabItem("Benchmark Information"):
	            gr.Dataframe(
	                headers=["Parameter", "Value"],
	                value=[
	                    ["Transformers Version", "4.21"],
	                    ["TensorFlow Version", "2.9.1"],
	                    ["Pytorch Version", "1.11.0"],
	                    ["OS", "22.04 LTS (3090) / Debian 10 (other GPUs)"],
	                    ["CUDA", "11.6 (3090) / 11.3 (others GPUs)"],
	                    ["Number of Runs", "100 (the first run was discarded to ignore compilation time)"],
	                    ["Is there code to reproduce?", "Yes -- https://gist.github.com/gante/f0017e3f13ac11b0c02e4e4db351f52f"],
	                ],
	            )

	demo.launch()