Spaces:

Intel
/

low_bit_open_llm_leaderboard

Running

App Files Files Community

low_bit_open_llm_leaderboard / app.py

wenjiao

refactor: update code for latest Gradio API

e0f982a 26 days ago

raw

history blame contribute delete

33.8 kB

	import os
	import gradio as gr
	import pandas as pd
	import re
	from apscheduler.schedulers.background import BackgroundScheduler
	from huggingface_hub import snapshot_download

	from src.display.about import (
	CITATION_BUTTON_LABEL,
	CITATION_BUTTON_TEXT,
	EVALUATION_QUEUE_TEXT,
	INTRODUCTION_TEXT,
	LLM_BENCHMARKS_TEXT,
	FAQ_TEXT,
	TITLE,
	)
	from src.display.css_html_js import custom_css
	from src.display.utils import (
	BENCHMARK_COLS,
	COLS,
	EVAL_COLS,
	EVAL_TYPES,
	NUMERIC_INTERVALS,
	NUMERIC_MODELSIZE,
	TYPES,
	auto_eval_cols,
	GroupDtype,
	ModelType,
	fields,
	WeightType,
	Precision,
	ComputeDtype,
	WeightDtype,
	QuantType
	)
	from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO, REPO, GIT_REQUESTS_PATH, GIT_STATUS_PATH, GIT_RESULTS_PATH
	from src.populate import get_evaluation_queue_df, get_leaderboard_df
	from src.submission.submit import add_new_eval
	from src.scripts.update_all_request_files import update_dynamic_files
	from src.tools.collections import update_collections
	from src.tools.plots import (
	create_metric_plot_obj,
	create_plot_df,
	create_scores_df,
	)
	import plotly.graph_objects as go

	# Module-level UI state; rebound/mutated by the Gradio callbacks in this file.
	selected_indices = []
	selected_values = {}
	selected_dropdown_weight = 'All'

	# Start ephemeral Spaces on PRs (see config in README.md)
	#enable_space_ci()

	# Weight-precision label -> weight dtypes available at that precision.
	precision_to_dtype = {
	    "2bit": ["int2"],
	    "3bit": ["int3"],
	    "4bit": ["int4", "nf4", "fp4"],
	    "8bit": ["int8"],
	    "16bit": ["float16", "bfloat16"],
	    "32bit": ["float32"],
	    "?": ["?"],
	}

	# Reverse lookup: weight dtype -> one-element list holding its precision label.
	dtype_to_precision = {
	    dtype: [precision]
	    for precision, dtypes in precision_to_dtype.items()
	    for dtype in dtypes
	}

	# Filter state shared by the filter callbacks; they rebind these via `global`.
	current_weightDtype = ["int2", "int3", "int4", "nf4", "fp4", "?"]
	current_computeDtype = ["int8", "bfloat16", "float16", "float32"]
	current_quant = [
	    quant.to_str() for quant in QuantType if quant != QuantType.QuantType_None
	]
	current_precision = ["2bit", "3bit", "4bit", "8bit", "?"]


	def display_sort(key):
	    """Return the display rank of a weight-dtype option.

	    Known options get a fixed position; any other key ranks last
	    (``float('inf')``).
	    """
	    known_options = (
	        "All", "?", "int2", "int3", "int4", "fp4", "nf4",
	        "float16", "bfloat16", "float32",
	    )
	    ranks = {name: position for position, name in enumerate(known_options)}
	    return ranks[key] if key in ranks else float('inf')

	def comp_display_sort(key):
	    """Return the display rank of a compute-dtype option.

	    Known options get a fixed position; any other key ranks last
	    (``float('inf')``).
	    """
	    known_options = ("All", "?", "int8", "float16", "bfloat16", "float32")
	    ranks = {name: position for position, name in enumerate(known_options)}
	    return ranks[key] if key in ranks else float('inf')

	def update_quantization_types(selected_quant):
	    """Refresh the dtype dropdowns and precision boxes after the quantization-type filter changes.

	    Args:
	        selected_quant: Quantization-type labels currently ticked in the UI.

	    Returns:
	        A list of three components, in order: weight-dtype dropdown,
	        compute-dtype dropdown, weight-precision checkbox group.
	    """
	    global current_weightDtype
	    global current_computeDtype
	    global current_quant
	    global current_precision

	    if set(current_quant) != set(selected_quant):
	        if any(value != '✖ None' for value in selected_quant):
	            # At least one real quantization type is ticked: reset the
	            # dependent controls to the full quantized option set.
	            selected_weight = ['All', '?', 'int2', 'int3', 'int4', 'nf4', 'fp4', 'int8']
	            selected_compute = ['All', '?', 'int8', 'float16', 'bfloat16', 'float32']
	            selected_precision = ["2bit", "3bit", "4bit", "8bit", "?"]

	            current_weightDtype = selected_weight
	            current_computeDtype = selected_compute
	            current_quant = selected_quant
	            current_precision = selected_precision

	            return [
	                gr.Dropdown(choices=selected_weight, value="All"),
	                gr.Dropdown(choices=selected_compute, value="All"),
	                gr.CheckboxGroup(value=selected_precision),
	            ]

	        # Nothing but '✖ None' (or nothing at all) is ticked. The option
	        # lists above would be unbound here, so remember the selection and
	        # leave the dependent controls as they are.
	        current_quant = selected_quant

	    return [
	        gr.Dropdown(choices=current_weightDtype, value=selected_dropdown_weight),
	        gr.Dropdown(choices=current_computeDtype, value="All"),
	        gr.CheckboxGroup(value=current_precision),
	    ]

	def update_Weight_Precision(temp_precisions):
	    """Recompute the dependent filter controls after the weight-precision boxes change.

	    "16bit"/"32bit" and the remaining precision labels are kept mutually
	    exclusive: the group of the last entry in ``temp_precisions`` wins.

	    Args:
	        temp_precisions: Precision labels currently ticked; may be empty.

	    Returns:
	        A list of four components, in order: weight-dtype dropdown,
	        compute-dtype dropdown, precision checkbox group, quantization-type
	        checkbox group.
	    """
	    global current_weightDtype
	    global current_computeDtype
	    global current_quant
	    global current_precision
	    global selected_dropdown_weight

	    if set(current_precision) == set(temp_precisions):
	        return [
	            gr.Dropdown(choices=current_weightDtype, value=selected_dropdown_weight),
	            gr.Dropdown(choices=current_computeDtype, value="All"),
	            gr.CheckboxGroup(value=current_precision),
	            gr.CheckboxGroup(value=current_quant),
	        ]  # No update needed

	    full_precisions = ("16bit", "32bit")
	    selected_weight = []
	    selected_compute = ['All', '?', 'int8', 'float16', 'bfloat16', 'float32']
	    selected_quant = [t.to_str() for t in QuantType if t != QuantType.QuantType_None]

	    # Presumably the last entry is the box the user ticked most recently.
	    # An empty selection has no last entry, so it takes the else branch.
	    if temp_precisions and temp_precisions[-1] in full_precisions:
	        selected_precisions = [p for p in temp_precisions if p in full_precisions]
	    else:
	        selected_precisions = [p for p in temp_precisions if p not in full_precisions]

	    # De-duplicate while keeping the order the labels arrived in.
	    current_precision = list(dict.fromkeys(selected_precisions))

	    # The remembered weight dtype only survives if it still matches the
	    # single selected precision.
	    if len(current_precision) > 1:
	        selected_dropdown_weight = 'All'
	    elif selected_dropdown_weight != 'All' and set(dtype_to_precision[selected_dropdown_weight]) != set(current_precision):
	        selected_dropdown_weight = 'All'

	    # Map selected precisions to their weight dtypes.
	    for precision in current_precision:
	        selected_weight.extend(precision_to_dtype.get(precision, []))

	    # Special rules for 16bit and 32bit
	    if "16bit" in current_precision:
	        selected_weight = [option for option in selected_weight if option in ["All", "?", "float16", "bfloat16"]]
	        if "int8" in selected_compute:
	            selected_compute.remove("int8")

	    if "32bit" in current_precision:
	        selected_weight = [option for option in selected_weight if option in ["All", "?", "float32"]]
	        if "int8" in selected_compute:
	            selected_compute.remove("int8")

	    if "16bit" in current_precision or "32bit" in current_precision:
	        selected_quant = ['✖ None']
	        if "16bit" in current_precision and "32bit" in current_precision:
	            selected_weight = ["All", "?", "float16", "bfloat16", "float32"]

	    # Ensure "All" and "?" options are included, and listed first.
	    selected_weight = ["All", "?"] + [opt for opt in selected_weight if opt not in ["All", "?"]]
	    selected_compute = ["All", "?"] + [opt for opt in selected_compute if opt not in ["All", "?"]]

	    # Remove duplicates without disturbing the order built above
	    # (a plain set() would shuffle the dropdown options).
	    selected_weight = list(dict.fromkeys(selected_weight))
	    selected_compute = list(dict.fromkeys(selected_compute))

	    # Update global variables
	    current_weightDtype = selected_weight
	    current_computeDtype = selected_compute
	    current_quant = selected_quant

	    # Return updated components
	    return [
	        gr.Dropdown(choices=selected_weight, value=selected_dropdown_weight),
	        gr.Dropdown(choices=selected_compute, value="All"),
	        gr.CheckboxGroup(value=selected_precisions),
	        gr.CheckboxGroup(value=selected_quant),
	    ]

	def update_Weight_Dtype(weight):
	    """Return the precision labels to tick after the weight-dtype dropdown changes.

	    If ``weight`` is 'All' or equals the remembered dropdown value, the
	    current precision selection is returned unchanged. Otherwise the dtype is
	    remembered and its precision label (looked up in ``dtype_to_precision``)
	    is returned as a new list.
	    """
	    global selected_dropdown_weight

	    if weight in (selected_dropdown_weight, 'All'):
	        return current_precision

	    matching_precisions = list(dtype_to_precision[weight])
	    selected_dropdown_weight = weight
	    return matching_precisions




	def restart_space():
	    """Ask ``API`` to restart the Space ``REPO_ID``, authenticating with ``H4_TOKEN``."""
	    API.restart_space(
	        repo_id=REPO_ID,
	        token=H4_TOKEN,
	    )


	def init_space(full_init: bool = True):
	    """Load every dataframe the UI needs.

	    With ``full_init`` set, the local git checkout is pulled and the
	    dynamic-info dataset snapshot is downloaded first; a failure in either
	    step calls ``restart_space()`` and execution then continues.

	    Returns:
	        A 6-tuple ``(leaderboard_df, original_df, plot_df, finished_eval_queue_df,
	        running_eval_queue_df, pending_eval_queue_df)``, where ``leaderboard_df``
	        is a copy of ``original_df``.
	    """
	    if full_init:
	        try:
	            REPO.remotes.origin.pull(REPO.active_branch.name)
	        except Exception:
	            restart_space()

	        try:
	            snapshot_download(
	                repo_id=DYNAMIC_INFO_REPO,
	                local_dir=DYNAMIC_INFO_PATH,
	                repo_type="dataset",
	                tqdm_class=None,
	                etag_timeout=30,
	            )
	        except Exception:
	            restart_space()

	    raw_data, full_df = get_leaderboard_df(
	        results_path=GIT_RESULTS_PATH,
	        requests_path=GIT_STATUS_PATH,
	        dynamic_path=DYNAMIC_INFO_FILE_PATH,
	        cols=COLS,
	        benchmark_cols=BENCHMARK_COLS,
	    )
	    # update_collections(full_df.copy())
	    working_df = full_df.copy()

	    scores_over_time = create_plot_df(create_scores_df(raw_data))

	    finished_df, running_df, pending_df = get_evaluation_queue_df(GIT_STATUS_PATH, EVAL_COLS)

	    return working_df, full_df, scores_over_time, finished_df, running_df, pending_df

	# Run the full initialisation once at import time and keep the results at module level.
	(
	    leaderboard_df,
	    original_df,
	    plot_df,
	    finished_eval_queue_df,
	    running_eval_queue_df,
	    pending_eval_queue_df,
	) = init_space()

	def str_to_bool(value):
	    """Return True only when ``str(value)`` equals "true" ignoring case; False otherwise."""
	    return str(value).lower() == "true"

	# Searching and filtering
	def update_table(
	    hidden_df: pd.DataFrame,
	    columns: list,
	    type_query: list,
	    precision_query: str,
	    size_query: list,
	    params_query: list,
	    hide_models: list,
	    query: str,
	    compute_dtype: str,
	    weight_dtype: str,
	    double_quant: str,
	    group_dtype: str
	):
	    """Rebuild the visible leaderboard from the hidden full table and the current filters.

	    The scalar dropdown values are first expanded into the lists that
	    ``filter_models`` expects, then the frame goes through model filtering,
	    text search and column selection.

	    Args:
	        hidden_df: Unfiltered leaderboard data.
	        columns: Column names the user wants displayed.
	        type_query: Selected quantization-type labels.
	        precision_query: Selected precision labels (forwarded to ``filter_models``).
	        size_query: Selected ``NUMERIC_INTERVALS`` keys.
	        params_query: Selected ``NUMERIC_MODELSIZE`` keys.
	        hide_models: Categories of models to hide.
	        query: Search text; ``;`` separates multiple queries.
	        compute_dtype: Compute dtype, or 'All' for every current option.
	        weight_dtype: Weight dtype, or 'All' / ['All'] for every current option.
	        double_quant: 'All', or a value parsed by ``str_to_bool``.
	        group_dtype: 'All', or a group size; non-integer text falls back to -1.

	    Returns:
	        The filtered frame restricted to the displayed columns.
	    """
	    # 'All' expands to whatever the filter callbacks currently allow.
	    weight_choices = current_weightDtype if weight_dtype in (['All'], 'All') else [weight_dtype]
	    compute_choices = current_computeDtype if compute_dtype == 'All' else [compute_dtype]

	    if group_dtype == 'All':
	        group_choices = [-1, 1024, 256, 128, 64, 32]
	    else:
	        try:
	            group_choices = [int(group_dtype)]
	        except ValueError:
	            group_choices = [-1]

	    double_quant_choices = [True, False] if double_quant == 'All' else [str_to_bool(double_quant)]

	    matching_models = filter_models(
	        df=hidden_df,
	        type_query=type_query,
	        size_query=size_query,
	        precision_query=precision_query,
	        hide_models=hide_models,
	        compute_dtype=compute_choices,
	        weight_dtype=weight_choices,
	        double_quant=double_quant_choices,
	        group_dtype=group_choices,
	        params_query=params_query,
	    )
	    searched = filter_queries(query, matching_models)
	    return select_columns(searched, columns)


	def load_query(request: gr.Request):
	    """Read the ``query`` URL parameter, defaulting to an empty string.

	    Meant to run once at startup. The value is returned twice: once for the
	    search bar and once for a hidden component that triggers a reload only
	    when its value changes.
	    """
	    initial_query = request.query_params.get("query") or ""
	    return (initial_query, initial_query)


	def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
	    """Return the rows whose dummy (search) column contains ``query``, ignoring case.

	    ``query`` is used as a regular expression when it compiles. Text that is
	    not a valid pattern (for example ``c++`` or ``(``) is matched literally
	    instead of raising. Missing values never match.

	    Args:
	        df: Frame that contains the ``auto_eval_cols.dummy`` column.
	        query: Search text typed by the user.

	    Returns:
	        The matching subset of ``df``.
	    """
	    try:
	        re.compile(query)
	        use_regex = True
	    except re.error:
	        # User input is free text; fall back to a plain substring search.
	        use_regex = False

	    searchable = df[auto_eval_cols.dummy.name]
	    # na=False keeps the mask strictly boolean when the column holds NaN.
	    mask = searchable.str.contains(query, case=False, regex=use_regex, na=False)
	    return df[mask]


	def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
	    """Project ``df`` onto the columns to display.

	    The result holds the never-hidden columns first, then the requested
	    columns in ``COLS`` order (only those present in ``df``), and finally the
	    dummy column.
	    """
	    pinned = [col.name for col in fields(auto_eval_cols) if col.never_hidden]
	    # Walk COLS rather than `columns` so the canonical ordering is kept.
	    requested = [name for name in COLS if name in df.columns and name in columns]
	    ordered = [*pinned, *requested, auto_eval_cols.dummy.name]
	    return df[ordered]


	def filter_queries(query: str, filtered_df: pd.DataFrame):
	    """Apply a ``;``-separated search string to ``filtered_df``.

	    Each non-empty term is searched independently with ``search_table``; the
	    hits are concatenated and de-duplicated on (model, precision, revision).
	    If the query is empty, or no term matches anything, the input frame is
	    returned unchanged.

	    Added by Abishek.
	    """
	    if query == "":
	        return filtered_df

	    hits = []
	    for term in (part.strip() for part in query.split(";")):
	        if term == "":
	            continue
	        found = search_table(filtered_df, term)
	        if len(found) > 0:
	            hits.append(found)

	    if not hits:
	        return filtered_df

	    identity_cols = [
	        auto_eval_cols.model.name,
	        auto_eval_cols.precision.name,
	        auto_eval_cols.revision.name,
	    ]
	    return pd.concat(hits).drop_duplicates(subset=identity_cols)


	def _filter_by_intervals(frame: pd.DataFrame, column: str, intervals: list) -> pd.DataFrame:
	    """Keep the rows of ``frame`` whose numeric ``column`` value lies in any of ``intervals``."""
	    interval_index = pd.IntervalIndex(sorted(intervals))
	    numeric_values = pd.to_numeric(frame[column], errors="coerce")
	    # astype(bool) keeps the mask boolean even when ``frame`` is already empty.
	    mask = numeric_values.apply(lambda x: any(interval_index.contains(x))).astype(bool)
	    return frame.loc[mask]


	def filter_models(
	    df: pd.DataFrame, type_query: list, size_query: list, params_query:list, precision_query: list, hide_models: list, compute_dtype: list, weight_dtype: list, double_quant: list, group_dtype: list,
	) -> pd.DataFrame:
	    """Filter the leaderboard by visibility flags, quantization config and size.

	    Every mask is computed from the frame being filtered, so the result does
	    not depend on index alignment between ``df`` and an intermediate subset.

	    Args:
	        df: Leaderboard data.
	        type_query: Selected type labels; only the first character of each
	            (the symbol) is compared with the model-type-symbol column.
	        size_query: ``NUMERIC_INTERVALS`` keys applied to the params column.
	        params_query: ``NUMERIC_MODELSIZE`` keys applied to the model-size column.
	        precision_query: Allowed precision labels ("None" is always allowed too).
	        hide_models: Categories to hide: "Private or deleted",
	            "Contains a merge/moerge", "MoE", "Flagged".
	        compute_dtype: Allowed compute dtypes.
	        weight_dtype: Allowed weight dtypes.
	        double_quant: Allowed double-quant values.
	        group_dtype: Allowed group sizes.

	    Returns:
	        The rows of ``df`` that pass every filter.
	    """
	    filtered_df = df

	    # Element-wise comparisons with True/False are intentional (pandas masks).
	    if "Private or deleted" in hide_models:
	        filtered_df = filtered_df[filtered_df[auto_eval_cols.still_on_hub.name] == True]  # noqa: E712

	    if "Contains a merge/moerge" in hide_models:
	        filtered_df = filtered_df[filtered_df[auto_eval_cols.merged.name] == False]  # noqa: E712

	    if "MoE" in hide_models:
	        filtered_df = filtered_df[filtered_df[auto_eval_cols.moe.name] == False]  # noqa: E712

	    if "Flagged" in hide_models:
	        filtered_df = filtered_df[filtered_df[auto_eval_cols.flagged.name] == False]  # noqa: E712

	    # Drop the '✖' symbol when any other symbol is selected; otherwise match only '✖'.
	    type_emoji = [t[0] for t in type_query]
	    type_emoji = [emoji for emoji in type_emoji if emoji != '✖'] or ['✖']

	    membership_filters = [
	        (auto_eval_cols.model_type_symbol.name, type_emoji),
	        (auto_eval_cols.precision.name, precision_query + ["None"]),
	        (auto_eval_cols.weight_dtype.name, weight_dtype),
	        (auto_eval_cols.compute_dtype.name, compute_dtype),
	        (auto_eval_cols.double_quant.name, double_quant),
	        (auto_eval_cols.group_size.name, group_dtype),
	    ]
	    for column, allowed in membership_filters:
	        filtered_df = filtered_df.loc[filtered_df[column].isin(allowed)]

	    filtered_df = _filter_by_intervals(
	        filtered_df, auto_eval_cols.params.name, [NUMERIC_INTERVALS[s] for s in size_query]
	    )
	    filtered_df = _filter_by_intervals(
	        filtered_df, auto_eval_cols.model_size.name, [NUMERIC_MODELSIZE[s] for s in params_query]
	    )

	    return filtered_df

	def select(df, data: gr.SelectData):
	    """Toggle the clicked table row in the model-comparison selection.

	    The row position (``data.index[0]``) is toggled in ``selected_indices``.
	    If the cell in the row's second column is an HTML anchor, its link text
	    is removed from or added to ``selected_values`` (mapping link text to the
	    raw cell value) accordingly.

	    Returns:
	        A ``gr.CheckboxGroup`` whose choices and value are the current keys
	        of ``selected_values``.
	    """
	    # NOTE(review): toggling is keyed on row position, so after the table is
	    # re-filtered the same position may refer to a different model - confirm
	    # whether that is intended.
	    row_position = data.index[0]
	    deselecting = row_position in selected_indices
	    if deselecting:
	        selected_indices.remove(row_position)
	    else:
	        selected_indices.append(row_position)

	    cell = df.iloc[row_position].iloc[1]
	    anchor = re.search(r'<a[^>]+>([^<]+)</a>', cell)
	    if anchor:
	        label = anchor.group(1)
	        if deselecting:
	            selected_values.pop(label, None)
	        else:
	            selected_values[label] = cell

	    return gr.CheckboxGroup(list(selected_values), value=list(selected_values))

	def init_comparison_data():
	    """Return a checkbox group listing, and ticking, every key of ``selected_values``."""
	    return gr.CheckboxGroup(list(selected_values), value=list(selected_values))

	def remove_html_tags(value):
	    """Strip ``<...>`` tags from strings; return any non-string value untouched."""
	    if not isinstance(value, str):
	        return value
	    return re.sub(r'<[^>]*>', '', value)

	def show_modal():
	    """Return an update that makes the target visible with the "custom-modal" class."""
	    return gr.update(
	        visible=True,
	        elem_classes="custom-modal",
	    )

	def close_modal_logic():
	    """Return an update that hides the target and gives it the "modal-hidden" class."""
	    return gr.update(
	        visible=False,
	        elem_classes="modal-hidden",
	    )

	def generate_spider_chart(df, selected_keys):
	    """Build the radar ("spider") chart comparing the selected models.

	    Args:
	        df: The table currently displayed. Its second column holds the raw
	            cell values stored in ``selected_values``.
	        selected_keys: Labels ticked in the comparison checkbox group; labels
	            that are not in ``selected_values`` are ignored.

	    Returns:
	        ``(fig, cleaned_rows)``: the plotly figure with one trace per selected
	        row, and the selected rows with HTML tags stripped from string cells.
	    """
	    metric_names = [
	        'Average ⬆️', 'ARC-c', 'ARC-e', 'Boolq', 'HellaSwag', 'Lambada',
	        'MMLU', 'Openbookqa', 'Piqa', 'Truthfulqa', 'Winogrande',
	    ]

	    wanted_cells = [selected_values[key] for key in selected_keys if key in selected_values]
	    selected_rows = df[df.iloc[:, 1].isin(wanted_cells)]
	    cleaned_rows = selected_rows.map(remove_html_tags)

	    # The displayed table only carries the columns the user chose to show,
	    # so plot the metrics that are present instead of raising KeyError.
	    shown_metrics = [name for name in metric_names if name in selected_rows.columns]

	    fig = go.Figure()
	    for _, row in selected_rows.iterrows():
	        fig.add_trace(go.Scatterpolar(
	            r=[row[name] for name in shown_metrics],
	            theta=shown_metrics,
	            fill='toself',
	            name=str(row['Model'])
	        ))
	    fig.update_layout(
	        polar=dict(
	            radialaxis=dict(
	                visible=False,
	            )),
	        showlegend=True,
	        margin=dict(l=50, r=50, t=50, b=50),
	        height=400,
	        autosize=True
	    )

	    return fig, cleaned_rows

	# Initial table contents: every type, size, precision and dtype allowed, with
	# private/deleted, merged and flagged models hidden (MoE models are kept).
	leaderboard_df = filter_models(
	    df=leaderboard_df,
	    hide_models=["Private or deleted", "Contains a merge/moerge", "Flagged"],
	    type_query=[quant.to_str(" : ") for quant in QuantType if quant != QuantType.QuantType_None],
	    precision_query=[member.value.name for member in Precision],
	    size_query=list(NUMERIC_INTERVALS),
	    params_query=list(NUMERIC_MODELSIZE),
	    weight_dtype=[member.value.name for member in WeightDtype],
	    compute_dtype=[member.value.name for member in ComputeDtype],
	    double_quant=[True, False],
	    group_dtype=[-1, 1024, 256, 128, 64, 32],
	)


	# Gradio UI definition: a hidden comparison modal, the title/introduction,
	# then tabs for the leaderboard, metric plots, about text, FAQ and submission,
	# followed by the citation box.
	demo = gr.Blocks(fill_width=True)
	with demo:

	# Comparison modal, created invisible. It holds the plot and dataframe that
	# generate_spider_chart fills, plus a close button.
	with gr.Column(elem_classes="custom-modal", visible=False, elem_id="my-modal-container") as modal_window:
	with gr.Column(elem_classes="modal-content"):
	with gr.Column():
	comparison_plot_inside = gr.Plot()
	comparison_df_inside = gr.Dataframe(interactive=False)

	close_btn = gr.Button("Close", variant="primary")

	gr.HTML(TITLE)
	gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

	with gr.Tabs(elem_classes="tab-buttons") as tabs:
	with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
	with gr.Row():
	with gr.Column():
	with gr.Row(variant="compact"):
	search_bar = gr.Textbox(
	placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
	show_label=False,
	elem_id="search-bar",
	)
	with gr.Row():
	shown_columns = gr.CheckboxGroup(
	choices=[
	c.name
	for c in fields(auto_eval_cols)
	if not c.hidden and not c.never_hidden and not c.dummy
	],
	value=[
	c.name
	for c in fields(auto_eval_cols)
	if c.displayed_by_default and not c.hidden and not c.never_hidden
	],
	label="Select columns to show",
	elem_id="column-select",
	interactive=True,
	)

	# NOTE(review): the two checkbox groups below share elem_id "filter-columns-size".
	# filter_columns_parameters (NUMERIC_INTERVALS keys) is wired to update_table's
	# size_query and filter_columns_size (NUMERIC_MODELSIZE keys) to params_query.
	with gr.Row():
	filter_columns_parameters = gr.CheckboxGroup(
	label="Model parameters (in billions of parameters)",
	choices=list(NUMERIC_INTERVALS.keys()),
	value=list(NUMERIC_INTERVALS.keys()),
	interactive=True,
	elem_id="filter-columns-size",
	)
	with gr.Row():
	filter_columns_size = gr.CheckboxGroup(
	label="Model sizes (GB, int4)",
	choices=list(NUMERIC_MODELSIZE.keys()),
	value=list(NUMERIC_MODELSIZE.keys()),
	interactive=True,
	elem_id="filter-columns-size",
	)
	with gr.Column(min_width=320):
	#with gr.Box(elem_id="box-filter"):
	filter_columns_type = gr.CheckboxGroup(
	label="Quantization types",
	choices=[t.to_str() for t in QuantType if t != QuantType.QuantType_None],
	value=[t.to_str() for t in QuantType if t != QuantType.QuantType_None],
	interactive=True,
	elem_id="filter-columns-type",
	)
	# Every precision is offered, but 16bit and 32bit start unticked.
	filter_columns_precision = gr.CheckboxGroup(
	label="Weight precision",
	choices=[i.value.name for i in Precision],
	value=[i.value.name for i in Precision if ( i.value.name != '16bit' and i.value.name != '32bit')],
	interactive=True,
	elem_id="filter-columns-precision",
	)
	with gr.Column(elem_id="quant-config-container") as config:
	gr.HTML("<div class='quant-config-header'>Quantization config</div>")
	with gr.Row():
	filter_columns_computeDtype = gr.Dropdown(choices=[i.value.name for i in ComputeDtype], label="Compute Dtype", multiselect=False, value="All", interactive=True,)
	filter_columns_weightDtype = gr.Dropdown(choices=[i.value.name for i in WeightDtype], label="Weight Dtype", multiselect=False, value="All", interactive=True,)
	filter_columns_doubleQuant = gr.Dropdown(choices=["All", "True", "False"], label="Double Quant", multiselect=False, value="All", interactive=True)
	filter_columns_groupDtype = gr.Dropdown(choices=[i.value.name for i in GroupDtype], label="Group Size", multiselect=False, value="All", interactive=True,)

	with gr.Row():
	with gr.Column(scale=4):
	model_comparison = gr.CheckboxGroup(label="Accuracy Comparison (Selected Models from Table)", choices=list(selected_values.keys()), value=list(selected_values.keys()), interactive=True, elem_id="model_comparison")
	with gr.Column(scale=1, min_width=150):
	spider_btn = gr.Button("Compare", variant="primary", elem_id="compare-button-full")

	# Initial column order: the first user-selected column, then the never-hidden
	# columns, then the remaining user-selected columns.
	never_hidden_cols = [c.name for c in fields(auto_eval_cols) if c.never_hidden]

	user_cols = shown_columns.value

	if len(user_cols) > 0:
	first_user_col = [user_cols[0]]
	remaining_user_cols = user_cols[1:]

	final_cols = first_user_col + never_hidden_cols + remaining_user_cols
	else:
	final_cols = never_hidden_cols

	# The value also carries the dummy column, which is not listed in headers.
	leaderboard_table = gr.components.Dataframe(
	value=leaderboard_df[final_cols + [auto_eval_cols.dummy.name]],
	headers=final_cols,
	datatype="markdown",
	elem_id="leaderboard-table",
	interactive=False,
	visible=True,
	)

	# with gr.BrowserModal(visible=False) as modal:
	# map = gr.Plot()
	# data_table = gr.Dataframe()
	# gr.Column([map, data_table])

	# Selecting a table cell runs select(), which updates the comparison checkbox group.
	leaderboard_table.select(select, leaderboard_table, model_comparison)
	# "Compare" first reveals the modal, then fills it with the chart and the selected rows.
	spider_btn.click(
	fn=show_modal,
	outputs=modal_window
	).then(
	fn=generate_spider_chart,
	inputs=[leaderboard_table, model_comparison],
	outputs=[comparison_plot_inside, comparison_df_inside]
	)
	close_btn.click(
	fn=close_modal_logic,
	outputs=modal_window
	)
	# On page load, repopulate the comparison group from the module-level selected_values.
	demo.load(init_comparison_data, None, model_comparison)

	# Add a placeholder "Weight type" column when the loaded data has none.
	if "Weight type" not in original_df.columns:
	original_df["Weight type"] = "Unknown"

	# Dummy leaderboard for handling the case when the user uses backspace key
	hidden_leaderboard_table_for_search = gr.components.Dataframe(
	value=original_df[COLS],
	headers=COLS,
	datatype=TYPES,
	visible=False,
	)

	# Invisible textbox whose (empty) value is passed to update_table as hide_models.
	hide_models = gr.Textbox(
	placeholder="",
	show_label=False,
	elem_id="search-bar",
	value="",
	visible=False,

	)

	# Pressing ENTER in the search bar rebuilds the table from the hidden full table.
	search_bar.submit(
	update_table,
	[
	hidden_leaderboard_table_for_search,
	shown_columns,
	filter_columns_type,
	filter_columns_precision,
	filter_columns_parameters,
	filter_columns_size,
	hide_models,
	search_bar,
	filter_columns_computeDtype,
	filter_columns_weightDtype,
	filter_columns_doubleQuant,
	filter_columns_groupDtype
	],
	leaderboard_table,
	)

	# The string literal below is disabled code (a hidden search bar driven by a
	# URL query parameter); as a bare string expression it has no effect.
	"""

	# Define a hidden component that will trigger a reload only if a query parameter has been set
	hidden_search_bar = gr.Textbox(value="", visible=False)
	hidden_search_bar.change(
	update_table,
	[
	hidden_leaderboard_table_for_search,
	shown_columns,
	filter_columns_type,
	filter_columns_precision,
	filter_columns_size,
	hide_models,
	search_bar,
	],
	leaderboard_table,
	)
	# Check query parameter once at startup and update search bar + hidden component
	demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])

	"""
	# Cross-updates between the quantization-type, precision and weight-dtype controls.
	filter_columns_type.change(
	update_quantization_types,
	[filter_columns_type],
	[filter_columns_weightDtype, filter_columns_computeDtype, filter_columns_precision]
	)

	filter_columns_precision.change(
	update_Weight_Precision,
	[filter_columns_precision],
	[filter_columns_weightDtype, filter_columns_computeDtype, filter_columns_precision, filter_columns_type]
	)

	filter_columns_weightDtype.change(
	update_Weight_Dtype,
	[filter_columns_weightDtype],
	[filter_columns_precision]
	)
	# filter_columns_computeDtype.change(
	# Compute_Dtype_update,
	# [filter_columns_computeDtype, filter_columns_precision],
	# [filter_columns_precision, filter_columns_type]
	# )



	# A change to any of these controls re-runs update_table with the full set of inputs.
	for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, filter_columns_parameters, hide_models, filter_columns_computeDtype, filter_columns_weightDtype, filter_columns_doubleQuant, filter_columns_groupDtype]:
	selector.change(
	update_table,
	[
	hidden_leaderboard_table_for_search,
	shown_columns,
	filter_columns_type,
	filter_columns_precision,
	filter_columns_parameters,
	filter_columns_size,
	hide_models,
	search_bar,
	filter_columns_computeDtype,
	filter_columns_weightDtype,
	filter_columns_doubleQuant,
	filter_columns_groupDtype
	],
	leaderboard_table,
	queue=True,
	)


	# Two plots built from plot_df: the average column alone, then every benchmark column.
	with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
	with gr.Row():
	with gr.Column():
	chart = create_metric_plot_obj(
	plot_df,
	[auto_eval_cols.average.name],
	title="Average of Top Scores and Human Baseline Over Time (from last update)",
	)
	gr.Plot(value=chart, min_width=500)
	with gr.Column():
	chart = create_metric_plot_obj(
	plot_df,
	BENCHMARK_COLS,
	title="Top Scores and Human Baseline Over Time (from last update)",
	)
	gr.Plot(value=chart, min_width=500)
	with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
	gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

	with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=4):
	gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")

	# Submission form plus read-only views of the finished/running/pending evaluation queues.
	with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=5):
	with gr.Column():
	with gr.Row():
	gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

	with gr.Row():
	gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")

	with gr.Row():
	with gr.Column():
	model_name_textbox = gr.Textbox(label="Model name")
	revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
	private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)

	with gr.Column():
	# The string literal below is disabled UI (precision and weight-dtype
	# dropdowns); as a bare string expression it has no effect.
	"""
	precision = gr.Dropdown(
	choices=[i.value.name for i in Precision if i != Precision.Unknown],
	label="Precision",
	multiselect=False,
	value="4bit",
	interactive=True,
	)
	weight_type = gr.Dropdown(
	choices=[i.value.name for i in WeightDtype],
	label="Weights dtype",
	multiselect=False,
	value="int4",
	interactive=True,
	)
	"""
	# NOTE(review): base_model_name_textbox is created but is not among the
	# inputs passed to add_new_eval below - confirm that is intended.
	base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)",
	visible=not IS_PUBLIC)
	compute_type = gr.Dropdown(
	choices=[i.value.name for i in ComputeDtype if i.value.name != "All"],
	label="Compute dtype",
	multiselect=False,
	value="float16",
	interactive=True,
	)

	submit_button = gr.Button("Submit Eval")
	submission_result = gr.Markdown()
	submit_button.click(
	add_new_eval,
	[
	model_name_textbox,
	revision_name_textbox,
	private,
	compute_type,
	],
	submission_result,
	)

	# Queue tables; each accordion title shows the row count computed at build time.
	with gr.Column():
	with gr.Accordion(
	f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
	open=False,
	):
	with gr.Row():
	finished_eval_table = gr.components.Dataframe(
	value=finished_eval_queue_df,
	headers=EVAL_COLS,
	datatype=EVAL_TYPES,
	row_count=5,
	)
	with gr.Accordion(
	f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
	open=False,
	):
	with gr.Row():
	running_eval_table = gr.components.Dataframe(
	value=running_eval_queue_df,
	headers=EVAL_COLS,
	datatype=EVAL_TYPES,
	row_count=5,
	)

	with gr.Accordion(
	f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
	open=False,
	):
	with gr.Row():
	pending_eval_table = gr.components.Dataframe(
	value=pending_eval_queue_df,
	headers=EVAL_COLS,
	datatype=EVAL_TYPES,
	row_count=5,
	)

	# Collapsed citation box with a copy button.
	with gr.Row():
	with gr.Accordion("📙 Citation", open=False):
	citation_button = gr.Textbox(
	value=CITATION_BUTTON_TEXT,
	label=CITATION_BUTTON_LABEL,
	lines=20,
	elem_id="citation-button",
	buttons=["copy"],
	)

	# Background jobs: restart the Space every 3 hours and refresh the dynamic
	# files every 12 hours.
	scheduler = BackgroundScheduler()
	scheduler.add_job(restart_space, trigger="interval", hours=3)
	scheduler.add_job(update_dynamic_files, trigger="interval", hours=12)
	scheduler.start()

	# Enable the request queue, then start serving with the custom stylesheet.
	demo.queue(default_concurrency_limit=40)
	demo.launch(css=custom_css)