Commit
·
1d11c02
1
Parent(s):
64071e4
feat: Optimise colour mapping for visible models only
Browse files
app.py
CHANGED
|
@@ -127,6 +127,8 @@ paper](https://aclanthology.org/2023.nodalida-1.20):
|
|
| 127 |
|
| 128 |
UPDATE_FREQUENCY_MINUTES = 5
|
| 129 |
MIN_COLOUR_DISTANCE_BETWEEN_MODELS = 200
|
|
|
|
|
|
|
| 130 |
|
| 131 |
|
| 132 |
class Task(BaseModel):
|
|
@@ -170,12 +172,14 @@ INFORMATION_EXTRACTION = Task(name="information extraction", metric="micro_f1_no
|
|
| 170 |
ALL_TASKS = [obj for obj in globals().values() if isinstance(obj, Task)]
|
| 171 |
|
| 172 |
DANISH = Language(code="da", name="Danish")
|
| 173 |
-
NORWEGIAN = Language(code="no", name="Norwegian")
|
| 174 |
-
SWEDISH = Language(code="sv", name="Swedish")
|
| 175 |
-
ICELANDIC = Language(code="is", name="Icelandic")
|
| 176 |
-
GERMAN = Language(code="de", name="German")
|
| 177 |
DUTCH = Language(code="nl", name="Dutch")
|
| 178 |
ENGLISH = Language(code="en", name="English")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
ALL_LANGUAGES = {
|
| 180 |
obj.name: obj for obj in globals().values() if isinstance(obj, Language)
|
| 181 |
}
|
|
@@ -187,6 +191,9 @@ DATASETS = [
|
|
| 187 |
Dataset(name="sb10k", language=GERMAN, task=TEXT_CLASSIFICATION),
|
| 188 |
Dataset(name="dutch-social", language=DUTCH, task=TEXT_CLASSIFICATION),
|
| 189 |
Dataset(name="sst5", language=ENGLISH, task=TEXT_CLASSIFICATION),
|
|
|
|
|
|
|
|
|
|
| 190 |
Dataset(name="suc3", language=SWEDISH, task=INFORMATION_EXTRACTION),
|
| 191 |
Dataset(name="dansk", language=DANISH, task=INFORMATION_EXTRACTION),
|
| 192 |
Dataset(name="norne-nb", language=NORWEGIAN, task=INFORMATION_EXTRACTION),
|
|
@@ -195,6 +202,9 @@ DATASETS = [
|
|
| 195 |
Dataset(name="germeval", language=GERMAN, task=INFORMATION_EXTRACTION),
|
| 196 |
Dataset(name="conll-nl", language=DUTCH, task=INFORMATION_EXTRACTION),
|
| 197 |
Dataset(name="conll-en", language=ENGLISH, task=INFORMATION_EXTRACTION),
|
|
|
|
|
|
|
|
|
|
| 198 |
Dataset(name="scala-sv", language=SWEDISH, task=GRAMMAR),
|
| 199 |
Dataset(name="scala-da", language=DANISH, task=GRAMMAR),
|
| 200 |
Dataset(name="scala-nb", language=NORWEGIAN, task=GRAMMAR),
|
|
@@ -203,6 +213,9 @@ DATASETS = [
|
|
| 203 |
Dataset(name="scala-de", language=GERMAN, task=GRAMMAR),
|
| 204 |
Dataset(name="scala-nl", language=DUTCH, task=GRAMMAR),
|
| 205 |
Dataset(name="scala-en", language=ENGLISH, task=GRAMMAR),
|
|
|
|
|
|
|
|
|
|
| 206 |
Dataset(name="scandiqa-da", language=DANISH, task=READING_COMPREHENSION),
|
| 207 |
Dataset(name="norquad", language=NORWEGIAN, task=READING_COMPREHENSION),
|
| 208 |
Dataset(name="scandiqa-sv", language=SWEDISH, task=READING_COMPREHENSION),
|
|
@@ -210,6 +223,9 @@ DATASETS = [
|
|
| 210 |
Dataset(name="germanquad", language=GERMAN, task=READING_COMPREHENSION),
|
| 211 |
Dataset(name="squad", language=ENGLISH, task=READING_COMPREHENSION),
|
| 212 |
Dataset(name="squad-nl", language=DUTCH, task=READING_COMPREHENSION),
|
|
|
|
|
|
|
|
|
|
| 213 |
Dataset(name="nordjylland-news", language=DANISH, task=SUMMARISATION),
|
| 214 |
Dataset(name="mlsum", language=GERMAN, task=SUMMARISATION),
|
| 215 |
Dataset(name="rrn", language=ICELANDIC, task=SUMMARISATION),
|
|
@@ -217,6 +233,8 @@ DATASETS = [
|
|
| 217 |
Dataset(name="wiki-lingua-nl", language=DUTCH, task=SUMMARISATION),
|
| 218 |
Dataset(name="swedn", language=SWEDISH, task=SUMMARISATION),
|
| 219 |
Dataset(name="cnn-dailymail", language=ENGLISH, task=SUMMARISATION),
|
|
|
|
|
|
|
| 220 |
Dataset(name="danish-citizen-tests", language=DANISH, task=KNOWLEDGE),
|
| 221 |
Dataset(name="danske-talemaader", language=DANISH, task=KNOWLEDGE),
|
| 222 |
Dataset(name="mmlu-no", language=NORWEGIAN, task=KNOWLEDGE),
|
|
@@ -225,6 +243,8 @@ DATASETS = [
|
|
| 225 |
Dataset(name="mmlu-de", language=GERMAN, task=KNOWLEDGE),
|
| 226 |
Dataset(name="mmlu-nl", language=DUTCH, task=KNOWLEDGE),
|
| 227 |
Dataset(name="mmlu", language=ENGLISH, task=KNOWLEDGE),
|
|
|
|
|
|
|
| 228 |
Dataset(name="hellaswag-da", language=DANISH, task=REASONING),
|
| 229 |
Dataset(name="hellaswag-no", language=NORWEGIAN, task=REASONING),
|
| 230 |
Dataset(name="hellaswag-sv", language=SWEDISH, task=REASONING),
|
|
@@ -232,6 +252,7 @@ DATASETS = [
|
|
| 232 |
Dataset(name="hellaswag-de", language=GERMAN, task=REASONING),
|
| 233 |
Dataset(name="hellaswag-nl", language=DUTCH, task=REASONING),
|
| 234 |
Dataset(name="hellaswag", language=ENGLISH, task=REASONING),
|
|
|
|
| 235 |
]
|
| 236 |
|
| 237 |
|
|
@@ -254,7 +275,8 @@ def main() -> None:
|
|
| 254 |
global colour_mapping
|
| 255 |
global seed
|
| 256 |
seed = 4242
|
| 257 |
-
|
|
|
|
| 258 |
|
| 259 |
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
|
| 260 |
gr.Markdown(INTRO_MARKDOWN)
|
|
@@ -266,7 +288,7 @@ def main() -> None:
|
|
| 266 |
choices=all_languages,
|
| 267 |
multiselect=True,
|
| 268 |
label="Languages",
|
| 269 |
-
value=
|
| 270 |
interactive=True,
|
| 271 |
scale=2,
|
| 272 |
)
|
|
@@ -274,7 +296,7 @@ def main() -> None:
|
|
| 274 |
choices=danish_models,
|
| 275 |
multiselect=True,
|
| 276 |
label="Models",
|
| 277 |
-
value=
|
| 278 |
interactive=True,
|
| 279 |
scale=2,
|
| 280 |
)
|
|
@@ -310,11 +332,6 @@ def main() -> None:
|
|
| 310 |
interactive=True,
|
| 311 |
scale=1,
|
| 312 |
)
|
| 313 |
-
update_colours_button = gr.Button(
|
| 314 |
-
value="Update colours",
|
| 315 |
-
interactive=True,
|
| 316 |
-
scale=1,
|
| 317 |
-
)
|
| 318 |
with gr.Row():
|
| 319 |
plot = gr.Plot(
|
| 320 |
value=produce_radial_plot(
|
|
@@ -339,7 +356,7 @@ def main() -> None:
|
|
| 339 |
fn=partial(update_model_ids_dropdown, results_dfs=results_dfs),
|
| 340 |
inputs=[language_names_dropdown, model_ids_dropdown],
|
| 341 |
outputs=model_ids_dropdown,
|
| 342 |
-
)
|
| 343 |
|
| 344 |
# Update plot when anything changes
|
| 345 |
update_plot_kwargs = dict(
|
|
@@ -357,16 +374,23 @@ def main() -> None:
|
|
| 357 |
],
|
| 358 |
outputs=plot,
|
| 359 |
)
|
| 360 |
-
language_names_dropdown.change(
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
).then(**update_plot_kwargs)
|
| 371 |
|
| 372 |
demo.launch()
|
|
@@ -703,29 +727,23 @@ def fetch_results() -> dict[Language, pd.DataFrame]:
|
|
| 703 |
return results_dfs
|
| 704 |
|
| 705 |
|
| 706 |
-
def update_colour_mapping(
|
| 707 |
"""Get a mapping from model ids to RGB triplets.
|
| 708 |
|
| 709 |
Args:
|
| 710 |
-
|
| 711 |
-
The
|
| 712 |
"""
|
| 713 |
global colour_mapping
|
| 714 |
global seed
|
| 715 |
seed += 1
|
| 716 |
|
| 717 |
-
gr.Info(f"Updating colour mapping...")
|
| 718 |
-
|
| 719 |
-
# Get distinct RGB values for all models
|
| 720 |
-
all_models = list(
|
| 721 |
-
{model_id for df in results_dfs.values() for model_id in df.index}
|
| 722 |
-
)
|
| 723 |
-
colour_mapping = dict()
|
| 724 |
-
|
| 725 |
for i in it.count():
|
| 726 |
min_colour_distance = MIN_COLOUR_DISTANCE_BETWEEN_MODELS - i
|
| 727 |
-
retries_left = 10 * len(
|
| 728 |
-
for model_id in
|
|
|
|
|
|
|
| 729 |
random.seed(hash(model_id) + i + seed)
|
| 730 |
r, g, b = 0, 0, 0
|
| 731 |
too_bright, similar_to_other_model = True, True
|
|
|
|
| 127 |
|
| 128 |
UPDATE_FREQUENCY_MINUTES = 5
|
| 129 |
MIN_COLOUR_DISTANCE_BETWEEN_MODELS = 200
|
| 130 |
+
DEFAULT_LANGUAGES = ["Danish"]
|
| 131 |
+
DEFAULT_MODELS = ["gpt-4-0613", "mistralai/Mistral-7B-v0.1"]
|
| 132 |
|
| 133 |
|
| 134 |
class Task(BaseModel):
|
|
|
|
| 172 |
ALL_TASKS = [obj for obj in globals().values() if isinstance(obj, Task)]
|
| 173 |
|
| 174 |
DANISH = Language(code="da", name="Danish")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
DUTCH = Language(code="nl", name="Dutch")
|
| 176 |
ENGLISH = Language(code="en", name="English")
|
| 177 |
+
FAROESE = Language(code="fo", name="Faroese")
|
| 178 |
+
FRENCH = Language(code="fr", name="French")
|
| 179 |
+
GERMAN = Language(code="de", name="German")
|
| 180 |
+
ICELANDIC = Language(code="is", name="Icelandic")
|
| 181 |
+
NORWEGIAN = Language(code="no", name="Norwegian")
|
| 182 |
+
SWEDISH = Language(code="sv", name="Swedish")
|
| 183 |
ALL_LANGUAGES = {
|
| 184 |
obj.name: obj for obj in globals().values() if isinstance(obj, Language)
|
| 185 |
}
|
|
|
|
| 191 |
Dataset(name="sb10k", language=GERMAN, task=TEXT_CLASSIFICATION),
|
| 192 |
Dataset(name="dutch-social", language=DUTCH, task=TEXT_CLASSIFICATION),
|
| 193 |
Dataset(name="sst5", language=ENGLISH, task=TEXT_CLASSIFICATION),
|
| 194 |
+
Dataset(name="fosent", language=FAROESE, task=TEXT_CLASSIFICATION),
|
| 195 |
+
Dataset(name="allocine", language=FRENCH, task=TEXT_CLASSIFICATION),
|
| 196 |
+
|
| 197 |
Dataset(name="suc3", language=SWEDISH, task=INFORMATION_EXTRACTION),
|
| 198 |
Dataset(name="dansk", language=DANISH, task=INFORMATION_EXTRACTION),
|
| 199 |
Dataset(name="norne-nb", language=NORWEGIAN, task=INFORMATION_EXTRACTION),
|
|
|
|
| 202 |
Dataset(name="germeval", language=GERMAN, task=INFORMATION_EXTRACTION),
|
| 203 |
Dataset(name="conll-nl", language=DUTCH, task=INFORMATION_EXTRACTION),
|
| 204 |
Dataset(name="conll-en", language=ENGLISH, task=INFORMATION_EXTRACTION),
|
| 205 |
+
Dataset(name="fone", language=FAROESE, task=INFORMATION_EXTRACTION),
|
| 206 |
+
Dataset(name="eltec", language=FRENCH, task=INFORMATION_EXTRACTION),
|
| 207 |
+
|
| 208 |
Dataset(name="scala-sv", language=SWEDISH, task=GRAMMAR),
|
| 209 |
Dataset(name="scala-da", language=DANISH, task=GRAMMAR),
|
| 210 |
Dataset(name="scala-nb", language=NORWEGIAN, task=GRAMMAR),
|
|
|
|
| 213 |
Dataset(name="scala-de", language=GERMAN, task=GRAMMAR),
|
| 214 |
Dataset(name="scala-nl", language=DUTCH, task=GRAMMAR),
|
| 215 |
Dataset(name="scala-en", language=ENGLISH, task=GRAMMAR),
|
| 216 |
+
Dataset(name="scala-fo", language=FAROESE, task=GRAMMAR),
|
| 217 |
+
Dataset(name="scala-fr", language=FRENCH, task=GRAMMAR),
|
| 218 |
+
|
| 219 |
Dataset(name="scandiqa-da", language=DANISH, task=READING_COMPREHENSION),
|
| 220 |
Dataset(name="norquad", language=NORWEGIAN, task=READING_COMPREHENSION),
|
| 221 |
Dataset(name="scandiqa-sv", language=SWEDISH, task=READING_COMPREHENSION),
|
|
|
|
| 223 |
Dataset(name="germanquad", language=GERMAN, task=READING_COMPREHENSION),
|
| 224 |
Dataset(name="squad", language=ENGLISH, task=READING_COMPREHENSION),
|
| 225 |
Dataset(name="squad-nl", language=DUTCH, task=READING_COMPREHENSION),
|
| 226 |
+
Dataset(name="foqa", language=FAROESE, task=READING_COMPREHENSION),
|
| 227 |
+
Dataset(name="fquad", language=FRENCH, task=READING_COMPREHENSION),
|
| 228 |
+
|
| 229 |
Dataset(name="nordjylland-news", language=DANISH, task=SUMMARISATION),
|
| 230 |
Dataset(name="mlsum", language=GERMAN, task=SUMMARISATION),
|
| 231 |
Dataset(name="rrn", language=ICELANDIC, task=SUMMARISATION),
|
|
|
|
| 233 |
Dataset(name="wiki-lingua-nl", language=DUTCH, task=SUMMARISATION),
|
| 234 |
Dataset(name="swedn", language=SWEDISH, task=SUMMARISATION),
|
| 235 |
Dataset(name="cnn-dailymail", language=ENGLISH, task=SUMMARISATION),
|
| 236 |
+
Dataset(name="orange-sum", language=FRENCH, task=SUMMARISATION),
|
| 237 |
+
|
| 238 |
Dataset(name="danish-citizen-tests", language=DANISH, task=KNOWLEDGE),
|
| 239 |
Dataset(name="danske-talemaader", language=DANISH, task=KNOWLEDGE),
|
| 240 |
Dataset(name="mmlu-no", language=NORWEGIAN, task=KNOWLEDGE),
|
|
|
|
| 243 |
Dataset(name="mmlu-de", language=GERMAN, task=KNOWLEDGE),
|
| 244 |
Dataset(name="mmlu-nl", language=DUTCH, task=KNOWLEDGE),
|
| 245 |
Dataset(name="mmlu", language=ENGLISH, task=KNOWLEDGE),
|
| 246 |
+
Dataset(name="mmlu-fr", language=FRENCH, task=KNOWLEDGE),
|
| 247 |
+
|
| 248 |
Dataset(name="hellaswag-da", language=DANISH, task=REASONING),
|
| 249 |
Dataset(name="hellaswag-no", language=NORWEGIAN, task=REASONING),
|
| 250 |
Dataset(name="hellaswag-sv", language=SWEDISH, task=REASONING),
|
|
|
|
| 252 |
Dataset(name="hellaswag-de", language=GERMAN, task=REASONING),
|
| 253 |
Dataset(name="hellaswag-nl", language=DUTCH, task=REASONING),
|
| 254 |
Dataset(name="hellaswag", language=ENGLISH, task=REASONING),
|
| 255 |
+
Dataset(name="hellaswag-fr", language=FRENCH, task=REASONING),
|
| 256 |
]
|
| 257 |
|
| 258 |
|
|
|
|
| 275 |
global colour_mapping
|
| 276 |
global seed
|
| 277 |
seed = 4242
|
| 278 |
+
colour_mapping = dict()
|
| 279 |
+
update_colour_mapping(model_ids=DEFAULT_MODELS)
|
| 280 |
|
| 281 |
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
|
| 282 |
gr.Markdown(INTRO_MARKDOWN)
|
|
|
|
| 288 |
choices=all_languages,
|
| 289 |
multiselect=True,
|
| 290 |
label="Languages",
|
| 291 |
+
value=DEFAULT_LANGUAGES,
|
| 292 |
interactive=True,
|
| 293 |
scale=2,
|
| 294 |
)
|
|
|
|
| 296 |
choices=danish_models,
|
| 297 |
multiselect=True,
|
| 298 |
label="Models",
|
| 299 |
+
value=DEFAULT_MODELS,
|
| 300 |
interactive=True,
|
| 301 |
scale=2,
|
| 302 |
)
|
|
|
|
| 332 |
interactive=True,
|
| 333 |
scale=1,
|
| 334 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
with gr.Row():
|
| 336 |
plot = gr.Plot(
|
| 337 |
value=produce_radial_plot(
|
|
|
|
| 356 |
fn=partial(update_model_ids_dropdown, results_dfs=results_dfs),
|
| 357 |
inputs=[language_names_dropdown, model_ids_dropdown],
|
| 358 |
outputs=model_ids_dropdown,
|
| 359 |
+
).then(fn=update_colour_mapping, inputs=model_ids_dropdown)
|
| 360 |
|
| 361 |
# Update plot when anything changes
|
| 362 |
update_plot_kwargs = dict(
|
|
|
|
| 374 |
],
|
| 375 |
outputs=plot,
|
| 376 |
)
|
| 377 |
+
language_names_dropdown.change(
|
| 378 |
+
fn=update_colour_mapping, inputs=model_ids_dropdown
|
| 379 |
+
).then(**update_plot_kwargs)
|
| 380 |
+
model_ids_dropdown.change(
|
| 381 |
+
fn=update_colour_mapping, inputs=model_ids_dropdown
|
| 382 |
+
).then(**update_plot_kwargs)
|
| 383 |
+
use_rank_score_checkbox.change(
|
| 384 |
+
fn=update_colour_mapping, inputs=model_ids_dropdown
|
| 385 |
+
).then(**update_plot_kwargs)
|
| 386 |
+
show_scale_checkbox.change(
|
| 387 |
+
fn=update_colour_mapping, inputs=model_ids_dropdown
|
| 388 |
+
).then(**update_plot_kwargs)
|
| 389 |
+
plot_width_slider.change(
|
| 390 |
+
fn=update_colour_mapping, inputs=model_ids_dropdown
|
| 391 |
+
).then(**update_plot_kwargs)
|
| 392 |
+
plot_height_slider.change(
|
| 393 |
+
fn=update_colour_mapping, inputs=model_ids_dropdown
|
| 394 |
).then(**update_plot_kwargs)
|
| 395 |
|
| 396 |
demo.launch()
|
|
|
|
| 727 |
return results_dfs
|
| 728 |
|
| 729 |
|
| 730 |
+
def update_colour_mapping(model_ids: list[str]) -> None:
|
| 731 |
"""Get a mapping from model ids to RGB triplets.
|
| 732 |
|
| 733 |
Args:
|
| 734 |
+
model_ids:
|
| 735 |
+
The model ids to update the colour
|
| 736 |
"""
|
| 737 |
global colour_mapping
|
| 738 |
global seed
|
| 739 |
seed += 1
|
| 740 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 741 |
for i in it.count():
|
| 742 |
min_colour_distance = MIN_COLOUR_DISTANCE_BETWEEN_MODELS - i
|
| 743 |
+
retries_left = 10 * len(model_ids)
|
| 744 |
+
for model_id in model_ids:
|
| 745 |
+
if model_id in colour_mapping:
|
| 746 |
+
continue
|
| 747 |
random.seed(hash(model_id) + i + seed)
|
| 748 |
r, g, b = 0, 0, 0
|
| 749 |
too_bright, similar_to_other_model = True, True
|