Spaces:
Running
Running
Update scores.
Browse files- data/scores.jsonl +1 -1
data/scores.jsonl
CHANGED
|
@@ -35,7 +35,7 @@
|
|
| 35 |
{"225933": {"metadata": {"team_name": "HausaNLP", "email": "[email protected]", "submission_name": "Gemini-0shot", "submission_description": "A prompt template was created to use the Gemini model in translating test data from English to the 10 languages. The template instructed the model to pay attention to entities while making the translations. This was a zero-shot approach so no sample translations were provided to the model.", "uses_gold": false, "uses_rag": false, "uses_llm": false, "llm_name": "gemini-1.5-flash", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 34.671389216843764, "comet_score": 90.71432484716283, "overall_score": 50.168261795975155}, "th_TH": {"meta_score": 18.79895561357702, "comet_score": 83.41229118261954, "overall_score": 30.68280661315985}, "de_DE": {"meta_score": 38.15520762423417, "comet_score": 89.2959383041823, "overall_score": 53.46527159372105}, "fr_FR": {"meta_score": 38.7740164684355, "comet_score": 88.34536861086372, "overall_score": 53.89429433269751}, "tr_TR": {"meta_score": 40.82271406215068, "comet_score": 92.41551555911354, "overall_score": 56.63017554796197}, "ar_AE": {"meta_score": 32.658895975368374, "comet_score": 88.56474659797881, "overall_score": 47.720507069867466}, "it_IT": {"meta_score": 40.30992546096508, "comet_score": 89.98675601605856, "overall_score": 55.678462204289}, "es_ES": {"meta_score": 47.920569501686025, "comet_score": 91.70959686609903, "overall_score": 62.94880576189662}, "zh_TW": {"meta_score": 8.529525279814743, "comet_score": 87.84999996423721, "overall_score": 15.549335683678928}, "ja_JP": {"meta_score": 35.1018010963195, "comet_score": 91.05520215565186, "overall_score": 50.67022063720286}, "overall": {"meta_score": 33.57430002993948, "comet_score": 89.33497401039673, "overall_score": 47.74081412404504}}}}
|
| 36 |
{"225961": {"metadata": {"team_name": "arancini", "email": "[email protected]", "submission_name": "WikiGemmaMT", "submission_description": "Entity-aware machine translation task submission where we use an entity linking part to retrieve the correct translation of the critical entity and then insert this translation into the gemma-2-9b-it prompt", "uses_gold": true, "uses_rag": false, "uses_llm": false, "llm_name": "gemma-2-9b-it", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 90.73199527744983, "comet_score": 94.26169080651684, "overall_score": 92.46316959400883}, "th_TH": {"meta_score": 90.80359733101247, "comet_score": 92.18212720584897, "overall_score": 91.48766966496227}, "de_DE": {"meta_score": 84.80258679373723, "comet_score": 93.68167441817593, "overall_score": 89.02127584681102}, "fr_FR": {"meta_score": 90.70448307410796, "comet_score": 93.41467595438507, "overall_score": 92.03963279743807}, "tr_TR": {"meta_score": 82.29376257545272, "comet_score": 94.38131328546412, "overall_score": 87.92404190829706}, "ar_AE": {"meta_score": 90.14734990103365, "comet_score": 92.61821818851182, "overall_score": 91.36608180113384}, "it_IT": {"meta_score": 92.42840329540996, "comet_score": 95.34975656952376, "overall_score": 93.86635549806427}, "es_ES": {"meta_score": 89.5841139003372, "comet_score": 94.96621835394807, "overall_score": 92.19668605074357}, "zh_TW": {"meta_score": 50.7912003087611, "comet_score": 91.43674093374456, "overall_score": 65.30618082177756}, "ja_JP": {"meta_score": 90.74001566170713, "comet_score": 93.9324154347158, "overall_score": 92.30862232205908}, "overall": {"meta_score": 85.30275081190094, "comet_score": 93.6224831150835, "overall_score": 88.79797163052956}}}}
|
| 37 |
{"224195": {"metadata": {"team_name": "AMM_CUET", "email": "[email protected]", "submission_name": "EA-MT-GPT4o-FR-IT-NER", "submission_description": "This system is based on GPT-4o and integrates named entity recognition (NER) and entity linking techniques to improve translation quality. The model ensures context-aware entity translations for French and Italian, leveraging fine-tuned prompts and re-ranking strategies to enhance performance.", "uses_gold": false, "uses_rag": false, "uses_llm": false, "llm_name": "GPT-4o", "is_finetuned": true}, "scores": {}}}
|
| 38 |
-
{"226725": {"metadata": {"team_name": "
|
| 39 |
{"226834": {"metadata": {"team_name": "UAlberta", "email": "[email protected]", "submission_name": "PromptGPT", "submission_description": "We prompt a state-of-the-art language model with instructions designed to increase the model's attention in the named entity. We also leverage in context learning to showcase example input output pairs in the prompt.", "uses_gold": false, "uses_rag": false, "uses_llm": false, "llm_name": "GPT4o", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 48.85871704053522, "comet_score": 93.45366500496911, "overall_score": 64.16906398796374}, "th_TH": {"meta_score": 25.26834928923702, "comet_score": 87.31730042902463, "overall_score": 39.194409798320194}, "de_DE": {"meta_score": 49.57454050374404, "comet_score": 91.58179552590384, "overall_score": 64.32761800718511}, "fr_FR": {"meta_score": 50.39341262580055, "comet_score": 90.61494872151223, "overall_score": 64.76774082554711}, "tr_TR": {"meta_score": 48.200312989045386, "comet_score": 93.27380055286409, "overall_score": 63.556876488136126}, "ar_AE": {"meta_score": 43.985045084671206, "comet_score": 91.6797200133047, "overall_score": 59.44854753145341}, "it_IT": {"meta_score": 51.981169085916044, "comet_score": 92.21800744662148, "overall_score": 66.48581431763522}, "es_ES": {"meta_score": 57.02510303484451, "comet_score": 93.21990250123044, "overall_score": 70.76274550443468}, "zh_TW": {"meta_score": 39.03898108838286, "comet_score": 92.09323646259878, "overall_score": 54.83360510141013}, "ja_JP": {"meta_score": 52.36883320281911, "comet_score": 93.51961281188504, "overall_score": 67.14051918880828}, "overall": {"meta_score": 46.669446394499595, "comet_score": 91.89719894699144, "overall_score": 61.46869407508941}}}}
|
| 40 |
{"226819": {"metadata": {"team_name": "GinGer", "email": "[email protected]", "submission_name": "LoRA-nllb-distilled-200-distilled-600M", "submission_description": "Due to the hardware limitation, I applied several standard common MT models. To handle the models complexity and overfitting, I tried the Low Rank Adaptation settings with an early stopping mechanism.", "uses_gold": false, "uses_rag": false, "uses_llm": false, "llm_name": "N/A", "is_finetuned": true}, "scores": {"ar_AE": {"meta_score": 18.4077413679349, "comet_score": 87.0979045485176, "overall_score": 30.392225680278568}, "it_IT": {"meta_score": 25.51981169085916, "comet_score": 89.28317988489412, "overall_score": 39.69391226743653}, "ja_JP": {"meta_score": 0.0, "comet_score": 0.0, "overall_score": 0.0}}}}
|
| 41 |
{"226655": {"metadata": {"team_name": "RAGthoven", "email": "[email protected]", "submission_name": "GPT-4o + WikiData + RAG", "submission_description": "Prompted GPT-4o to do the translation. We utilised RAGthoven preprocessor to include wikidata entity name and it's translation in the user prompt. In order to improve translations we used RAG and included most similar examples in system prompt.", "uses_gold": true, "uses_rag": true, "uses_llm": false, "llm_name": "GPT-4o", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 90.22038567493112, "comet_score": 95.79226015145176, "overall_score": 92.92287217508657}, "th_TH": {"meta_score": 90.4844792573252, "comet_score": 94.53330028392558, "overall_score": 92.46458875332293}, "de_DE": {"meta_score": 85.07488087134105, "comet_score": 94.33062866817046, "overall_score": 89.46399714325253}, "fr_FR": {"meta_score": 91.01555352241537, "comet_score": 94.03287318575197, "overall_score": 92.49961379894908}, "tr_TR": {"meta_score": 82.76324614352784, "comet_score": 95.82904141713597, "overall_score": 88.81819759221935}, "ar_AE": {"meta_score": 91.88475918187817, "comet_score": 94.6260689089842, "overall_score": 93.23526835443606}, "it_IT": {"meta_score": 92.78148293448412, "comet_score": 95.92631808527781, "overall_score": 94.327696007279}, "es_ES": {"meta_score": 89.88385162982391, "comet_score": 95.10102187282989, "overall_score": 92.41886623491352}, "zh_TW": {"meta_score": 81.47433423388652, "comet_score": 94.49984784098217, "overall_score": 87.50502030771659}, "ja_JP": {"meta_score": 89.56538762725137, "comet_score": 95.75071582452524, "overall_score": 92.5548273320138}, "overall": {"meta_score": 88.51483610768646, "comet_score": 95.0422076239035, "overall_score": 91.62109476991893}}}}
|
|
|
|
| 35 |
{"225933": {"metadata": {"team_name": "HausaNLP", "email": "[email protected]", "submission_name": "Gemini-0shot", "submission_description": "A prompt template was created to use the Gemini model in translating test data from English to the 10 languages. The template instructed the model to pay attention to entities while making the translations. This was a zero-shot approach so no sample translations were provided to the model.", "uses_gold": false, "uses_rag": false, "uses_llm": false, "llm_name": "gemini-1.5-flash", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 34.671389216843764, "comet_score": 90.71432484716283, "overall_score": 50.168261795975155}, "th_TH": {"meta_score": 18.79895561357702, "comet_score": 83.41229118261954, "overall_score": 30.68280661315985}, "de_DE": {"meta_score": 38.15520762423417, "comet_score": 89.2959383041823, "overall_score": 53.46527159372105}, "fr_FR": {"meta_score": 38.7740164684355, "comet_score": 88.34536861086372, "overall_score": 53.89429433269751}, "tr_TR": {"meta_score": 40.82271406215068, "comet_score": 92.41551555911354, "overall_score": 56.63017554796197}, "ar_AE": {"meta_score": 32.658895975368374, "comet_score": 88.56474659797881, "overall_score": 47.720507069867466}, "it_IT": {"meta_score": 40.30992546096508, "comet_score": 89.98675601605856, "overall_score": 55.678462204289}, "es_ES": {"meta_score": 47.920569501686025, "comet_score": 91.70959686609903, "overall_score": 62.94880576189662}, "zh_TW": {"meta_score": 8.529525279814743, "comet_score": 87.84999996423721, "overall_score": 15.549335683678928}, "ja_JP": {"meta_score": 35.1018010963195, "comet_score": 91.05520215565186, "overall_score": 50.67022063720286}, "overall": {"meta_score": 33.57430002993948, "comet_score": 89.33497401039673, "overall_score": 47.74081412404504}}}}
|
| 36 |
{"225961": {"metadata": {"team_name": "arancini", "email": "[email protected]", "submission_name": "WikiGemmaMT", "submission_description": "Entity-aware machine translation task submission where we use an entity linking part to retrieve the correct translation of the critical entity and then insert this translation into the gemma-2-9b-it prompt", "uses_gold": true, "uses_rag": false, "uses_llm": false, "llm_name": "gemma-2-9b-it", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 90.73199527744983, "comet_score": 94.26169080651684, "overall_score": 92.46316959400883}, "th_TH": {"meta_score": 90.80359733101247, "comet_score": 92.18212720584897, "overall_score": 91.48766966496227}, "de_DE": {"meta_score": 84.80258679373723, "comet_score": 93.68167441817593, "overall_score": 89.02127584681102}, "fr_FR": {"meta_score": 90.70448307410796, "comet_score": 93.41467595438507, "overall_score": 92.03963279743807}, "tr_TR": {"meta_score": 82.29376257545272, "comet_score": 94.38131328546412, "overall_score": 87.92404190829706}, "ar_AE": {"meta_score": 90.14734990103365, "comet_score": 92.61821818851182, "overall_score": 91.36608180113384}, "it_IT": {"meta_score": 92.42840329540996, "comet_score": 95.34975656952376, "overall_score": 93.86635549806427}, "es_ES": {"meta_score": 89.5841139003372, "comet_score": 94.96621835394807, "overall_score": 92.19668605074357}, "zh_TW": {"meta_score": 50.7912003087611, "comet_score": 91.43674093374456, "overall_score": 65.30618082177756}, "ja_JP": {"meta_score": 90.74001566170713, "comet_score": 93.9324154347158, "overall_score": 92.30862232205908}, "overall": {"meta_score": 85.30275081190094, "comet_score": 93.6224831150835, "overall_score": 88.79797163052956}}}}
|
| 37 |
{"224195": {"metadata": {"team_name": "AMM_CUET", "email": "[email protected]", "submission_name": "EA-MT-GPT4o-FR-IT-NER", "submission_description": "This system is based on GPT-4o and integrates named entity recognition (NER) and entity linking techniques to improve translation quality. The model ensures context-aware entity translations for French and Italian, leveraging fine-tuned prompts and re-ranking strategies to enhance performance.", "uses_gold": false, "uses_rag": false, "uses_llm": false, "llm_name": "GPT-4o", "is_finetuned": true}, "scores": {}}}
|
| 38 |
+
{"226725": {"metadata": {"team_name": "UAlberta", "email": "[email protected]", "submission_name": "WikiEnsemble", "submission_description": "We create an ensemble of three distinct translation methods, including in-context learning with a LLM, and two commercial MT systems. We select, for each instance, a system that accurately translates the named entity, using validation set performance to break ties. We provided GPT with translations of the given named entity. We also check that translations contained those NE translations, for ensembling purposes.", "uses_gold": true, "uses_rag": true, "uses_llm": false, "llm_name": "GPT-4o", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 90.43683589138135, "comet_score": 95.61710354264727, "overall_score": 92.9548530689595}, "th_TH": {"meta_score": 90.0203075137801, "comet_score": 94.11107900611621, "overall_score": 92.02025176374595}, "de_DE": {"meta_score": 85.2280462899932, "comet_score": 94.27794513449204, "overall_score": 89.52486775827927}, "fr_FR": {"meta_score": 89.62488563586459, "comet_score": 94.26329986992388, "overall_score": 91.88559283744956}, "tr_TR": {"meta_score": 83.21037335121842, "comet_score": 95.93105493390318, "overall_score": 89.11907171267431}, "ar_AE": {"meta_score": 91.68682647899715, "comet_score": 94.8610689250983, "overall_score": 93.24694172836786}, "it_IT": {"meta_score": 91.74185955276579, "comet_score": 95.92379437866937, "overall_score": 93.78623192149469}, "es_ES": {"meta_score": 89.35931060322217, "comet_score": 95.30632744437526, "overall_score": 92.23705943992896}, "zh_TW": {"meta_score": 81.14627556927827, "comet_score": 94.2760098675235, "overall_score": 87.2197857556488}, "ja_JP": {"meta_score": 90.38762725137039, "comet_score": 95.7894285465463, "overall_score": 93.01016309426342}, "overall": {"meta_score": 88.28423481378715, "comet_score": 95.03571116492954, "overall_score": 91.50048190808124}}}}
|
| 39 |
{"226834": {"metadata": {"team_name": "UAlberta", "email": "[email protected]", "submission_name": "PromptGPT", "submission_description": "We prompt a state-of-the-art language model with instructions designed to increase the model's attention in the named entity. We also leverage in context learning to showcase example input output pairs in the prompt.", "uses_gold": false, "uses_rag": false, "uses_llm": false, "llm_name": "GPT4o", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 48.85871704053522, "comet_score": 93.45366500496911, "overall_score": 64.16906398796374}, "th_TH": {"meta_score": 25.26834928923702, "comet_score": 87.31730042902463, "overall_score": 39.194409798320194}, "de_DE": {"meta_score": 49.57454050374404, "comet_score": 91.58179552590384, "overall_score": 64.32761800718511}, "fr_FR": {"meta_score": 50.39341262580055, "comet_score": 90.61494872151223, "overall_score": 64.76774082554711}, "tr_TR": {"meta_score": 48.200312989045386, "comet_score": 93.27380055286409, "overall_score": 63.556876488136126}, "ar_AE": {"meta_score": 43.985045084671206, "comet_score": 91.6797200133047, "overall_score": 59.44854753145341}, "it_IT": {"meta_score": 51.981169085916044, "comet_score": 92.21800744662148, "overall_score": 66.48581431763522}, "es_ES": {"meta_score": 57.02510303484451, "comet_score": 93.21990250123044, "overall_score": 70.76274550443468}, "zh_TW": {"meta_score": 39.03898108838286, "comet_score": 92.09323646259878, "overall_score": 54.83360510141013}, "ja_JP": {"meta_score": 52.36883320281911, "comet_score": 93.51961281188504, "overall_score": 67.14051918880828}, "overall": {"meta_score": 46.669446394499595, "comet_score": 91.89719894699144, "overall_score": 61.46869407508941}}}}
|
| 40 |
{"226819": {"metadata": {"team_name": "GinGer", "email": "[email protected]", "submission_name": "LoRA-nllb-distilled-200-distilled-600M", "submission_description": "Due to the hardware limitation, I applied several standard common MT models. To handle the models complexity and overfitting, I tried the Low Rank Adaptation settings with an early stopping mechanism.", "uses_gold": false, "uses_rag": false, "uses_llm": false, "llm_name": "N/A", "is_finetuned": true}, "scores": {"ar_AE": {"meta_score": 18.4077413679349, "comet_score": 87.0979045485176, "overall_score": 30.392225680278568}, "it_IT": {"meta_score": 25.51981169085916, "comet_score": 89.28317988489412, "overall_score": 39.69391226743653}, "ja_JP": {"meta_score": 0.0, "comet_score": 0.0, "overall_score": 0.0}}}}
|
| 41 |
{"226655": {"metadata": {"team_name": "RAGthoven", "email": "[email protected]", "submission_name": "GPT-4o + WikiData + RAG", "submission_description": "Prompted GPT-4o to do the translation. We utilised RAGthoven preprocessor to include wikidata entity name and it's translation in the user prompt. In order to improve translations we used RAG and included most similar examples in system prompt.", "uses_gold": true, "uses_rag": true, "uses_llm": false, "llm_name": "GPT-4o", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 90.22038567493112, "comet_score": 95.79226015145176, "overall_score": 92.92287217508657}, "th_TH": {"meta_score": 90.4844792573252, "comet_score": 94.53330028392558, "overall_score": 92.46458875332293}, "de_DE": {"meta_score": 85.07488087134105, "comet_score": 94.33062866817046, "overall_score": 89.46399714325253}, "fr_FR": {"meta_score": 91.01555352241537, "comet_score": 94.03287318575197, "overall_score": 92.49961379894908}, "tr_TR": {"meta_score": 82.76324614352784, "comet_score": 95.82904141713597, "overall_score": 88.81819759221935}, "ar_AE": {"meta_score": 91.88475918187817, "comet_score": 94.6260689089842, "overall_score": 93.23526835443606}, "it_IT": {"meta_score": 92.78148293448412, "comet_score": 95.92631808527781, "overall_score": 94.327696007279}, "es_ES": {"meta_score": 89.88385162982391, "comet_score": 95.10102187282989, "overall_score": 92.41886623491352}, "zh_TW": {"meta_score": 81.47433423388652, "comet_score": 94.49984784098217, "overall_score": 87.50502030771659}, "ja_JP": {"meta_score": 89.56538762725137, "comet_score": 95.75071582452524, "overall_score": 92.5548273320138}, "overall": {"meta_score": 88.51483610768646, "comet_score": 95.0422076239035, "overall_score": 91.62109476991893}}}}
|