import gradio as gr
from time import time
import torch
import os
# import nltk
import argparse
import random
import numpy as np
import faiss
from argparse import Namespace
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader
from functools import partial
from sklearn.manifold import TSNE
from transformers import AutoTokenizer, MarianTokenizer, AutoModel, AutoModelForSeq2SeqLM, MarianMTModel
metadata_all = {}

model_es = "Helsinki-NLP/opus-mt-en-es"
model_fr = "Helsinki-NLP/opus-mt-en-fr"
model_zh = "Helsinki-NLP/opus-mt-en-zh"

tokenizer_es = AutoTokenizer.from_pretrained(model_es)
tokenizer_fr = AutoTokenizer.from_pretrained(model_fr)
tokenizer_zh = AutoTokenizer.from_pretrained(model_zh)

model_tr_es = MarianMTModel.from_pretrained(model_es)
model_tr_fr = MarianMTModel.from_pretrained(model_fr)
model_tr_zh = MarianMTModel.from_pretrained(model_zh)

dict_models = {
    'en-es': model_es,
    'en-fr': model_fr,
    'en-zh': model_zh,
}

dict_models_tr = {
    'en-es': model_tr_es,
    'en-fr': model_tr_fr,
    'en-zh': model_tr_zh,
}

dict_tokenizer_tr = {
    'en-es': tokenizer_es,
    'en-fr': tokenizer_fr,
    'en-zh': tokenizer_zh,
}
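# The three dicts above key everything by language pair so the rest of the code can
# stay model-agnostic. A rough usage sketch (illustrative only; 'en-es' is one of
# the three supported pairs):
#
#   pair = 'en-es'
#   batch = dict_tokenizer_tr[pair]("How are you?", return_tensors="pt")
#   out_ids = dict_models_tr[pair].generate(**batch)
#   print(dict_tokenizer_tr[pair].decode(out_ids[0], skip_special_tokens=True))
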
def translation_model(w1, model):
    """Translate `w1` with the selected model pair and return the generation output
    together with the input and output token embeddings."""
    inputs = dict_tokenizer_tr[model](w1, return_tensors="pt")
    input_embeddings = dict_models_tr[model].get_encoder().embed_tokens(inputs.input_ids)
    num_ret_seq = 1
    translated = dict_models_tr[model].generate(
        **inputs,
        num_beams=5,
        num_return_sequences=num_ret_seq,
        return_dict_in_generate=True,
        output_attentions=False,
        output_hidden_states=True,
        output_scores=True,
    )
    tgt_text = dict_tokenizer_tr[model].decode(translated.sequences[0], skip_special_tokens=True)
    target_embeddings = dict_models_tr[model].get_decoder().embed_tokens(translated.sequences)
    return tgt_text, translated, inputs.input_ids, input_embeddings, target_embeddings
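# A rough usage sketch (names are from this file; output values are illustrative):
#
#   tgt_text, generation, input_ids, src_embeds, tgt_embeds = translation_model("Hello world", "en-es")
#   # tgt_text   -> decoded translation string
#   # src_embeds -> encoder embedding lookup, shape (1, src_len, d_model)
#   # tgt_embeds -> decoder embedding lookup over the generated sequence
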
def create_vocab_multiple(embeddings_list, model):
    """Build a token-level vocabulary from a list of per-sentence embeddings.

    Args:
        embeddings_list (list): dicts with 'tokens' (token ids per sentence) and
            'embeddings' (one embedding per token).

    Returns:
        tuple: (vocabulary mapping token id -> {token, count, text, embed},
                list of token-id lists, one per sentence)
    """
    print("START VOCAB CREATION MULTIPLE \n \n ")
    vocab = {}
    sentence_tokens_text_list = []
    for embeddings in embeddings_list:
        tokens_id = embeddings['tokens']  # [[token ids] x n_sentences]
        for sent_i, sentence in enumerate(tokens_id):
            sentence_tokens = []
            for tok_i, token in enumerate(sentence):
                sentence_tokens.append(token)
                if token not in vocab:
                    vocab[token] = {
                        'token': token,
                        'count': 1,
                        'text': dict_tokenizer_tr[model].decode([token]),
                        'embed': embeddings['embeddings'][sent_i][tok_i],
                    }
                else:
                    vocab[token]['count'] += 1
            sentence_tokens_text_list.append(sentence_tokens)
    print("END VOCAB CREATION MULTIPLE \n \n ")
    return vocab, sentence_tokens_text_list
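# The resulting vocabulary is keyed by token id; a single entry looks roughly like
# (values illustrative):
#
#   vocab[3627] == {'token': 3627, 'count': 2, 'text': 'world',
#                   'embed': <d_model-dim tensor>}
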
def vocab_words_all_prefix(token_embeddings, model, suffix="@@", prefix='▁'):
    """Group subword tokens into words. Marian/SentencePiece tokenizers mark the
    start of a word with `prefix` ('▁'); a token without the prefix continues the
    current word. A word embedding is the mean of its token embeddings."""
    vocab = {}
    sentence_words_text_list = []
    n_prefix = len(prefix) if prefix else 0
    for input_sentences in token_embeddings:
        for sent_i, sentence in enumerate(input_sentences['tokens']):
            words_text_list = []
            word = ''
            tokens_ids = []
            embeddings = []
            ids_to_tokens = dict_tokenizer_tr[model].convert_ids_to_tokens(sentence)
            to_save = False
            for tok_i, token_text in enumerate(ids_to_tokens):
                token_id = sentence[tok_i]
                if token_text[:n_prefix] == prefix:
                    # A new word starts here: first save the previous word, if any.
                    if to_save:
                        vocab[word] = {
                            'word': word,
                            'text': word,
                            'count': 1,
                            'tokens_ids': tokens_ids,
                            'embed': np.mean(np.array(embeddings), 0).tolist()
                        }
                        words_text_list.append(word)
                    tokens_ids = [token_id]
                    embeddings = [input_sentences['embeddings'][sent_i][tok_i]]
                    word = token_text[n_prefix:]
                    to_save = True
                else:
                    if token_text in dict_tokenizer_tr[model].special_tokens_map.values():
                        # Save the pending word before handling the special token.
                        if to_save:
                            vocab[word] = {
                                'word': word,
                                'text': word,
                                'count': 1,
                                'tokens_ids': tokens_ids,
                                'embed': np.mean(np.array(embeddings), 0).tolist()
                            }
                            words_text_list.append(word)
                        # A special token is a single-token word with no continuation.
                        tokens_ids = [token_id]
                        embeddings = [input_sentences['embeddings'][sent_i][tok_i]]
                        vocab[token_text] = {
                            'word': token_text,
                            'count': 1,
                            'text': token_text,
                            'tokens_ids': tokens_ids,
                            'embed': np.mean(np.array(embeddings), 0).tolist()
                        }
                        words_text_list.append(token_text)
                        to_save = False
                    else:
                        # Continuation subword: extend the current word. It may not be
                        # final yet, so do not save it here.
                        to_save = True
                        word += token_text
                        tokens_ids.append(token_id)
                        embeddings.append(input_sentences['embeddings'][sent_i][tok_i])
            if to_save:
                # Save the last word of the sentence.
                if word not in vocab:
                    vocab[word] = {
                        'word': word,
                        'count': 1,
                        'text': word,
                        'tokens_ids': tokens_ids,
                        'embed': np.mean(np.array(embeddings), 0).tolist()
                    }
                    words_text_list.append(word)
                else:
                    vocab[word]['count'] += 1
            sentence_words_text_list.append(words_text_list)
    return vocab, sentence_words_text_list
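# Example of the grouping, assuming the Marian SentencePiece convention
# (token strings are illustrative):
#
#   tokens: ['▁trans', 'lation', '▁works', '</s>']
#   words:  {'translation': {'tokens_ids': [id1, id2], 'embed': mean of 2 vectors, ...},
#            'works': {...}, '</s>': {...}}
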
def create_index_voronoi(vocab):
    """Build a FAISS IVF (Voronoi-cell) index over the token vocabulary.

    Returns the index plus metadata mapping each FAISS row id back to its token.
    """
    nb_embds = []  # ordered embeddings list
    metadata = {}
    i_pos = 0
    for key_token, token_values in vocab.items():
        nb_embds.append(token_values['embed'])
        metadata[i_pos] = {'token': token_values['token'], 'text': token_values['text']}
        i_pos += 1
    xb = np.array(nb_embds).astype('float32')  # elements to index
    d = len(xb[0])  # dimension of each element
    nlist = 5       # number of Voronoi cells
    quantizer = faiss.IndexFlatL2(d)
    index = faiss.IndexIVFFlat(quantizer, d, nlist)
    index.train(xb)
    index.add(xb)
    return index, metadata
def create_index_voronoi_words(vocab):
    """Build a FAISS IVF (Voronoi-cell) index over the word vocabulary.

    Returns the index plus metadata mapping each FAISS row id back to its word.
    """
    nb_embds = []  # ordered embeddings list
    metadata = {}
    i_pos = 0
    for key_token, token_values in vocab.items():
        nb_embds.append(token_values['embed'])
        metadata[i_pos] = {'word': token_values['word'], 'tokens': token_values['tokens_ids'], 'text': token_values['text']}
        i_pos += 1
    xb = np.array(nb_embds).astype('float32')  # elements to index
    d = len(xb[0])  # dimension of each element
    nlist = 5       # number of Voronoi cells
    quantizer = faiss.IndexFlatL2(d)
    index = faiss.IndexIVFFlat(quantizer, d, nlist)
    index.train(xb)
    index.add(xb)
    return index, metadata
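# A minimal sketch of querying one of these indexes directly (standard FAISS API;
# `index.nprobe` widens the search to more Voronoi cells than the default of 1;
# the 'works' key is illustrative):
#
#   index, meta = create_index_voronoi_words(words_vocab)
#   index.nprobe = 2
#   D, I = index.search(np.array([words_vocab['works']['embed']], dtype='float32'), 5)
#   print([meta[i]['word'] for i in I[0] if i != -1])
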
def search_query_vocab(index, vocab_queries, topk=10, limited_search=[]):
    """Query the word index with a vocabulary of word embeddings.

    Args:
        index: FAISS index over the reference word vocabulary.
        vocab_queries: vocab-format dict, one entry per word:
            {'word': ..., 'count': ..., 'text': ..., 'tokens_ids': ..., 'embed': ...}
        topk (int, optional): number of similar words to retrieve. Defaults to 10.

    Returns:
        tuple: distance matrix D, index matrix I, and metadata mapping each query
        row position back to its query word.
    """
    nb_q_embds = []  # ordered embeddings list
    metadata = {}
    qi_pos = 0
    for key, token_values in vocab_queries.items():
        metadata[qi_pos] = {'word': token_values['word'], 'tokens': token_values['tokens_ids'], 'text': token_values['text']}
        qi_pos += 1
        nb_q_embds.append(token_values['embed'])
    xq = np.array(nb_q_embds).astype('float32')  # elements to query
    D, I = index.search(xq, topk)
    return D, I, metadata
def search_query_vocab_token(index, vocab_queries, topk=10, limited_search=[]):
    """Query the token index with a vocabulary of token embeddings.

    Returns:
        tuple: distance matrix D, index matrix I, and metadata mapping each query
        row position back to its query token.
    """
    nb_q_embds = []  # ordered embeddings list
    metadata = {}
    qi_pos = 0
    for key, token_values in vocab_queries.items():
        metadata[qi_pos] = {'token': token_values['token'], 'text': token_values['text']}
        qi_pos += 1
        nb_q_embds.append(token_values['embed'])
    xq = np.array(nb_q_embds).astype('float32')  # elements to query
    D, I = index.search(xq, topk)
    return D, I, metadata
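# Reading the search output: row q of D/I corresponds to query position q in
# `metadata`; FAISS pads missing neighbours with -1, which the callers below
# filter out. Illustrative values:
#
#   D[0] -> [0.0, 12.3, ...]   # squared L2 distances, ascending
#   I[0] -> [42, 7, ..., -1]   # row ids into the reference metadata
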
def build_search(query_embeddings, model, type="input"):
    """Build token and word vocabularies for the query sentences and look up their
    nearest neighbours in the prebuilt reference indexes."""
    global metadata_all
    # Build vocabularies for the queries.
    vocab_queries, sentence_tokens_list = create_vocab_multiple(query_embeddings, model)
    words_vocab_queries, sentence_words_list = vocab_words_all_prefix(query_embeddings, model, suffix="@@", prefix="▁")

    # Token-level search against the reference index.
    index_vor_tokens = metadata_all[type]['tokens'][1]
    md_tokens = metadata_all[type]['tokens'][2]
    D, I, meta = search_query_vocab_token(index_vor_tokens, vocab_queries)
    qi_pos = 0
    similar_tokens = {}
    for dist, ind in zip(D, I):
        try:
            similar_tokens[str(meta[qi_pos]['token'])] = {
                'token': meta[qi_pos]['token'],
                'text': meta[qi_pos]['text'],
                "similar_topk": [md_tokens[i_index]['token'] for i_index in ind if (i_index != -1)],
                "distance": [dist[i] for (i, i_index) in enumerate(ind) if (i_index != -1)],
            }
        except Exception:
            print("\n ERROR ", qi_pos, dist, ind)
        qi_pos += 1

    # Word-level search against the reference index.
    index_vor_words = metadata_all[type]['words'][1]
    md_words = metadata_all[type]['words'][2]
    Dw, Iw, metaw = search_query_vocab(index_vor_words, words_vocab_queries)
    qi_pos = 0
    similar_words = {}
    for dist, ind in zip(Dw, Iw):
        try:
            similar_words[str(metaw[qi_pos]['word'])] = {
                'word': metaw[qi_pos]['word'],
                'text': metaw[qi_pos]['word'],
                "similar_topk": [md_words[i_index]['word'] for i_index in ind if (i_index != -1)],
                "distance": [dist[i] for (i, i_index) in enumerate(ind) if (i_index != -1)],
            }
        except Exception:
            print("\n ERROR ", qi_pos, dist, ind)
        qi_pos += 1

    return {'tokens': {'D': D, 'I': I, 'meta': meta, 'vocab_queries': vocab_queries, 'similar': similar_tokens, 'sentence_key_list': sentence_tokens_list},
            'words': {'D': Dw, 'I': Iw, 'meta': metaw, 'vocab_queries': words_vocab_queries, 'sentence_key_list': sentence_words_list, 'similar': similar_words}
            }
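# Shape of one 'similar' entry, as consumed later by the front end
# (values illustrative):
#
#   similar_words['works'] == {'word': 'works', 'text': 'works',
#                              'similar_topk': ['works', 'runs', ...],
#                              'distance': [0.0, 15.2, ...]}
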
def build_reference(all_embeddings, model):
    """Build the reference vocabularies and FAISS indexes (tokens and words)."""
    vocab, sentence_tokens = create_vocab_multiple(all_embeddings, model)
    words_vocab, sentences = vocab_words_all_prefix(all_embeddings, model, suffix="@@", prefix="▁")
    index_tokens, meta_tokens = create_index_voronoi(vocab)
    index_words, meta_words = create_index_voronoi_words(words_vocab)
    return {'tokens': [vocab, index_tokens, meta_tokens],
            'words': [words_vocab, index_words, meta_words]
            }
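# The [vocab, index, metadata] triples returned here are what build_search() and
# filtered_projection() unpack positionally via metadata_all[type][key][0..2], e.g.:
#
#   reference = build_reference(input_embeddings, 'en-es')
#   words_vocab, words_index, words_meta = reference['words']
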
def embds_input_projection_vocab(vocab, key="token"):
    """Project the vocabulary embeddings to 2-D with t-SNE.

    Returns one row per entry: [x, y, id, text, position]. If t-SNE fails
    (e.g. too few samples), the coordinates fall back to (0, 0).
    """
    t0 = time()
    nb_ids = []    # ordered ids list
    nb_embds = []  # ordered embeddings list
    nb_text = []   # ordered texts list
    tsne_error = []
    for _, token_values in vocab.items():
        tsne_error.append([0, 0])
        nb_ids.append(token_values[key])
        nb_text.append(token_values['text'])
        nb_embds.append(token_values['embed'])
    X = np.array(nb_embds).astype('float32')  # elements to project
    try:
        tsne = TSNE(random_state=0, n_iter=1000)
        tsne_results = tsne.fit_transform(X)
        # zip-like array: [[x, y, id, text, position], ...]
        tsne_results = np.c_[tsne_results, nb_ids, nb_text, range(len(nb_ids))]
    except Exception:
        tsne_results = np.c_[tsne_error, nb_ids, nb_text, range(len(nb_ids))]
    t1 = time()
    print("t-SNE: %.2g sec" % (t1 - t0))
    return tsne_results.tolist()
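# Note on the row format: np.c_ stacks columns, and mixing floats with strings makes
# NumPy upcast the whole array to strings, so every field comes back as text.
# Illustrative row:
#
#   ['-12.4', '3.1', '3627', 'world', '0']   # [x, y, id, text, position]
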
def filtered_projection(similar_key, vocab, type="input", key="word"):
    """Project the query vocabulary plus its similar reference entries with t-SNE."""
    global metadata_all
    vocab_proj = vocab.copy()
    # Collect every reference entry that appeared in some query's top-k.
    source_words_voc_similar = set()
    for key_i in similar_key:
        words_set = similar_key[key_i]
        source_words_voc_similar.update(words_set['similar_topk'])
    print(len(source_words_voc_similar))
    source_embeddings_filtered = {key_value: metadata_all[type][key][0][key_value] for key_value in source_words_voc_similar}
    vocab_proj.update(source_embeddings_filtered)
    try:
        result_TSNE = embds_input_projection_vocab(vocab_proj, key=key[:-1])  # singular: 'words' -> 'word'
        dict_projected_embds_all = {str(embds[2]): [embds[0], embds[1], embds[2], embds[3], embds[4]] for embds in result_TSNE}
    except Exception:
        print('TSNE error', type, key)
        dict_projected_embds_all = {}
    return dict_projected_embds_all
def first_function(w1, model):
    """Translate the reference sentences and build the FAISS reference indexes."""
    global metadata_all
    sentences = w1.split("\n")
    all_sentences = []
    translated_text = ''
    input_embeddings = []
    output_embeddings = []
    for sentence in sentences:
        params = translation_model(sentence, model)
        all_sentences.append(params)
        translated_text += params[0] + ' \n'
        input_embeddings.append({
            'embeddings': params[3].detach(),  # used to build the embedding vocabulary
            'tokens': params[2].tolist(),      # one translation = one sentence
        })
        output_embeddings.append({
            'embeddings': params[4].detach(),
            'tokens': params[1].sequences.tolist(),
        })
    # Build the FAISS reference indexes for this model.
    result_input = build_reference(input_embeddings, model)
    result_output = build_reference(output_embeddings, model)
    metadata_all = {'input': result_input, 'output': result_output}
    return [translated_text, params]
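# Note the asymmetry between the two entry points: first_function() only *builds*
# the reference state in `metadata_all`, while first_function_tr() below *queries*
# it. The reference step therefore has to run once before any analysis translation.
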
def first_function_tr(w1, model, var2={}):
    """Translate the analysis sentences, find similar tokens/words in the reference
    indexes, and return the translation plus a JSON payload for the plots."""
    global metadata_all
    print("SEARCH -- ")
    sentences = w1.split("\n")
    all_sentences = []
    translated_text = ''
    input_embeddings = []
    output_embeddings = []
    for sentence in sentences:
        params = translation_model(sentence, model)
        all_sentences.append(params)
        translated_text += params[0] + ' \n'
        input_embeddings.append({
            'embeddings': params[3].detach(),  # used to build the embedding vocabulary
            'tokens': params[2].tolist(),      # one translation = one sentence
        })
        output_embeddings.append({
            'embeddings': params[4].detach(),
            'tokens': params[1].sequences.tolist(),
        })
    # Query the prebuilt FAISS indexes with the new sentences.
    result_search = {}
    result_search['input'] = build_search(input_embeddings, model, type='input')
    result_search['output'] = build_search(output_embeddings, model, type='output')
    json_out = {'input': {'tokens': {}, 'words': {}}, 'output': {'tokens': {}, 'words': {}}}
    dict_projected = {}
    for type in ['input', 'output']:
        dict_projected[type] = {}
        for key in ['tokens', 'words']:
            similar_key = result_search[type][key]['similar']
            vocab = result_search[type][key]['vocab_queries']
            dict_projected[type][key] = filtered_projection(similar_key, vocab, type=type, key=key)
            json_out[type][key]['similar_queries'] = similar_key
            json_out[type][key]['tnse'] = dict_projected[type][key]  # 'tnse' key name is expected by plotsjs.js
            json_out[type][key]['key_text_list'] = result_search[type][key]['sentence_key_list']
    return [translated_text, [json_out, json_out['output']['words'], json_out['output']['tokens']]]
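# Shape of the payload handed to the JS side (abridged; one branch shown):
#
#   json_out['input']['words'] == {
#       'similar_queries': {...},   # word -> top-k similar reference words + distances
#       'tnse': {...},              # word -> [x, y, id, text, position] from t-SNE
#       'key_text_list': [...],     # per-sentence word lists
#   }
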
## First create the HTML scaffolding and divs for the D3 plots.
html = """
<html>
<script async src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.0/jquery.min.js"></script>
<script async data-require="d3@3.5.3" data-semver="3.5.3"
        src="//cdnjs.cloudflare.com/ajax/libs/d3/3.5.3/d3.js"></script>
<body>
    <div id="select_div">
        <select id="select_type" class="form-select" aria-label="select example" hidden>
            <option selected value="words">Words</option>
            <option value="tokens">Tokens</option>
        </select>
    </div>
    <div id="d3_embed_div">
        <div class="row">
            <div class="col-6">
                <div id="d3_embeds_input_words" class="d3_embed words"></div>
            </div>
            <div class="col-6">
                <div id="d3_embeds_output_words" class="d3_embed words"></div>
            </div>
            <div class="col-6">
                <div id="d3_embeds_input_tokens" class="d3_embed tokens"></div>
            </div>
            <div class="col-6">
                <div id="d3_embeds_output_tokens" class="d3_embed tokens"></div>
            </div>
        </div>
    </div>
    <div id="d3_graph_div">
        <div class="row">
            <div class="col-4">
                <div id="d3_graph_input_words" class="d3_graph words"></div>
            </div>
            <div class="col-4">
                <div id="similar_input_words" class=""></div>
            </div>
            <div class="col-4">
                <div id="d3_graph_output_words" class="d3_graph words"></div>
                <div id="similar_output_words" class="d3_graph words"></div>
            </div>
        </div>
        <div class="row">
            <div class="col-6">
                <div id="d3_graph_input_tokens" class="d3_graph tokens"></div>
                <div id="similar_input_tokens" class="d3_graph tokens"></div>
            </div>
            <div class="col-6">
                <div id="d3_graph_output_tokens" class="d3_graph tokens"></div>
                <div id="similar_output_tokens" class="d3_graph tokens"></div>
            </div>
        </div>
    </div>
</body>
</html>
"""
html0 = """
<html>
<script async src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.0/jquery.min.js"></script>
<script async data-require="d3@3.5.3" data-semver="3.5.3"
        src="//cdnjs.cloudflare.com/ajax/libs/d3/3.5.3/d3.js"></script>
<body>
    <div id="select_div">
        <select id="select_type" class="form-select" aria-label="select example" hidden>
            <option selected value="words">Words</option>
            <option value="tokens">Tokens</option>
        </select>
    </div>
</body>
</html>
"""
html_col1 = """
<div id="d3_graph_input_words" class="d3_graph words"></div>
<div id="d3_graph_input_tokens" class="d3_graph tokens"></div>
"""

html_col2 = """
<div id="similar_input_words" class=""></div>
<div id="similar_output_words" class=""></div>
<div id="similar_input_tokens" class=""></div>
<div id="similar_output_tokens" class=""></div>
"""

html_col3 = """
<div id="d3_graph_output_words" class="d3_graph words"></div>
<div id="d3_graph_output_tokens" class="d3_graph tokens"></div>
"""
# Unused legend row:
# <div class="row">
#     <div class="col-6" id="d3_legend_data_source"></div>
#     <div class="col-6" id="d3_legend_similar_source"></div>
# </div>
def second_function(w1, j2):
    """Handoff step: fires after the JS callbacks so the JSON payload can be
    transferred; the Python side just acknowledges the transition."""
    print("second_function -- after the js", w1, j2)
    return "transition to second js function finished."
with gr.Blocks(js="plotsjs.js") as demo:
    gr.Markdown(
        """
        # MAKE NMT Workshop \t `Embeddings representation`
        """)
    with gr.Row():
        with gr.Column(scale=1):
            model_radio_c = gr.Radio(choices=['en-es', 'en-zh', 'en-fr'], value="en-es", label='', container=False)
        with gr.Column(scale=2):
            gr.Markdown(
                """
                ### Reference Translation Sentences
                Enter at least 50 sentences to be used for comparison.
                This is submitted just once.
                """)
            in_text = gr.Textbox(lines=2, label="reference source text")
            out_text = gr.Textbox(label="reference target text", interactive=False)
            out_text2 = gr.Textbox(visible=False)
            var2 = gr.JSON(visible=False)
            btn = gr.Button("Reference Translation")
        with gr.Column(scale=3):
            gr.Markdown(
                """
                ### Translation Sentences
                Sentences to be analysed.
                """)
            in_text_tr = gr.Textbox(lines=2, label="source text")
            out_text_tr = gr.Textbox(label="target text", interactive=False)
            out_text2_tr = gr.Textbox(visible=False)
            var2_tr = gr.JSON(visible=False)
            btn_faiss = gr.Button("Translation")
    with gr.Row():
        # input_mic = gr.HTML(html)  # single-block layout, kept for reference
        with gr.Column(scale=1):
            input_mic = gr.HTML(html0)
            input_html2 = gr.HTML(html_col2)
        with gr.Column(scale=2):
            input_html1 = gr.HTML(html_col1)
        with gr.Column(scale=2):
            input_html3 = gr.HTML(html_col3)

    ## The first functions take (text, model) and return (out_text, var2); each click
    ## runs the Python step and then the matching JS callback.
    btn.click(first_function, [in_text, model_radio_c], [out_text, var2], js="(in_text,model_radio_c) => testFn_out(in_text,model_radio_c)")
    btn_faiss.click(first_function_tr, [in_text_tr, model_radio_c], [out_text_tr, var2_tr], js="(in_text_tr,model_radio_c) => testFn_out(in_text_tr,model_radio_c)")
    ## second_function fires when a translation textbox changes; its JS callback
    ## receives the JSON payload (var2 / var2_tr) and draws the plots.
    out_text.change(second_function, [out_text, var2], out_text2, js="(out_text,var2) => testFn_out_json(var2)")
    out_text_tr.change(second_function, [out_text_tr, var2_tr], out_text2_tr, js="(out_text_tr,var2_tr) => testFn_out_json_tr(var2_tr)")
    # demo.load(None, None, None, js="plotsjs.js")  # run script on load (unused)

if __name__ == "__main__":
    demo.launch()