Orpheus-MIDI-Loops-Mixer

Running on Zero

File size: 14,433 Bytes

ef689dc
be84f49
ef689dc
f6d08a7
 
be84f49
f6d08a7
 
be84f49
f6d08a7
 
 
 
 
 
 
 
 
be84f49
f6d08a7
 
 
 
 
ef689dc
f6d08a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be84f49
f6d08a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
913cbef
f6d08a7
 
 
 
 
 
 
 
 
 
 
 
 
 
a609809
35840fb
9368837
 
 
 
 
 
 
 
 
 
f6d08a7
 
 
 
 
 
bfb9e64
 
 
f6d08a7
bfb9e64
 
 
 
 
f6d08a7
 
 
9368837
 
 
f6d08a7
 
 
 
 
 
 
 
bfb9e64
 
 
 
a76fb9a
bfb9e64
 
a4fdd27
bfb9e64
 
 
 
a4fdd27
bfb9e64
 
 
 
93514f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6d08a7
a4fdd27
 
 
 
 
f6d08a7
 
 
 
 
 
 
 
 
 
 
a4fdd27
 
f6d08a7
a4fdd27
f6d08a7
 
 
 
 
a4fdd27
93514f5
a4fdd27
 
 
 
a609809
a4fdd27
 
 
 
 
 
 
 
93514f5
bc0d3b0
a4fdd27
 
 
 
a609809
a4fdd27
 
 
 
 
 
 
 
18dbd38
 
a4fdd27
18dbd38
a4fdd27
a609809
a4fdd27
 
 
 
 
 
 
1cede21
a4fdd27
 
1cede21
 
 
 
 
a609809
1cede21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7e97e7
 
 
 
1cede21
 
a4fdd27
a609809
a4fdd27
 
 
 
 
 
 
 
 
2ca7cab
bc0d3b0
2ca7cab
 
93514f5
 
 
2ca7cab
a4fdd27
93514f5
a4fdd27
 
93514f5
a4fdd27
93514f5
a4fdd27
93514f5
a4fdd27
93514f5
 
cad6dbf
 
18dbd38
cad6dbf
 
 
 
a4fdd27
93514f5
cad6dbf
 
 
 
 
 
a4fdd27
139321b
a4fdd27
93514f5
a4fdd27
3454095
 
 
a4fdd27
93514f5
 
 
 
 
a4fdd27
93514f5
a4fdd27
 
 
54cedbc
a4fdd27
 
 
 
 
93514f5
 
a4fdd27
 
93514f5
 
f6d08a7
a4fdd27
 
 
 
 
 
93514f5
 
f6d08a7
a4fdd27
 
 
 
 
 
 
93514f5
 
 
 
 
 
a4fdd27
93514f5
a4fdd27
f6d08a7
a4fdd27
f6d08a7
a4fdd27
 
 
 
 
2ca7cab
f6d08a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be84f49
 
f6d08a7
 
be84f49
f6d08a7
 
 
 
 
 
 
 
 
2ca7cab
f6d08a7
a4fdd27
bc0d3b0
a4fdd27
 
f6d08a7
2ca7cab
f6d08a7
 
 
bc0d3b0
f6d08a7
 
 
 
a4fdd27
 
 
f6d08a7
a4fdd27
f6d08a7
2ca7cab
 
f6d08a7
 
 
 
62d4565
f6d08a7

#============================================================================================
# https://huggingface.co/spaces/projectlosangeles/Orpheus-MIDI-Loops-Mixer
#============================================================================================

# Startup section: imports every dependency of the app and configures the
# torch backends. The print() calls only trace import progress in the logs.
print('=' * 70)
print('Orpheus MIDI Loops Mixer Gradio App')

print('=' * 70)
print('Loading core Orpheus MIDI Loops Mixer modules...')

import os
import copy

# The time module is aliased to `reqtime`; Mix_MIDI_Loops uses it to measure
# how long a request takes.
import time as reqtime
import datetime
from pytz import timezone

print('=' * 70)
print('Loading main Orpheus MIDI Loops Mixer modules...')

# Set before torch and the transformer module are imported.
# NOTE(review): presumably read by x_transformer_2_3_1 to turn on flash
# attention -- confirm against that module.
os.environ['USE_FLASH_ATTENTION'] = '1'

import torch

# Process-wide torch backend settings: 'high' float32 matmul precision, TF32
# allowed for CUDA matmul and cuDNN, flash scaled-dot-product attention enabled.
torch.set_float32_matmul_precision('high')
torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
torch.backends.cuda.enable_flash_sdp(True)

from huggingface_hub import hf_hub_download

import TMIDIX

from midi_to_colab_audio import midi_to_colab_audio

# Wildcard import. TransformerWrapper, Decoder, AutoregressiveWrapper and top_k
# are used later in this file without any other import, so they presumably
# come from here.
from x_transformer_2_3_1 import *

import random

import tqdm

print('=' * 70)
print('Loading aux Orpheus MIDI Loops Mixer modules...')

import matplotlib.pyplot as plt

import gradio as gr
# Provides the @spaces.GPU decorator applied to Mix_MIDI_Loops.
import spaces

print('=' * 70)
print('PyTorch version:', torch.__version__)
print('=' * 70)
print('Done!')
print('Enjoy! :)')
print('=' * 70)

#==================================================================================

# File name of the model checkpoint; passed to hf_hub_download() below.
MODEL_CHECKPOINT = 'Orpheus_Bridge_Music_Transformer_Trained_Model_43450_steps_0.8334_loss_0.7629_acc.pth'

# Path of the SoundFont handed to midi_to_colab_audio() when rendering audio.
# NOTE(review): the name is misspelled ("SOUDFONT"); it is kept as-is because
# Mix_MIDI_Loops references it under this exact name.
SOUDFONT_PATH = 'SGM-v2.01-YamahaGrand-Guit-Bass-v2.7.sf2'

#==================================================================================

# Build the transformer, load its checkpoint from the Hugging Face Hub,
# compile it and move it to the GPU. The resulting globals `model` and `ctx`
# are used by Mix_MIDI_Loops.
print('=' * 70)
print('Instantiating model...')

device_type = 'cuda'
dtype = 'bfloat16'

# Map the dtype name to the torch dtype and create the autocast context that
# Mix_MIDI_Loops enters around model.generate().
ptdtype = {'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
ctx = torch.amp.autocast(device_type=device_type, dtype=ptdtype)

SEQ_LEN = 1668
# Used as both ignore_index and pad_value of the autoregressive wrapper; the
# vocabulary size passed to the model is PAD_IDX+1.
PAD_IDX = 18819

model = TransformerWrapper(num_tokens = PAD_IDX+1,
                           max_seq_len = SEQ_LEN,
                           attn_layers = Decoder(dim = 2048,
                                                 depth = 8,
                                                 heads = 32,
                                                 rotary_pos_emb = True,
                                                 attn_flash = True
                                                 )
                           )

model = AutoregressiveWrapper(model, ignore_index=PAD_IDX, pad_value=PAD_IDX)

print('=' * 70)
print('Loading model checkpoint...')      

# Download the checkpoint file (or reuse the local hub cache) and get its path.
model_checkpoint = hf_hub_download(repo_id='asigalov61/Orpheus-Music-Transformer', 
                                   filename=MODEL_CHECKPOINT
                                  )

# weights_only=True restricts unpickling to tensors and plain containers.
model.load_state_dict(torch.load(model_checkpoint, 
                                 map_location=device_type, 
                                 weights_only=True
                                )
                     )

# The state dict is loaded into the uncompiled module first; `model` is then
# rebound to the torch.compile() wrapper.
model = torch.compile(model, mode='max-autotune')

model.to(device_type)
model.eval()

print('=' * 70)
print('Done!')
print('=' * 70)
print('Model will use', dtype, 'precision...')
print('=' * 70)

#==================================================================================

# Download and load the loops dataset into the global `loops_data`.
print('=' * 70)
print('Loading Orpheus MIDI Loops dataset...')

orpheus_loops_dataset_file = hf_hub_download(repo_id='asigalov61/Orpheus-Music-Transformer', 
                                             filename='orpheus_data/190191_Orpheus_MIDI_Loops_MP_Dataset_CC_BY_NC_SA.pickle'
                                            )

# As indexed by Mix_MIDI_Loops, each entry is used as
# [title, token list, list of indexes of other loops in this dataset].
# NOTE(review): going by its name, the reader unpickles the file -- pickle
# executes arbitrary code on load, so this relies on trusting the Hub repo.
loops_data = TMIDIX.Tegridy_Any_Pickle_File_Reader(orpheus_loops_dataset_file)

print('=' * 70)
print('Done!')
print('=' * 70)
print('Loaded', len(loops_data), 'loops')
print('=' * 70)

#==================================================================================

def tokens_to_score(tokens, abs_time):
    """Decode a flat token sequence into a list of score notes.

    Token ranges handled (every other value is skipped):

        0..255         time shift: the running time advances by token * 16
        256..16767     patch and pitch: patch = (token-256) // 128,
                       pitch = (token-256) % 128
        16768..18815   duration and velocity: emits one note that uses the
                       current time, channel, pitch and patch

    Patches 0..127 are bound to channels in order of first appearance;
    channel 9 is never handed out because it is reserved for patch 128.
    When no free channel is left, a new patch takes over channel 15 and
    replaces the patch previously bound to it.

    Args:
        tokens: iterable of integer tokens.
        abs_time: value the running time starts from.

    Returns:
        A tuple (notes, end_time). Each note is
        ['note', start_time, duration, channel, pitch, velocity, patch];
        end_time is the running time after all time-shift tokens were applied.
    """

    notes = []
    now = abs_time

    # State carried from token to token. These defaults are what a note gets
    # if a duration/velocity token shows up before any patch/pitch token.
    channel = 0
    pitch = 60
    patch = 0

    channel_patch = [-1] * 16       # patch bound to each channel, -1 = none
    channel_taken = [False] * 16
    channel_taken[9] = True         # reserved for patch 128

    for tok in tokens:

        if 0 <= tok < 256:
            now += tok * 16

        elif 256 <= tok < 16768:
            patch, pitch = divmod(tok - 256, 128)

            if patch == 128:
                channel = 9

            elif patch in channel_patch:
                channel = channel_patch.index(patch)

            else:
                if False in channel_taken:
                    channel = channel_taken.index(False)
                    channel_taken[channel] = True
                else:
                    # Every channel is in use: recycle the last one.
                    channel = 15

                channel_patch[channel] = patch

        elif 16768 <= tok < 18816:
            dur_step, vel_step = divmod(tok - 16768, 8)

            notes.append(['note',
                          now,
                          dur_step * 16,
                          channel,
                          pitch,
                          (vel_step + 1) * 15,
                          patch])

    return notes, now

#==================================================================================

@spaces.GPU
def Mix_MIDI_Loops(num_loops_to_mix,
                   use_one_loop,
                   model_temperature,
                   model_sampling_top_k
                  ):
    """Chain dataset loops into one composition, joined by generated bridges.

    A random loop longer than 512 tokens is picked as the start. For each
    further loop the model is prompted with the tail of the current loop and
    the head of the next one and generates a bridge between them. The result
    is written to a MIDI file, rendered to audio and plotted.

    Args:
        num_loops_to_mix: total number of loops in the composition.
        use_one_loop: when true, the starting loop is bridged to itself instead
            of moving on to linked loops.
        model_temperature: sampling temperature passed to model.generate().
        model_sampling_top_k: `k` passed to the top_k logits filter.

    Returns:
        A tuple (used_loops_titles, output_audio, output_plot, output_midi):
        a text listing of the loop titles used, a (16000, audio) pair, the
        score plot returned by TMIDIX.plot_ms_SONG, and the MIDI file name.
    """

    #===============================================================================

    print('=' * 70)
    print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    start_time = reqtime.time()
    print('=' * 70)

    print('=' * 70)
    print('Requested settings:')
    print('=' * 70)
    print('Num loops to mix:', num_loops_to_mix)
    print('Use one loop:', use_one_loop)
    print('Model temperature:', model_temperature)
    print('Model top k:', model_sampling_top_k)

    print('=' * 70)

    #==================================================================

    print('Generating...')

    song = []
    song_indexes = []
    song_titles = []
    song_parts = []

    # Draw random loops until one longer than 512 tokens comes up.
    while len(song) <= 512:
        lidx = random.randint(0, len(loops_data)-1)
        song = loops_data[lidx][1]

    song_indexes.append(lidx)
    song_titles.append(loops_data[lidx][0])
    song_parts.append(loops_data[lidx][1])

    for i in tqdm.tqdm(range(num_loops_to_mix-1)):

        # The first two tokens of a loop are replaced by a single token 1.
        # NOTE(review): presumably this normalizes the loop's leading tokens
        # for the bridge prompt -- confirm against the dataset format.
        left_chunk = [1] + loops_data[lidx][1][2:]

        if use_one_loop:
            right_chunk = [1] + loops_data[lidx][1][2:]

        else:
            # Next loop: the first loop linked to the current one that has not
            # been used yet and is longer than 512 tokens (the same minimum as
            # for the starting loop). A single filtered search always
            # terminates and checks the length of the candidate itself.
            ridx = next((l for l in loops_data[lidx][2]
                         if l not in song_indexes and len(loops_data[l][1]) > 512),
                        None)

            if ridx is None:
                # No usable linked loop is left: finish with what we have.
                break

            right_chunk = [1] + loops_data[ridx][1][2:]

            lidx = ridx
            song_titles.append(loops_data[lidx][0])
            song_indexes.append(lidx)

        # Prompt layout: 18815, tail of the left loop, 18816, head of the right
        # loop, 18817, then the last 64 tokens of the left loop as the start of
        # the continuation.
        seq = [18815] + left_chunk[-512:] + [18816] + right_chunk[:512] + [18817] + left_chunk[-64:]

        x = torch.LongTensor(seq).cuda()

        y_val = []
        rcount = 0

        # A bridge is accepted when its last 64 tokens equal the first 64
        # tokens of the right loop. After the third failed attempt the last
        # output is used anyway.
        while y_val != right_chunk[:64]:

            with ctx:
                out = model.generate(x,
                                     576,
                                     temperature=model_temperature,
                                     filter_logits_fn=top_k,
                                     filter_kwargs={'k': model_sampling_top_k},
                                     eos_token=18818,
                                     return_prime=False,
                                     verbose=False)

            y = out.tolist()

            y_val = y[-64:]

            if y_val != right_chunk[:64]:
                rcount += 1
                print('Regenerating attempt #', rcount)

                if rcount == 3:
                    break

        # The last 64 generated tokens overlap the right loop, so they are
        # dropped before the right loop is appended.
        song = song + y[:-64] + right_chunk
        song_parts.append(y[:-64])
        song_parts.append(right_chunk)

    #==================================================================

    print('=' * 70)
    print('Done!')
    print('=' * 70)

    #===============================================================================

    print('Rendering results...')

    used_loops_titles = 'Composition used ' + str(len(song_titles)) + ' loops from the following titles:\n\n'

    for i, t in enumerate(song_titles):
        used_loops_titles += 'Loop #' + str(i+1) + ': ' + str(t) + '\n'

    #===============================================================================

    print('=' * 70)
    print('Sample INTs', song[:15])
    print('=' * 70)

    output_score = []

    abs_time = 1000

    # song_parts alternates: even positions are loops, odd positions are the
    # generated bridges between them.
    for i, part in enumerate(song_parts):

        if i == 0:
            part = part[1:]

        if not use_one_loop:
            part_idx = song_indexes[i // 2]

        else:
            part_idx = song_indexes[0]


        if i % 2 == 0:

            if not use_one_loop:
                part_title = song_titles[i // 2]

            else:
                part_title = song_titles[0]

            output_score.append(['text_event', abs_time + (part[0] * 16), 'Loop #' + str((i // 2)+1) + ' / IDX #' + str(part_idx) + ' / ' + part_title])

        else:

            # Place the label at the first time-shift token (< 256) found among
            # the first 20 bridge tokens. A generated bridge may be shorter
            # than 20 tokens or have no time token there; the label then goes
            # at the current time instead of raising IndexError.
            lead_times = [t for t in part[:20] if t < 256]
            bridge_offset = lead_times[0] * 16 if lead_times else 0

            output_score.append(['text_event', abs_time + bridge_offset, 'AI-generated bridge'])

        score, abs_time= tokens_to_score(part, abs_time)

        output_score.extend(score)

    #===============================================================================

    patched_score, patches, overflow_patches = TMIDIX.patch_enhanced_score_notes(output_score)

    fn1 = "Orpheus-MIDI-Loops-Mixer-Composition"

    detailed_stats = TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(patched_score,
                                                              output_signature = 'Orpheus MIDI Loops Mixer',
                                                              output_file_name = fn1,
                                                              track_name='Project Los Angeles',
                                                              list_of_MIDI_patches=patches
                                                              )

    #===============================================================================

    # midi_to_colab_audio is given fn1 plus '.mid' as the file to render.
    new_fn = fn1+'.mid'

    #===============================================================================

    audio = midi_to_colab_audio(new_fn, 
                        soundfont_path=SOUDFONT_PATH,
                        sample_rate=16000,
                        volume_scale=10,
                        output_for_gradio=True
                        )

    #===============================================================================

    print('Done!')
    print('=' * 70)

    #========================================================

    output_midi = str(new_fn)
    output_audio = (16000, audio)
    output_plot = TMIDIX.plot_ms_SONG(patched_score, 
                                      plot_title=output_midi, 
                                      return_plt=True
                                     )

    #===============================================================================

    print(used_loops_titles)
    print('=' * 70) 

    #========================================================

    print('-' * 70)
    print('Req end time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    print('-' * 70)
    print('Req execution time:', (reqtime.time() - start_time), 'sec')

    return used_loops_titles, output_audio, output_plot, output_midi
    
#==================================================================================

# US/Pacific timezone used for every timestamp printed to the log. It is
# defined after Mix_MIDI_Loops but before the UI is launched, so the name
# exists by the time a request calls that function.
PDT = timezone('US/Pacific')

print('=' * 70)
print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
print('=' * 70)

#==================================================================================

# Gradio UI: a header, four generation controls, the "Mix Loops" button and
# four result widgets. Components appear in the order they are created here.
with gr.Blocks() as demo:

    #==================================================================================

    gr.Markdown("<h1 style='text-align: left; margin-bottom: 1rem'>Orpheus MIDI Loops Mixer</h1>")
    gr.Markdown("<h1 style='text-align: left; margin-bottom: 1rem'>Mix several MIDI loops into one composition by bridging</h1>")
    gr.HTML("""            
            <p> 
                <a href="https://huggingface.co/spaces/projectlosangeles/Orpheus-MIDI-Loops-Mixer?duplicate=true">
                    <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-md.svg" alt="Duplicate in Hugging Face">
                </a>
            </p>
            
            for faster execution and endless generation!
            """)
    
    #==================================================================================
    
    gr.Markdown("## Generation options")
    
    # Input controls; their values are passed to Mix_MIDI_Loops in this order.
    num_loops_to_mix = gr.Slider(2, 10, value=5, step=1, label="Number of loops to mix")
    use_one_loop = gr.Checkbox(value=False, label="Use only one randomly selected loop")
    model_temperature = gr.Slider(0.1, 1, value=1.0, step=0.01, label="Model temperature")
    model_sampling_top_k = gr.Slider(1, 100, value=5, step=1, label="Model sampling top k value")
    
    generate_btn = gr.Button("Mix Loops", variant="primary")

    gr.Markdown("## Generation results")

    # Output widgets, in the same order as the tuple returned by Mix_MIDI_Loops.
    used_loops_titles = gr.Textbox(label="MIDI loops titles")
    output_audio = gr.Audio(label="MIDI audio", format="wav", elem_id="midi_audio")
    output_plot = gr.Plot(label="MIDI score plot")
    output_midi = gr.File(label="MIDI file", file_types=[".mid"])

    # Clicking the button runs Mix_MIDI_Loops with the four inputs and routes
    # its four return values to the four outputs.
    generate_btn.click(Mix_MIDI_Loops, 
                       [num_loops_to_mix,
                        use_one_loop,
                        model_temperature,
                        model_sampling_top_k
                       ], 
                       [used_loops_titles,
                        output_audio,
                        output_plot,
                        output_midi                          
                       ]
                      )
    
#==================================================================================

# Start the Gradio server for the app defined above.
demo.launch()

#==================================================================================