NotaGenX

Running

App Files Files Community

ElectricAlexis commited on May 3

Commit

c86d7b7

verified ·

1 Parent(s): d900b7e

Update inference.py

Browse files

Files changed (1) hide show

inference.py +388 -388

inference.py CHANGED Viewed

@@ -1,388 +1,388 @@
-import os
-import time
-import torch
-import re
-import difflib
-from utils import *
-from config import *
-from transformers import GPT2Config
-from abctoolkit.utils import Exclaim_re, Quote_re, SquareBracket_re, Barline_regexPattern
-from abctoolkit.transpose import Note_list, Pitch_sign_list
-from abctoolkit.duration import calculate_bartext_duration
-import requests
-import torch
-from huggingface_hub import hf_hub_download
-import logging
-# Setup logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-Note_list = Note_list + ['z', 'x']
-if torch.cuda.is_available():
-    device = torch.device("cuda")
-else:
-    device = torch.device("cpu")
-patchilizer = Patchilizer()
-patch_config = GPT2Config(num_hidden_layers=PATCH_NUM_LAYERS,
-                          max_length=PATCH_LENGTH,
-                          max_position_embeddings=PATCH_LENGTH,
-                          n_embd=HIDDEN_SIZE,
-                          num_attention_heads=HIDDEN_SIZE // 64,
-                          vocab_size=1)
-byte_config = GPT2Config(num_hidden_layers=CHAR_NUM_LAYERS,
-                         max_length=PATCH_SIZE + 1,
-                         max_position_embeddings=PATCH_SIZE + 1,
-                         hidden_size=HIDDEN_SIZE,
-                         num_attention_heads=HIDDEN_SIZE // 64,
-                         vocab_size=128)
-model = NotaGenLMHeadModel(encoder_config=patch_config, decoder_config=byte_config).to(device)
-def download_model_weights():
-    weights_path = "weights_notagenx_p_size_16_p_length_1024_p_layers_20_h_size_1280.pth"
-    local_weights_path = os.path.join(os.getcwd(), weights_path)
-    # Check if weights already exist locally
-    if os.path.exists(local_weights_path):
-        logger.info(f"Model weights already exist at {local_weights_path}")
-        return local_weights_path
-    logger.info("Downloading model weights from HuggingFace Hub...")
-    try:
-        # Download from HuggingFace
-        downloaded_path = hf_hub_download(
-            repo_id="ElectricAlexis/NotaGen",
-            filename=weights_path,
-            local_dir=os.getcwd(),
-            local_dir_use_symlinks=False
-        )
-        logger.info(f"Model weights downloaded successfully to {downloaded_path}")
-        return downloaded_path
-    except Exception as e:
-        logger.error(f"Error downloading model weights: {str(e)}")
-        raise RuntimeError(f"Failed to download model weights: {str(e)}")
-def prepare_model_for_kbit_training(model, use_gradient_checkpointing=True):
-    """
-    Prepare model for k-bit training.
-    Features include:
-    1. Convert model to mixed precision (FP16).
-    2. Disable unnecessary gradient computations.
-    3. Enable gradient checkpointing (optional).
-    """
-    # Convert model to mixed precision
-    model = model.to(dtype=torch.float16)
-    # Disable gradients for embedding layers
-    for param in model.parameters():
-        if param.dtype == torch.float32:
-            param.requires_grad = False
-    # Enable gradient checkpointing
-    if use_gradient_checkpointing:
-        model.gradient_checkpointing_enable()
-    return model
-model = prepare_model_for_kbit_training(
-    model,
-    use_gradient_checkpointing=False
-)
-print("Parameter Number: " + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
-# Download weights at startup
-model_weights_path = download_model_weights()
-checkpoint = torch.load(model_weights_path, map_location=torch.device(device))
-model.load_state_dict(checkpoint['model'], strict=False)
-model = model.to(device)
-model.eval()
-def postprocess_inst_names(abc_text):
-    with open('standard_inst_names.txt', 'r', encoding='utf-8') as f:
-        standard_instruments_list = [line.strip() for line in f if line.strip()]
-    with open('instrument_mapping.json', 'r', encoding='utf-8') as f:
-        instrument_mapping = json.load(f)
-    abc_lines = abc_text.split('\n')
-    abc_lines = list(filter(None, abc_lines))
-    abc_lines = [line + '\n' for line in abc_lines]
-    for i, line in enumerate(abc_lines):
-        if line.startswith('V:') and 'nm=' in line:
-            match = re.search(r'nm="([^"]*)"', line)
-            if match:
-                inst_name = match.group(1)
-                # Check if the instrument name is already standard
-                if inst_name in standard_instruments_list:
-                    continue
-                # Find the most similar key in instrument_mapping
-                matching_key = difflib.get_close_matches(inst_name, list(instrument_mapping.keys()), n=1, cutoff=0.6)
-                if matching_key:
-                    # Replace the instrument name with the standardized version
-                    replacement = instrument_mapping[matching_key[0]]
-                    new_line = line.replace(f'nm="{inst_name}"', f'nm="{replacement}"')
-                    abc_lines[i] = new_line
-    # Combine the lines back into a single string
-    processed_abc_text = ''.join(abc_lines)
-    return processed_abc_text
-def complete_brackets(s):
-    stack = []
-    bracket_map = {'{': '}', '[': ']', '(': ')'}
-    # Iterate through each character, handle bracket matching
-    for char in s:
-        if char in bracket_map:
-            stack.append(char)
-        elif char in bracket_map.values():
-            # Find the corresponding left bracket
-            for key, value in bracket_map.items():
-                if value == char:
-                    if stack and stack[-1] == key:
-                        stack.pop()
-                    break  # Found matching right bracket, process next character
-    # Complete missing right brackets (in reverse order of remaining left brackets in stack)
-    completion = ''.join(bracket_map[c] for c in reversed(stack))
-    return s + completion
-def rest_unreduce(abc_lines):
-    tunebody_index = None
-    for i in range(len(abc_lines)):
-        if abc_lines[i].startswith('%%score'):
-            abc_lines[i] = complete_brackets(abc_lines[i])
-        if '[V:' in abc_lines[i]:
-            tunebody_index = i
-            break
-    metadata_lines = abc_lines[: tunebody_index]
-    tunebody_lines = abc_lines[tunebody_index:]
-    part_symbol_list = []
-    voice_group_list = []
-    for line in metadata_lines:
-        if line.startswith('%%score'):
-            for round_bracket_match in re.findall(r'\((.*?)\)', line):
-                voice_group_list.append(round_bracket_match.split())
-            existed_voices = [item for sublist in voice_group_list for item in sublist]
-        if line.startswith('V:'):
-            symbol = line.split()[0]
-            part_symbol_list.append(symbol)
-            if symbol[2:] not in existed_voices:
-                voice_group_list.append([symbol[2:]])
-    z_symbol_list = []  # voices that use z as rest
-    x_symbol_list = []  # voices that use x as rest
-    for voice_group in voice_group_list:
-        z_symbol_list.append('V:' + voice_group[0])
-        for j in range(1, len(voice_group)):
-            x_symbol_list.append('V:' + voice_group[j])
-    part_symbol_list.sort(key=lambda x: int(x[2:]))
-    unreduced_tunebody_lines = []
-    for i, line in enumerate(tunebody_lines):
-        unreduced_line = ''
-        line = re.sub(r'^\[r:[^\]]*\]', '', line)
-        pattern = r'\[V:(\d+)\](.*?)(?=\[V:|$)'
-        matches = re.findall(pattern, line)
-        line_bar_dict = {}
-        for match in matches:
-            key = f'V:{match[0]}'
-            value = match[1]
-            line_bar_dict[key] = value
-        # calculate duration and collect barline
-        dur_dict = {}
-        for symbol, bartext in line_bar_dict.items():
-            right_barline = ''.join(re.split(Barline_regexPattern, bartext)[-2:])
-            bartext = bartext[:-len(right_barline)]
-            try:
-                bar_dur = calculate_bartext_duration(bartext)
-            except:
-                bar_dur = None
-            if bar_dur is not None:
-                if bar_dur not in dur_dict.keys():
-                    dur_dict[bar_dur] = 1
-                else:
-                    dur_dict[bar_dur] += 1
-        try:
-            ref_dur = max(dur_dict, key=dur_dict.get)
-        except:
-            pass  # use last ref_dur
-        if i == 0:
-            prefix_left_barline = line.split('[V:')[0]
-        else:
-            prefix_left_barline = ''
-        for symbol in part_symbol_list:
-            if symbol in line_bar_dict.keys():
-                symbol_bartext = line_bar_dict[symbol]
-            else:
-                if symbol in z_symbol_list:
-                    symbol_bartext = prefix_left_barline + 'z' + str(ref_dur) + right_barline
-                elif symbol in x_symbol_list:
-                    symbol_bartext = prefix_left_barline + 'x' + str(ref_dur) + right_barline
-            unreduced_line += '[' + symbol + ']' + symbol_bartext
-        unreduced_tunebody_lines.append(unreduced_line + '\n')
-    unreduced_lines = metadata_lines + unreduced_tunebody_lines
-    return unreduced_lines
-def inference_patch(period, composer, instrumentation):
-    prompt_lines = [
-        '%' + period + '\n',
-        '%' + composer + '\n',
-        '%' + instrumentation + '\n']
-    while True:
-        failure_flag = False
-        bos_patch = [patchilizer.bos_token_id] * (PATCH_SIZE - 1) + [patchilizer.eos_token_id]
-        start_time = time.time()
-        prompt_patches = patchilizer.patchilize_metadata(prompt_lines)
-        byte_list = list(''.join(prompt_lines))
-        context_tunebody_byte_list = []
-        metadata_byte_list = []
-        print(''.join(byte_list), end='')
-        prompt_patches = [[ord(c) for c in patch] + [patchilizer.special_token_id] * (PATCH_SIZE - len(patch)) for patch
-                          in prompt_patches]
-        prompt_patches.insert(0, bos_patch)
-        input_patches = torch.tensor(prompt_patches, device=device).reshape(1, -1)
-        end_flag = False
-        cut_index = None
-        tunebody_flag = False
-        with torch.inference_mode():
-            while True:
-                with torch.autocast(device_type='cuda', dtype=torch.float16):
-                    predicted_patch = model.generate(input_patches.unsqueeze(0),
-                                                     top_k=TOP_K,
-                                                     top_p=TOP_P,
-                                                     temperature=TEMPERATURE)
-                if not tunebody_flag and patchilizer.decode([predicted_patch]).startswith(
-                        '[r:'):  # 初次进入tunebody，必须以[r:0/开头
-                    tunebody_flag = True
-                    r0_patch = torch.tensor([ord(c) for c in '[r:0/']).unsqueeze(0).to(device)
-                    temp_input_patches = torch.concat([input_patches, r0_patch], axis=-1)
-                    predicted_patch = model.generate(temp_input_patches.unsqueeze(0),
-                                                     top_k=TOP_K,
-                                                     top_p=TOP_P,
-                                                     temperature=TEMPERATURE)
-                    predicted_patch = [ord(c) for c in '[r:0/'] + predicted_patch
-                if predicted_patch[0] == patchilizer.bos_token_id and predicted_patch[1] == patchilizer.eos_token_id:
-                    end_flag = True
-                    break
-                next_patch = patchilizer.decode([predicted_patch])
-                for char in next_patch:
-                    byte_list.append(char)
-                    if tunebody_flag:
-                        context_tunebody_byte_list.append(char)
-                    else:
-                        metadata_byte_list.append(char)
-                    print(char, end='')
-                patch_end_flag = False
-                for j in range(len(predicted_patch)):
-                    if patch_end_flag:
-                        predicted_patch[j] = patchilizer.special_token_id
-                    if predicted_patch[j] == patchilizer.eos_token_id:
-                        patch_end_flag = True
-                predicted_patch = torch.tensor([predicted_patch], device=device)  # (1, 16)
-                input_patches = torch.cat([input_patches, predicted_patch], dim=1)  # (1, 16 * patch_len)
-                if len(byte_list) > 102400:
-                    failure_flag = True
-                    break
-                if time.time() - start_time > 10 * 60:
-                    failure_flag = True
-                    break
-                if input_patches.shape[1] >= PATCH_LENGTH * PATCH_SIZE and not end_flag:
-                    print('Stream generating...')
-                    metadata = ''.join(metadata_byte_list)
-                    context_tunebody = ''.join(context_tunebody_byte_list)
-                    if '\n' not in context_tunebody:
-                        break  # Generated content is all metadata, abandon
-                    context_tunebody_lines = context_tunebody.strip().split('\n')
-                    if not context_tunebody.endswith('\n'):
-                        context_tunebody_lines = [context_tunebody_lines[i] + '\n' for i in
-                                                  range(len(context_tunebody_lines) - 1)] + [context_tunebody_lines[-1]]
-                    else:
-                        context_tunebody_lines = [context_tunebody_lines[i] + '\n' for i in
-                                                  range(len(context_tunebody_lines))]
-                    cut_index = len(context_tunebody_lines) // 2
-                    abc_code_slice = metadata + ''.join(context_tunebody_lines[-cut_index:])
-                    input_patches = patchilizer.encode_generate(abc_code_slice)
-                    input_patches = [item for sublist in input_patches for item in sublist]
-                    input_patches = torch.tensor([input_patches], device=device)
-                    input_patches = input_patches.reshape(1, -1)
-                    context_tunebody_byte_list = list(''.join(context_tunebody_lines[-cut_index:]))
-            if not failure_flag:
-                abc_text = ''.join(byte_list)
-                # unreduce
-                abc_lines = abc_text.split('\n')
-                abc_lines = list(filter(None, abc_lines))
-                abc_lines = [line + '\n' for line in abc_lines]
-                try:
-                    unreduced_abc_lines = rest_unreduce(abc_lines)
-                except:
-                    failure_flag = True
-                    pass
-                else:
-                    unreduced_abc_lines = [line for line in unreduced_abc_lines if
-                                           not (line.startswith('%') and not line.startswith('%%'))]
-                    unreduced_abc_lines = ['X:1\n'] + unreduced_abc_lines
-                    unreduced_abc_text = ''.join(unreduced_abc_lines)
-                    return unreduced_abc_text
-if __name__ == '__main__':
-    inference_patch('Classical', 'Beethoven, Ludwig van', 'Orchestral')

+import os
+import time
+import torch
+import re
+import difflib
+from utils import *
+from config import *
+from transformers import GPT2Config
+from abctoolkit.utils import Exclaim_re, Quote_re, SquareBracket_re, Barline_regexPattern
+from abctoolkit.transpose import Note_list, Pitch_sign_list
+from abctoolkit.duration import calculate_bartext_duration
+import requests
+import torch
+from huggingface_hub import hf_hub_download
+import logging
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+Note_list = Note_list + ['z', 'x']
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+else:
+    device = torch.device("cpu")
+patchilizer = Patchilizer()
+patch_config = GPT2Config(num_hidden_layers=PATCH_NUM_LAYERS,
+                          max_length=PATCH_LENGTH,
+                          max_position_embeddings=PATCH_LENGTH,
+                          n_embd=HIDDEN_SIZE,
+                          num_attention_heads=HIDDEN_SIZE // 64,
+                          vocab_size=1)
+byte_config = GPT2Config(num_hidden_layers=CHAR_NUM_LAYERS,
+                         max_length=PATCH_SIZE + 1,
+                         max_position_embeddings=PATCH_SIZE + 1,
+                         hidden_size=HIDDEN_SIZE,
+                         num_attention_heads=HIDDEN_SIZE // 64,
+                         vocab_size=128)
+model = NotaGenLMHeadModel(encoder_config=patch_config, decoder_config=byte_config).to(device)
+def download_model_weights():
+    weights_path = "weights_notagenx_p_size_16_p_length_1024_p_layers_20_h_size_1280.pth"
+    local_weights_path = os.path.join(os.getcwd(), weights_path)
+    # Check if weights already exist locally
+    if os.path.exists(local_weights_path):
+        logger.info(f"Model weights already exist at {local_weights_path}")
+        return local_weights_path
+    logger.info("Downloading model weights from HuggingFace Hub...")
+    try:
+        # Download from HuggingFace
+        downloaded_path = hf_hub_download(
+            repo_id="ElectricAlexis/NotaGen",
+            filename=weights_path,
+            local_dir=os.getcwd(),
+            local_dir_use_symlinks=False
+        )
+        logger.info(f"Model weights downloaded successfully to {downloaded_path}")
+        return downloaded_path
+    except Exception as e:
+        logger.error(f"Error downloading model weights: {str(e)}")
+        raise RuntimeError(f"Failed to download model weights: {str(e)}")
+def prepare_model_for_kbit_training(model, use_gradient_checkpointing=True):
+    """
+    Prepare model for k-bit training.
+    Features include:
+    1. Convert model to mixed precision (FP16).
+    2. Disable unnecessary gradient computations.
+    3. Enable gradient checkpointing (optional).
+    """
+    # Convert model to mixed precision
+    model = model.to(dtype=torch.float16)
+    # Disable gradients for embedding layers
+    for param in model.parameters():
+        if param.dtype == torch.float32:
+            param.requires_grad = False
+    # Enable gradient checkpointing
+    if use_gradient_checkpointing:
+        model.gradient_checkpointing_enable()
+    return model
+model = prepare_model_for_kbit_training(
+    model,
+    use_gradient_checkpointing=False
+)
+print("Parameter Number: " + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
+# Download weights at startup
+model_weights_path = download_model_weights()
+checkpoint = torch.load(model_weights_path, weights_only=True, map_location=torch.device(device))
+model.load_state_dict(checkpoint['model'], strict=False)
+model = model.to(device)
+model.eval()
+def postprocess_inst_names(abc_text):
+    with open('standard_inst_names.txt', 'r', encoding='utf-8') as f:
+        standard_instruments_list = [line.strip() for line in f if line.strip()]
+    with open('instrument_mapping.json', 'r', encoding='utf-8') as f:
+        instrument_mapping = json.load(f)
+    abc_lines = abc_text.split('\n')
+    abc_lines = list(filter(None, abc_lines))
+    abc_lines = [line + '\n' for line in abc_lines]
+    for i, line in enumerate(abc_lines):
+        if line.startswith('V:') and 'nm=' in line:
+            match = re.search(r'nm="([^"]*)"', line)
+            if match:
+                inst_name = match.group(1)
+                # Check if the instrument name is already standard
+                if inst_name in standard_instruments_list:
+                    continue
+                # Find the most similar key in instrument_mapping
+                matching_key = difflib.get_close_matches(inst_name, list(instrument_mapping.keys()), n=1, cutoff=0.6)
+                if matching_key:
+                    # Replace the instrument name with the standardized version
+                    replacement = instrument_mapping[matching_key[0]]
+                    new_line = line.replace(f'nm="{inst_name}"', f'nm="{replacement}"')
+                    abc_lines[i] = new_line
+    # Combine the lines back into a single string
+    processed_abc_text = ''.join(abc_lines)
+    return processed_abc_text
+def complete_brackets(s):
+    stack = []
+    bracket_map = {'{': '}', '[': ']', '(': ')'}
+    # Iterate through each character, handle bracket matching
+    for char in s:
+        if char in bracket_map:
+            stack.append(char)
+        elif char in bracket_map.values():
+            # Find the corresponding left bracket
+            for key, value in bracket_map.items():
+                if value == char:
+                    if stack and stack[-1] == key:
+                        stack.pop()
+                    break  # Found matching right bracket, process next character
+    # Complete missing right brackets (in reverse order of remaining left brackets in stack)
+    completion = ''.join(bracket_map[c] for c in reversed(stack))
+    return s + completion
+def rest_unreduce(abc_lines):
+    tunebody_index = None
+    for i in range(len(abc_lines)):
+        if abc_lines[i].startswith('%%score'):
+            abc_lines[i] = complete_brackets(abc_lines[i])
+        if '[V:' in abc_lines[i]:
+            tunebody_index = i
+            break
+    metadata_lines = abc_lines[: tunebody_index]
+    tunebody_lines = abc_lines[tunebody_index:]
+    part_symbol_list = []
+    voice_group_list = []
+    for line in metadata_lines:
+        if line.startswith('%%score'):
+            for round_bracket_match in re.findall(r'\((.*?)\)', line):
+                voice_group_list.append(round_bracket_match.split())
+            existed_voices = [item for sublist in voice_group_list for item in sublist]
+        if line.startswith('V:'):
+            symbol = line.split()[0]
+            part_symbol_list.append(symbol)
+            if symbol[2:] not in existed_voices:
+                voice_group_list.append([symbol[2:]])
+    z_symbol_list = []  # voices that use z as rest
+    x_symbol_list = []  # voices that use x as rest
+    for voice_group in voice_group_list:
+        z_symbol_list.append('V:' + voice_group[0])
+        for j in range(1, len(voice_group)):
+            x_symbol_list.append('V:' + voice_group[j])
+    part_symbol_list.sort(key=lambda x: int(x[2:]))
+    unreduced_tunebody_lines = []
+    for i, line in enumerate(tunebody_lines):
+        unreduced_line = ''
+        line = re.sub(r'^\[r:[^\]]*\]', '', line)
+        pattern = r'\[V:(\d+)\](.*?)(?=\[V:|$)'
+        matches = re.findall(pattern, line)
+        line_bar_dict = {}
+        for match in matches:
+            key = f'V:{match[0]}'
+            value = match[1]
+            line_bar_dict[key] = value
+        # calculate duration and collect barline
+        dur_dict = {}
+        for symbol, bartext in line_bar_dict.items():
+            right_barline = ''.join(re.split(Barline_regexPattern, bartext)[-2:])
+            bartext = bartext[:-len(right_barline)]
+            try:
+                bar_dur = calculate_bartext_duration(bartext)
+            except:
+                bar_dur = None
+            if bar_dur is not None:
+                if bar_dur not in dur_dict.keys():
+                    dur_dict[bar_dur] = 1
+                else:
+                    dur_dict[bar_dur] += 1
+        try:
+            ref_dur = max(dur_dict, key=dur_dict.get)
+        except:
+            pass  # use last ref_dur
+        if i == 0:
+            prefix_left_barline = line.split('[V:')[0]
+        else:
+            prefix_left_barline = ''
+        for symbol in part_symbol_list:
+            if symbol in line_bar_dict.keys():
+                symbol_bartext = line_bar_dict[symbol]
+            else:
+                if symbol in z_symbol_list:
+                    symbol_bartext = prefix_left_barline + 'z' + str(ref_dur) + right_barline
+                elif symbol in x_symbol_list:
+                    symbol_bartext = prefix_left_barline + 'x' + str(ref_dur) + right_barline
+            unreduced_line += '[' + symbol + ']' + symbol_bartext
+        unreduced_tunebody_lines.append(unreduced_line + '\n')
+    unreduced_lines = metadata_lines + unreduced_tunebody_lines
+    return unreduced_lines
+def inference_patch(period, composer, instrumentation):
+    prompt_lines = [
+        '%' + period + '\n',
+        '%' + composer + '\n',
+        '%' + instrumentation + '\n']
+    while True:
+        failure_flag = False
+        bos_patch = [patchilizer.bos_token_id] * (PATCH_SIZE - 1) + [patchilizer.eos_token_id]
+        start_time = time.time()
+        prompt_patches = patchilizer.patchilize_metadata(prompt_lines)
+        byte_list = list(''.join(prompt_lines))
+        context_tunebody_byte_list = []
+        metadata_byte_list = []
+        print(''.join(byte_list), end='')
+        prompt_patches = [[ord(c) for c in patch] + [patchilizer.special_token_id] * (PATCH_SIZE - len(patch)) for patch
+                          in prompt_patches]
+        prompt_patches.insert(0, bos_patch)
+        input_patches = torch.tensor(prompt_patches, device=device).reshape(1, -1)
+        end_flag = False
+        cut_index = None
+        tunebody_flag = False
+        with torch.inference_mode():
+            while True:
+                with torch.autocast(device_type='cuda', dtype=torch.float16):
+                    predicted_patch = model.generate(input_patches.unsqueeze(0),
+                                                     top_k=TOP_K,
+                                                     top_p=TOP_P,
+                                                     temperature=TEMPERATURE)
+                if not tunebody_flag and patchilizer.decode([predicted_patch]).startswith(
+                        '[r:'):  # 初次进入tunebody，必须以[r:0/开头
+                    tunebody_flag = True
+                    r0_patch = torch.tensor([ord(c) for c in '[r:0/']).unsqueeze(0).to(device)
+                    temp_input_patches = torch.concat([input_patches, r0_patch], axis=-1)
+                    predicted_patch = model.generate(temp_input_patches.unsqueeze(0),
+                                                     top_k=TOP_K,
+                                                     top_p=TOP_P,
+                                                     temperature=TEMPERATURE)
+                    predicted_patch = [ord(c) for c in '[r:0/'] + predicted_patch
+                if predicted_patch[0] == patchilizer.bos_token_id and predicted_patch[1] == patchilizer.eos_token_id:
+                    end_flag = True
+                    break
+                next_patch = patchilizer.decode([predicted_patch])
+                for char in next_patch:
+                    byte_list.append(char)
+                    if tunebody_flag:
+                        context_tunebody_byte_list.append(char)
+                    else:
+                        metadata_byte_list.append(char)
+                    print(char, end='')
+                patch_end_flag = False
+                for j in range(len(predicted_patch)):
+                    if patch_end_flag:
+                        predicted_patch[j] = patchilizer.special_token_id
+                    if predicted_patch[j] == patchilizer.eos_token_id:
+                        patch_end_flag = True
+                predicted_patch = torch.tensor([predicted_patch], device=device)  # (1, 16)
+                input_patches = torch.cat([input_patches, predicted_patch], dim=1)  # (1, 16 * patch_len)
+                if len(byte_list) > 102400:
+                    failure_flag = True
+                    break
+                if time.time() - start_time > 10 * 60:
+                    failure_flag = True
+                    break
+                if input_patches.shape[1] >= PATCH_LENGTH * PATCH_SIZE and not end_flag:
+                    print('Stream generating...')
+                    metadata = ''.join(metadata_byte_list)
+                    context_tunebody = ''.join(context_tunebody_byte_list)
+                    if '\n' not in context_tunebody:
+                        break  # Generated content is all metadata, abandon
+                    context_tunebody_lines = context_tunebody.strip().split('\n')
+                    if not context_tunebody.endswith('\n'):
+                        context_tunebody_lines = [context_tunebody_lines[i] + '\n' for i in
+                                                  range(len(context_tunebody_lines) - 1)] + [context_tunebody_lines[-1]]
+                    else:
+                        context_tunebody_lines = [context_tunebody_lines[i] + '\n' for i in
+                                                  range(len(context_tunebody_lines))]
+                    cut_index = len(context_tunebody_lines) // 2
+                    abc_code_slice = metadata + ''.join(context_tunebody_lines[-cut_index:])
+                    input_patches = patchilizer.encode_generate(abc_code_slice)
+                    input_patches = [item for sublist in input_patches for item in sublist]
+                    input_patches = torch.tensor([input_patches], device=device)
+                    input_patches = input_patches.reshape(1, -1)
+                    context_tunebody_byte_list = list(''.join(context_tunebody_lines[-cut_index:]))
+            if not failure_flag:
+                abc_text = ''.join(byte_list)
+                # unreduce
+                abc_lines = abc_text.split('\n')
+                abc_lines = list(filter(None, abc_lines))
+                abc_lines = [line + '\n' for line in abc_lines]
+                try:
+                    unreduced_abc_lines = rest_unreduce(abc_lines)
+                except:
+                    failure_flag = True
+                    pass
+                else:
+                    unreduced_abc_lines = [line for line in unreduced_abc_lines if
+                                           not (line.startswith('%') and not line.startswith('%%'))]
+                    unreduced_abc_lines = ['X:1\n'] + unreduced_abc_lines
+                    unreduced_abc_text = ''.join(unreduced_abc_lines)
+                    return unreduced_abc_text
+if __name__ == '__main__':
+    inference_patch('Classical', 'Beethoven, Ludwig van', 'Orchestral')