# (Non-code page residue captured together with this script: "Spaces: Running / Running")
import argparse
import glob

from human_eval.data import read_problems, write_jsonl, stream_jsonl
from tqdm import tqdm
# Markers used to locate the code portion of a model completion.
FENCE_OPEN = '```python'
FENCE_CLOSE = '```'
MAIN_GUARD = 'if __name__ == "__main__":'
EXAMPLE_MARKER = '# Example usage'


def strip_code_fence(text):
    """Extract the contents of the first ```python fenced block in *text*.

    Everything before the first opening fence is dropped, every occurrence
    of the opening marker is removed, and the text is cut at the first
    remaining ``` marker.

    Returns:
        A ``(code, unterminated)`` tuple. ``unterminated`` is True only when
        an opening fence was found but no closing fence followed it; in that
        case ``code`` is everything after the opening fence. When *text*
        contains no opening fence it is returned unchanged with
        ``unterminated`` set to False.
    """
    start = text.find(FENCE_OPEN)
    if start == -1:
        return text, False

    body = text[start:].strip().replace(FENCE_OPEN, '')
    end = body.find(FENCE_CLOSE)
    if end == -1:
        # No closing fence: keep the remainder and let the caller report it.
        return body, True
    return body[:end].strip(), False


def strip_trailing_demo(text):
    """Cut *text* at a ``__main__`` guard and at an "# Example usage" comment.

    Each marker is searched for in turn (guard first); when found, the text
    from the marker onwards is discarded and the remainder is stripped.
    Text containing neither marker is returned unchanged.
    """
    for marker in (MAIN_GUARD, EXAMPLE_MARKER):
        # The same string is used for detection and for the cut position, so
        # a near-miss such as `if __name__ == "__main__" and x:` is simply
        # left alone instead of raising ValueError.
        cut = text.find(marker)
        if cut != -1:
            text = text[:cut].strip()
    return text


def build_parser():
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser()
    # Inputs
    # Directory searched (non-recursively) for ``*.jsonl`` files.
    parser.add_argument(
        '--path',
        type=str,
        required=True,
        help="")
    # Destination handed to ``write_jsonl`` for the merged records.
    parser.add_argument(
        '--out_path',
        type=str,
        required=True,
        help="")
    # Enables cleanup of each record's 'completion' field.
    # NOTE: despite the flag name, no prompt text is added to the completion.
    parser.add_argument(
        '--add_prompt',
        action='store_true',
        help='')
    return parser


def main(argv=None):
    """Merge every ``*.jsonl`` file under ``--path`` into ``--out_path``.

    When ``--add_prompt`` is given, each record's 'completion' is cleaned:
    carriage returns are removed, the first ```python fenced block is
    extracted, and trailing ``__main__`` / "# Example usage" sections are
    cut off. Completions with an opening fence but no closing fence are
    printed and counted; the count is printed at the end.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    files = sorted(glob.glob(args.path + '/*.jsonl'))
    print("{} files in {}".format(len(files), args.path))

    output = []
    unterminated_count = 0
    for code_file in tqdm(files, total=len(files)):
        codes = list(stream_jsonl(code_file))
        if args.add_prompt:
            for code in codes:
                text = code['completion'].replace("\r", "")
                text, unterminated = strip_code_fence(text)
                if unterminated:
                    unterminated_count += 1
                    print(text)
                    print("================\n")
                code['completion'] = strip_trailing_demo(text)
        output.extend(codes)

    print("save to {}".format(args.out_path))
    write_jsonl(args.out_path, output)
    print(unterminated_count)


if __name__ == "__main__":
    main()