import re
import ast

import pandas as pd


# ───────────────────────── Parser ──────────────────────────
def parse_smart_log(path_or_str, top_n=15):
    """Parse a hyper-parameter search log into a DataFrame.

    Accepts either a path to a ``.txt`` log file or the raw log text and
    returns one row per search step containing the step number, every
    hyper-parameter from the ``Шаг N: k1 + k2 = (v1, v2)`` header, and the
    dev/test MEAN metrics plus their gap.

    Parameters
    ----------
    path_or_str : str
        Path to the log file, or the log text itself.  The argument is
        treated as text when it contains a newline or the marker word
        ``Шаг`` (the step keyword used in the logs).
    top_n : int or None, default 15
        Keep only the best ``top_n`` rows by test score; ``None`` keeps all.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by ``test`` descending and re-indexed; empty when no
        complete (dev + test) step was found.
    """
    # Read either from a file or from an already-passed string.
    if '\n' in path_or_str or 'Шаг' in path_or_str:
        lines = path_or_str.splitlines()
    else:
        with open(path_or_str, encoding='utf-8') as f:
            lines = f.readlines()

    rows, current = [], {}

    step_re = re.compile(r"Шаг\s+(\d+):\s*([^=]+?)=\s*\((.*?)\)")
    mean_re = re.compile(r"MEAN\s*=\s*([0-9.]+)")
    gap_re = re.compile(r"GAP\s*=\s*([+-]?[0-9.]+)")

    def _flush():
        # Save the current step once both metrics were seen.  A metric of
        # exactly 0.0 is still valid, hence the explicit None checks
        # (plain truthiness would silently drop such a step).
        if current.get('dev') is not None and current.get('test') is not None:
            current.setdefault('gap', round(current['test'] - current['dev'], 4))
            rows.append(current)

    for i, raw in enumerate(lines):
        line = raw.rstrip("\n")

        # ── 1. step header: «Шаг N: k1 + k2 = (v1, v2)» ──────────────
        m = step_re.search(line)
        if m:
            _flush()                       # finalize the previous step
            current = {'step': int(m.group(1))}

            keys = [k.strip() for k in m.group(2).split('+')]
            # Values may be quoted strings or bare tokens; split on commas
            # but keep quoted values intact.
            raw_vals = re.findall(r"'[^']*'|[^,]+", m.group(3))
            vals = [v.strip().strip("'") for v in raw_vals]
            for k, v in zip(keys, vals):
                # literal_eval turns "0.001" -> float, "8" -> int without
                # the arbitrary-code-execution risk of eval(); anything
                # that is not a Python literal stays a plain string.
                try:
                    current[k] = ast.literal_eval(v)
                except (ValueError, SyntaxError):
                    current[k] = v

        # ── 2. «Результаты (DEV):» — next MEAN line is the dev score ──
        if "Результаты (DEV):" in line:
            for j in range(i + 1, len(lines)):
                m = mean_re.search(lines[j])
                if m:
                    current['dev'] = float(m.group(1))
                    break

        # ── 3. «Результаты (TEST):» — MEAN plus an optional explicit GAP
        if "Результаты (TEST):" in line:
            for j in range(i + 1, len(lines)):
                m = mean_re.search(lines[j])
                if m:
                    current['test'] = float(m.group(1))
                    break
            for j in range(i + 1, len(lines)):
                g = gap_re.search(lines[j])
                if g:
                    current['gap'] = float(g.group(1))
                    break

    # Don't forget the trailing step (no following «Шаг» line triggers it).
    _flush()

    df = pd.DataFrame(rows)
    if not df.empty:
        df = df.sort_values('test', ascending=False)
        if top_n is not None:
            df = df.head(top_n)
        df = df.reset_index(drop=True)
    return df
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodevtestgap
0370.000184adamplateau0.050.59040.57560.0148
1380.000184adamplateau0.100.59040.57560.0148
2470.000185adamhuggingface_cosine_with_restarts0.050.58770.57710.0106
3400.000184adamhuggingface_cosine_with_restarts0.100.58560.57510.0105
4640.0001165adamhuggingface_cosine_with_restarts0.100.58300.57050.0126
5450.000185adamplateau0.050.58140.5825-0.0011
6460.000185adamplateau0.100.58140.5825-0.0011
730.001084sgdhuggingface_cosine_with_restarts0.050.58100.57880.0022
840.001084sgdhuggingface_cosine_with_restarts0.100.58010.57700.0031
9480.000185adamhuggingface_cosine_with_restarts0.100.57980.57310.0066
10210.0010164adamplateau0.050.57970.56860.0111
11220.0010164adamplateau0.100.57970.56860.0111
12620.0001165adamplateau0.100.57950.57260.0069
13610.0001165adamplateau0.050.57950.57260.0069
14530.0001164adamplateau0.050.57850.56780.0107
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dev test gap\n", + "0 37 0.0001 8 4 adam plateau 0.05 0.5904 0.5756 0.0148\n", + "1 38 0.0001 8 4 adam plateau 0.10 0.5904 0.5756 0.0148\n", + "2 47 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.5877 0.5771 0.0106\n", + "3 40 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.5856 0.5751 0.0105\n", + "4 64 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.5830 0.5705 0.0126\n", + "5 45 0.0001 8 5 adam plateau 0.05 0.5814 0.5825 -0.0011\n", + "6 46 0.0001 8 5 adam plateau 0.10 0.5814 0.5825 -0.0011\n", + "7 3 0.0010 8 4 sgd huggingface_cosine_with_restarts 0.05 0.5810 0.5788 0.0022\n", + "8 4 0.0010 8 4 sgd huggingface_cosine_with_restarts 0.10 0.5801 0.5770 0.0031\n", + "9 48 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.5798 0.5731 0.0066\n", + "10 21 0.0010 16 4 adam plateau 0.05 0.5797 0.5686 0.0111\n", + "11 22 0.0010 16 4 adam plateau 0.10 0.5797 0.5686 0.0111\n", + "12 62 0.0001 16 5 adam plateau 0.10 0.5795 0.5726 0.0069\n", + "13 61 0.0001 16 5 adam plateau 0.05 0.5795 0.5726 0.0069\n", + "14 53 0.0001 16 4 adam plateau 0.05 0.5785 0.5678 0.0107" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/10.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(15))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "248d2c65-2222-44b5-a83e-20e1c2048ba4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodevtestgap
0560.0001164adamhuggingface_cosine_with_restarts0.100.58910.57770.0114
130.001084sgdhuggingface_cosine_with_restarts0.050.58530.58230.0030
2530.0001164adamplateau0.050.58450.57740.0072
3540.0001164adamplateau0.100.58450.57740.0072
4300.0010165adamplateau0.100.58390.56940.0145
5290.0010165adamplateau0.050.58390.56940.0145
6620.0001165adamplateau0.100.58380.57750.0063
7610.0001165adamplateau0.050.58380.57750.0063
8380.000184adamplateau0.100.58340.5836-0.0002
9370.000184adamplateau0.050.58340.5836-0.0002
10400.000184adamhuggingface_cosine_with_restarts0.100.58220.56630.0159
11190.0010164sgdhuggingface_cosine_with_restarts0.050.58220.58030.0019
12210.0010164adamplateau0.050.58050.57400.0065
13220.0010164adamplateau0.100.58050.57400.0065
14200.0010164sgdhuggingface_cosine_with_restarts0.100.58030.57060.0097
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dev test gap\n", + "0 56 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.5891 0.5777 0.0114\n", + "1 3 0.0010 8 4 sgd huggingface_cosine_with_restarts 0.05 0.5853 0.5823 0.0030\n", + "2 53 0.0001 16 4 adam plateau 0.05 0.5845 0.5774 0.0072\n", + "3 54 0.0001 16 4 adam plateau 0.10 0.5845 0.5774 0.0072\n", + "4 30 0.0010 16 5 adam plateau 0.10 0.5839 0.5694 0.0145\n", + "5 29 0.0010 16 5 adam plateau 0.05 0.5839 0.5694 0.0145\n", + "6 62 0.0001 16 5 adam plateau 0.10 0.5838 0.5775 0.0063\n", + "7 61 0.0001 16 5 adam plateau 0.05 0.5838 0.5775 0.0063\n", + "8 38 0.0001 8 4 adam plateau 0.10 0.5834 0.5836 -0.0002\n", + "9 37 0.0001 8 4 adam plateau 0.05 0.5834 0.5836 -0.0002\n", + "10 40 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.5822 0.5663 0.0159\n", + "11 19 0.0010 16 4 sgd huggingface_cosine_with_restarts 0.05 0.5822 0.5803 0.0019\n", + "12 21 0.0010 16 4 adam plateau 0.05 0.5805 0.5740 0.0065\n", + "13 22 0.0010 16 4 adam plateau 0.10 0.5805 0.5740 0.0065\n", + "14 20 0.0010 16 4 sgd huggingface_cosine_with_restarts 0.10 0.5803 0.5706 0.0097" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/20.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(15))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7e06dfea-d6cc-479b-8113-3b0140840db8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodevtestgap
0640.0001165adamhuggingface_cosine_with_restarts0.100.58120.57120.0100
1470.000185adamhuggingface_cosine_with_restarts0.050.58110.57100.0101
2480.000185adamhuggingface_cosine_with_restarts0.100.58000.57220.0078
3200.0010164sgdhuggingface_cosine_with_restarts0.100.57910.57240.0067
4270.0010165sgdhuggingface_cosine_with_restarts0.050.57900.57460.0044
5190.0010164sgdhuggingface_cosine_with_restarts0.050.57830.57000.0083
640.001084sgdhuggingface_cosine_with_restarts0.100.57740.56540.0120
7540.0001164adamplateau0.100.57730.56970.0075
8530.0001164adamplateau0.050.57730.56970.0075
9380.000184adamplateau0.100.57610.57370.0024
10370.000184adamplateau0.050.57610.57370.0024
1130.001084sgdhuggingface_cosine_with_restarts0.050.57590.57360.0023
12630.0001165adamhuggingface_cosine_with_restarts0.050.57570.56490.0108
13290.0010165adamplateau0.050.57540.56650.0090
14300.0010165adamplateau0.100.57540.56650.0090
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dev test gap\n", + "0 64 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.5812 0.5712 0.0100\n", + "1 47 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.5811 0.5710 0.0101\n", + "2 48 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.5800 0.5722 0.0078\n", + "3 20 0.0010 16 4 sgd huggingface_cosine_with_restarts 0.10 0.5791 0.5724 0.0067\n", + "4 27 0.0010 16 5 sgd huggingface_cosine_with_restarts 0.05 0.5790 0.5746 0.0044\n", + "5 19 0.0010 16 4 sgd huggingface_cosine_with_restarts 0.05 0.5783 0.5700 0.0083\n", + "6 4 0.0010 8 4 sgd huggingface_cosine_with_restarts 0.10 0.5774 0.5654 0.0120\n", + "7 54 0.0001 16 4 adam plateau 0.10 0.5773 0.5697 0.0075\n", + "8 53 0.0001 16 4 adam plateau 0.05 0.5773 0.5697 0.0075\n", + "9 38 0.0001 8 4 adam plateau 0.10 0.5761 0.5737 0.0024\n", + "10 37 0.0001 8 4 adam plateau 0.05 0.5761 0.5737 0.0024\n", + "11 3 0.0010 8 4 sgd huggingface_cosine_with_restarts 0.05 0.5759 0.5736 0.0023\n", + "12 63 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.5757 0.5649 0.0108\n", + "13 29 0.0010 16 5 adam plateau 0.05 0.5754 0.5665 0.0090\n", + "14 30 0.0010 16 5 adam plateau 0.10 0.5754 0.5665 0.0090" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/30.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(15))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c3f7929b-5279-4490-84e6-f0e4309f769d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodevtestgap
0370.000184adamplateau0.050.58270.57340.0093
1380.000184adamplateau0.100.58270.57340.0093
2560.0001164adamhuggingface_cosine_with_restarts0.100.58210.55890.0232
330.001084sgdhuggingface_cosine_with_restarts0.050.58130.58010.0012
4390.000184adamhuggingface_cosine_with_restarts0.050.58100.57160.0094
5170.0010164sgdplateau0.050.58060.57570.0049
6180.0010164sgdplateau0.100.58060.57570.0049
7630.0001165adamhuggingface_cosine_with_restarts0.050.57980.57670.0031
8280.0010165sgdhuggingface_cosine_with_restarts0.100.57970.57100.0087
9190.0010164sgdhuggingface_cosine_with_restarts0.050.57950.57310.0065
1040.001084sgdhuggingface_cosine_with_restarts0.100.57910.56810.0110
11200.0010164sgdhuggingface_cosine_with_restarts0.100.57900.56820.0109
1210.001084sgdplateau0.050.57780.56740.0104
1320.001084sgdplateau0.100.57780.56740.0104
14640.0001165adamhuggingface_cosine_with_restarts0.100.57550.56700.0085
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dev test gap\n", + "0 37 0.0001 8 4 adam plateau 0.05 0.5827 0.5734 0.0093\n", + "1 38 0.0001 8 4 adam plateau 0.10 0.5827 0.5734 0.0093\n", + "2 56 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.5821 0.5589 0.0232\n", + "3 3 0.0010 8 4 sgd huggingface_cosine_with_restarts 0.05 0.5813 0.5801 0.0012\n", + "4 39 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.5810 0.5716 0.0094\n", + "5 17 0.0010 16 4 sgd plateau 0.05 0.5806 0.5757 0.0049\n", + "6 18 0.0010 16 4 sgd plateau 0.10 0.5806 0.5757 0.0049\n", + "7 63 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.5798 0.5767 0.0031\n", + "8 28 0.0010 16 5 sgd huggingface_cosine_with_restarts 0.10 0.5797 0.5710 0.0087\n", + "9 19 0.0010 16 4 sgd huggingface_cosine_with_restarts 0.05 0.5795 0.5731 0.0065\n", + "10 4 0.0010 8 4 sgd huggingface_cosine_with_restarts 0.10 0.5791 0.5681 0.0110\n", + "11 20 0.0010 16 4 sgd huggingface_cosine_with_restarts 0.10 0.5790 0.5682 0.0109\n", + "12 1 0.0010 8 4 sgd plateau 0.05 0.5778 0.5674 0.0104\n", + "13 2 0.0010 8 4 sgd plateau 0.10 0.5778 0.5674 0.0104\n", + "14 64 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.5755 0.5670 0.0085" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/40.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(15))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "70da89eb-18e6-4795-8b83-1116fe1fa968", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodevtestgap
0560.0001164adamhuggingface_cosine_with_restarts0.100.59190.57730.0146
1400.000184adamhuggingface_cosine_with_restarts0.100.59110.57710.0140
2540.0001164adamplateau0.100.58790.57740.0105
3530.0001164adamplateau0.050.58790.57740.0105
4370.000184adamplateau0.050.58730.57220.0152
5380.000184adamplateau0.100.58730.57220.0152
6180.0010164sgdplateau0.100.58610.57610.0100
7170.0010164sgdplateau0.050.58610.57610.0100
8190.0010164sgdhuggingface_cosine_with_restarts0.050.58380.58260.0012
9620.0001165adamplateau0.100.58370.57320.0105
10610.0001165adamplateau0.050.58370.57320.0105
11550.0001164adamhuggingface_cosine_with_restarts0.050.58360.56920.0144
12390.000184adamhuggingface_cosine_with_restarts0.050.58340.57050.0129
1340.001084sgdhuggingface_cosine_with_restarts0.100.58260.57950.0031
1430.001084sgdhuggingface_cosine_with_restarts0.050.58250.57930.0032
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dev test gap\n", + "0 56 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.5919 0.5773 0.0146\n", + "1 40 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.5911 0.5771 0.0140\n", + "2 54 0.0001 16 4 adam plateau 0.10 0.5879 0.5774 0.0105\n", + "3 53 0.0001 16 4 adam plateau 0.05 0.5879 0.5774 0.0105\n", + "4 37 0.0001 8 4 adam plateau 0.05 0.5873 0.5722 0.0152\n", + "5 38 0.0001 8 4 adam plateau 0.10 0.5873 0.5722 0.0152\n", + "6 18 0.0010 16 4 sgd plateau 0.10 0.5861 0.5761 0.0100\n", + "7 17 0.0010 16 4 sgd plateau 0.05 0.5861 0.5761 0.0100\n", + "8 19 0.0010 16 4 sgd huggingface_cosine_with_restarts 0.05 0.5838 0.5826 0.0012\n", + "9 62 0.0001 16 5 adam plateau 0.10 0.5837 0.5732 0.0105\n", + "10 61 0.0001 16 5 adam plateau 0.05 0.5837 0.5732 0.0105\n", + "11 55 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.5836 0.5692 0.0144\n", + "12 39 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.5834 0.5705 0.0129\n", + "13 4 0.0010 8 4 sgd huggingface_cosine_with_restarts 0.10 0.5826 0.5795 0.0031\n", + "14 3 0.0010 8 4 sgd huggingface_cosine_with_restarts 0.05 0.5825 0.5793 0.0032" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/50.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(15))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "6614a37d-344e-46a5-b5ba-e49d4010027a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratioweight_decaydropoutdevtestgap
0270.000185adamplateau0.050.00.200.58500.57680.0082
1330.000185adamplateau0.100.00.200.58500.57680.0082
2320.000185adamplateau0.100.00.150.58270.57320.0095
3260.000185adamplateau0.050.00.150.58270.57320.0095
4490.0001164adamplateau0.050.00.100.58270.57090.0119
5550.0001164adamplateau0.100.00.100.58270.57090.0119
6360.000185adamplateau0.100.10.200.58200.57170.0104
7300.000185adamplateau0.050.10.200.58200.57170.0104
8290.000185adamplateau0.050.10.150.58150.57190.0096
9350.000185adamplateau0.100.10.150.58150.57190.0096
10510.0001164adamplateau0.050.00.200.58100.56940.0116
11570.0001164adamplateau0.100.00.200.58100.56940.0116
12680.0001164adamhuggingface_cosine_with_restarts0.100.00.150.58060.56680.0138
1330.000184adamplateau0.050.00.200.57970.57090.0088
1490.000184adamplateau0.100.00.200.57970.57090.0088
1580.000184adamplateau0.100.00.150.57910.56690.0123
1620.000184adamplateau0.050.00.150.57910.56690.0123
17610.0001164adamhuggingface_cosine_with_restarts0.050.00.100.57900.56640.0126
18310.000185adamplateau0.100.00.100.57880.56950.0093
19250.000185adamplateau0.050.00.100.57880.56950.0093
20770.0001165adamplateau0.050.10.150.57850.57330.0051
21830.0001165adamplateau0.100.10.150.57850.57330.0051
22500.0001164adamplateau0.050.00.150.57790.56700.0109
23560.0001164adamplateau0.100.00.150.57790.56700.0109
24620.0001164adamhuggingface_cosine_with_restarts0.050.00.150.57770.56220.0155
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio weight_decay dropout dev test \\\n", + "0 27 0.0001 8 5 adam plateau 0.05 0.0 0.20 0.5850 0.5768 \n", + "1 33 0.0001 8 5 adam plateau 0.10 0.0 0.20 0.5850 0.5768 \n", + "2 32 0.0001 8 5 adam plateau 0.10 0.0 0.15 0.5827 0.5732 \n", + "3 26 0.0001 8 5 adam plateau 0.05 0.0 0.15 0.5827 0.5732 \n", + "4 49 0.0001 16 4 adam plateau 0.05 0.0 0.10 0.5827 0.5709 \n", + "5 55 0.0001 16 4 adam plateau 0.10 0.0 0.10 0.5827 0.5709 \n", + "6 36 0.0001 8 5 adam plateau 0.10 0.1 0.20 0.5820 0.5717 \n", + "7 30 0.0001 8 5 adam plateau 0.05 0.1 0.20 0.5820 0.5717 \n", + "8 29 0.0001 8 5 adam plateau 0.05 0.1 0.15 0.5815 0.5719 \n", + "9 35 0.0001 8 5 adam plateau 0.10 0.1 0.15 0.5815 0.5719 \n", + "10 51 0.0001 16 4 adam plateau 0.05 0.0 0.20 0.5810 0.5694 \n", + "11 57 0.0001 16 4 adam plateau 0.10 0.0 0.20 0.5810 0.5694 \n", + "12 68 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5806 0.5668 \n", + "13 3 0.0001 8 4 adam plateau 0.05 0.0 0.20 0.5797 0.5709 \n", + "14 9 0.0001 8 4 adam plateau 0.10 0.0 0.20 0.5797 0.5709 \n", + "15 8 0.0001 8 4 adam plateau 0.10 0.0 0.15 0.5791 0.5669 \n", + "16 2 0.0001 8 4 adam plateau 0.05 0.0 0.15 0.5791 0.5669 \n", + "17 61 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5790 0.5664 \n", + "18 31 0.0001 8 5 adam plateau 0.10 0.0 0.10 0.5788 0.5695 \n", + "19 25 0.0001 8 5 adam plateau 0.05 0.0 0.10 0.5788 0.5695 \n", + "20 77 0.0001 16 5 adam plateau 0.05 0.1 0.15 0.5785 0.5733 \n", + "21 83 0.0001 16 5 adam plateau 0.10 0.1 0.15 0.5785 0.5733 \n", + "22 50 0.0001 16 4 adam plateau 0.05 0.0 0.15 0.5779 0.5670 \n", + "23 56 0.0001 16 4 adam plateau 0.10 0.0 0.15 0.5779 0.5670 \n", + "24 62 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.15 0.5777 0.5622 \n", + "\n", + " gap \n", + "0 0.0082 \n", + "1 0.0082 \n", + "2 0.0095 \n", + "3 0.0095 \n", + "4 0.0119 \n", + "5 0.0119 \n", + "6 
0.0104 \n", + "7 0.0104 \n", + "8 0.0096 \n", + "9 0.0096 \n", + "10 0.0116 \n", + "11 0.0116 \n", + "12 0.0138 \n", + "13 0.0088 \n", + "14 0.0088 \n", + "15 0.0123 \n", + "16 0.0123 \n", + "17 0.0126 \n", + "18 0.0093 \n", + "19 0.0093 \n", + "20 0.0051 \n", + "21 0.0051 \n", + "22 0.0109 \n", + "23 0.0109 \n", + "24 0.0155 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/60.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7f6b722c-c134-45ec-9cf0-b4b4f8eb0c3f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratioweight_decaydropoutdevtestgap
0430.000185adamhuggingface_cosine_with_restarts0.100.00.100.59350.57710.0164
1910.0001165adamhuggingface_cosine_with_restarts0.100.00.100.59020.56800.0221
2730.0001165adamplateau0.050.00.100.59000.56750.0224
3790.0001165adamplateau0.100.00.100.59000.56750.0224
4810.0001165adamplateau0.100.00.200.58580.57200.0138
5750.0001165adamplateau0.050.00.200.58580.57200.0138
6800.0001165adamplateau0.100.00.150.58560.57140.0141
7740.0001165adamplateau0.050.00.150.58560.57140.0141
8850.0001165adamhuggingface_cosine_with_restarts0.050.00.100.58480.56370.0211
9250.000185adamplateau0.050.00.100.58440.56900.0154
10310.000185adamplateau0.100.00.100.58440.56900.0154
11870.0001165adamhuggingface_cosine_with_restarts0.050.00.200.58370.56400.0197
12930.0001165adamhuggingface_cosine_with_restarts0.100.00.200.58340.56530.0180
13860.0001165adamhuggingface_cosine_with_restarts0.050.00.150.58320.56440.0189
14920.0001165adamhuggingface_cosine_with_restarts0.100.00.150.58190.56400.0180
15390.000185adamhuggingface_cosine_with_restarts0.050.00.200.58140.57290.0085
16450.000185adamhuggingface_cosine_with_restarts0.100.00.200.58120.57510.0060
17370.000185adamhuggingface_cosine_with_restarts0.050.00.100.58070.57170.0090
18320.000185adamplateau0.100.00.150.58020.56520.0150
19260.000185adamplateau0.050.00.150.58020.56520.0150
20270.000185adamplateau0.050.00.200.57930.56330.0160
21330.000185adamplateau0.100.00.200.57930.56330.0160
2290.000184adamplateau0.100.00.200.57830.56460.0137
2330.000184adamplateau0.050.00.200.57830.56460.0137
24490.0001164adamplateau0.050.00.100.57830.56190.0164
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio weight_decay dropout dev test \\\n", + "0 43 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.10 0.5935 0.5771 \n", + "1 91 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.10 0.5902 0.5680 \n", + "2 73 0.0001 16 5 adam plateau 0.05 0.0 0.10 0.5900 0.5675 \n", + "3 79 0.0001 16 5 adam plateau 0.10 0.0 0.10 0.5900 0.5675 \n", + "4 81 0.0001 16 5 adam plateau 0.10 0.0 0.20 0.5858 0.5720 \n", + "5 75 0.0001 16 5 adam plateau 0.05 0.0 0.20 0.5858 0.5720 \n", + "6 80 0.0001 16 5 adam plateau 0.10 0.0 0.15 0.5856 0.5714 \n", + "7 74 0.0001 16 5 adam plateau 0.05 0.0 0.15 0.5856 0.5714 \n", + "8 85 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5848 0.5637 \n", + "9 25 0.0001 8 5 adam plateau 0.05 0.0 0.10 0.5844 0.5690 \n", + "10 31 0.0001 8 5 adam plateau 0.10 0.0 0.10 0.5844 0.5690 \n", + "11 87 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5837 0.5640 \n", + "12 93 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5834 0.5653 \n", + "13 86 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.15 0.5832 0.5644 \n", + "14 92 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5819 0.5640 \n", + "15 39 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5814 0.5729 \n", + "16 45 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5812 0.5751 \n", + "17 37 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5807 0.5717 \n", + "18 32 0.0001 8 5 adam plateau 0.10 0.0 0.15 0.5802 0.5652 \n", + "19 26 0.0001 8 5 adam plateau 0.05 0.0 0.15 0.5802 0.5652 \n", + "20 27 0.0001 8 5 adam plateau 0.05 0.0 0.20 0.5793 0.5633 \n", + "21 33 0.0001 8 5 adam plateau 0.10 0.0 0.20 0.5793 0.5633 \n", + "22 9 0.0001 8 4 adam plateau 0.10 0.0 0.20 0.5783 0.5646 \n", + "23 3 0.0001 8 4 adam plateau 0.05 0.0 0.20 0.5783 0.5646 \n", + "24 49 0.0001 
16 4 adam plateau 0.05 0.0 0.10 0.5783 0.5619 \n", + "\n", + " gap \n", + "0 0.0164 \n", + "1 0.0221 \n", + "2 0.0224 \n", + "3 0.0224 \n", + "4 0.0138 \n", + "5 0.0138 \n", + "6 0.0141 \n", + "7 0.0141 \n", + "8 0.0211 \n", + "9 0.0154 \n", + "10 0.0154 \n", + "11 0.0197 \n", + "12 0.0180 \n", + "13 0.0189 \n", + "14 0.0180 \n", + "15 0.0085 \n", + "16 0.0060 \n", + "17 0.0090 \n", + "18 0.0150 \n", + "19 0.0150 \n", + "20 0.0160 \n", + "21 0.0160 \n", + "22 0.0137 \n", + "23 0.0137 \n", + "24 0.0164 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/70.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1d4db04e-ee0f-4c2c-b0d2-45edbf2128dd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratioweight_decaydropoutdevtestgap
020.000184adamplateau0.050.00.150.57350.56750.0060
180.000184adamplateau0.100.00.150.57350.56750.0060
230.000184adamplateau0.050.00.200.57340.56410.0094
390.000184adamplateau0.100.00.200.57340.56410.0094
4750.0001165adamplateau0.050.00.200.57230.57080.0015
5810.0001165adamplateau0.100.00.200.57230.57080.0015
6200.000184adamhuggingface_cosine_with_restarts0.100.00.150.57150.56870.0028
770.000184adamplateau0.100.00.100.57120.57060.0006
810.000184adamplateau0.050.00.100.57120.57060.0006
9740.0001165adamplateau0.050.00.150.57110.56790.0032
10800.0001165adamplateau0.100.00.150.57110.56790.0032
11630.0001164adamhuggingface_cosine_with_restarts0.050.00.200.57040.56120.0092
12330.000185adamplateau0.100.00.200.57030.56590.0044
13270.000185adamplateau0.050.00.200.57030.56590.0044
14450.000185adamhuggingface_cosine_with_restarts0.100.00.200.56990.56070.0092
15390.000185adamhuggingface_cosine_with_restarts0.050.00.200.56970.56870.0010
16210.000184adamhuggingface_cosine_with_restarts0.100.00.200.56920.55400.0152
17500.0001164adamplateau0.050.00.150.56870.56380.0048
18560.0001164adamplateau0.100.00.150.56870.56380.0048
19850.0001165adamhuggingface_cosine_with_restarts0.050.00.100.56860.56700.0016
20150.000184adamhuggingface_cosine_with_restarts0.050.00.200.56850.56290.0056
21680.0001164adamhuggingface_cosine_with_restarts0.100.00.150.56760.56170.0059
22920.0001165adamhuggingface_cosine_with_restarts0.100.00.150.56740.56380.0036
23140.000184adamhuggingface_cosine_with_restarts0.050.00.150.56730.56610.0011
24510.0001164adamplateau0.050.00.200.56710.56330.0038
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio weight_decay dropout dev test \\\n", + "0 2 0.0001 8 4 adam plateau 0.05 0.0 0.15 0.5735 0.5675 \n", + "1 8 0.0001 8 4 adam plateau 0.10 0.0 0.15 0.5735 0.5675 \n", + "2 3 0.0001 8 4 adam plateau 0.05 0.0 0.20 0.5734 0.5641 \n", + "3 9 0.0001 8 4 adam plateau 0.10 0.0 0.20 0.5734 0.5641 \n", + "4 75 0.0001 16 5 adam plateau 0.05 0.0 0.20 0.5723 0.5708 \n", + "5 81 0.0001 16 5 adam plateau 0.10 0.0 0.20 0.5723 0.5708 \n", + "6 20 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5715 0.5687 \n", + "7 7 0.0001 8 4 adam plateau 0.10 0.0 0.10 0.5712 0.5706 \n", + "8 1 0.0001 8 4 adam plateau 0.05 0.0 0.10 0.5712 0.5706 \n", + "9 74 0.0001 16 5 adam plateau 0.05 0.0 0.15 0.5711 0.5679 \n", + "10 80 0.0001 16 5 adam plateau 0.10 0.0 0.15 0.5711 0.5679 \n", + "11 63 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5704 0.5612 \n", + "12 33 0.0001 8 5 adam plateau 0.10 0.0 0.20 0.5703 0.5659 \n", + "13 27 0.0001 8 5 adam plateau 0.05 0.0 0.20 0.5703 0.5659 \n", + "14 45 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5699 0.5607 \n", + "15 39 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5697 0.5687 \n", + "16 21 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5692 0.5540 \n", + "17 50 0.0001 16 4 adam plateau 0.05 0.0 0.15 0.5687 0.5638 \n", + "18 56 0.0001 16 4 adam plateau 0.10 0.0 0.15 0.5687 0.5638 \n", + "19 85 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5686 0.5670 \n", + "20 15 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5685 0.5629 \n", + "21 68 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5676 0.5617 \n", + "22 92 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5674 0.5638 \n", + "23 14 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.15 0.5673 0.5661 \n", + "24 51 0.0001 16 4 
adam plateau 0.05 0.0 0.20 0.5671 0.5633 \n", + "\n", + " gap \n", + "0 0.0060 \n", + "1 0.0060 \n", + "2 0.0094 \n", + "3 0.0094 \n", + "4 0.0015 \n", + "5 0.0015 \n", + "6 0.0028 \n", + "7 0.0006 \n", + "8 0.0006 \n", + "9 0.0032 \n", + "10 0.0032 \n", + "11 0.0092 \n", + "12 0.0044 \n", + "13 0.0044 \n", + "14 0.0092 \n", + "15 0.0010 \n", + "16 0.0152 \n", + "17 0.0048 \n", + "18 0.0048 \n", + "19 0.0016 \n", + "20 0.0056 \n", + "21 0.0059 \n", + "22 0.0036 \n", + "23 0.0011 \n", + "24 0.0038 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/80.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ff64fbf5-cea1-4ee1-b7dc-d415ed7de9e1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratioweight_decaydropoutdevtestgap
0200.000184adamhuggingface_cosine_with_restarts0.100.00.150.58810.57130.0168
1570.0001164adamplateau0.100.00.200.58460.56720.0174
2510.0001164adamplateau0.050.00.200.58460.56720.0174
3140.000184adamhuggingface_cosine_with_restarts0.050.00.150.58330.56610.0172
4210.000184adamhuggingface_cosine_with_restarts0.100.00.200.58260.56990.0127
5150.000184adamhuggingface_cosine_with_restarts0.050.00.200.58190.56400.0179
6130.000184adamhuggingface_cosine_with_restarts0.050.00.100.58060.55870.0219
720.000184adamplateau0.050.00.150.58000.56860.0113
880.000184adamplateau0.100.00.150.58000.56860.0113
970.000184adamplateau0.100.00.100.57950.56930.0103
1010.000184adamplateau0.050.00.100.57950.56930.0103
11500.0001164adamplateau0.050.00.150.57870.56620.0125
12560.0001164adamplateau0.100.00.150.57870.56620.0125
13450.000185adamhuggingface_cosine_with_restarts0.100.00.200.57750.56370.0138
14550.0001164adamplateau0.100.00.100.57660.57170.0049
15490.0001164adamplateau0.050.00.100.57660.57170.0049
16610.0001164adamhuggingface_cosine_with_restarts0.050.00.100.57600.56200.0140
17810.0001165adamplateau0.100.00.200.57490.56840.0066
18750.0001165adamplateau0.050.00.200.57490.56840.0066
19310.000185adamplateau0.100.00.100.57450.56590.0086
20250.000185adamplateau0.050.00.100.57450.56590.0086
21370.000185adamhuggingface_cosine_with_restarts0.050.00.100.57450.57120.0034
22440.000185adamhuggingface_cosine_with_restarts0.100.00.150.57400.5744-0.0004
23190.000184adamhuggingface_cosine_with_restarts0.100.00.100.57360.56060.0130
24630.0001164adamhuggingface_cosine_with_restarts0.050.00.200.57340.56240.0110
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio weight_decay dropout dev test \\\n", + "0 20 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5881 0.5713 \n", + "1 57 0.0001 16 4 adam plateau 0.10 0.0 0.20 0.5846 0.5672 \n", + "2 51 0.0001 16 4 adam plateau 0.05 0.0 0.20 0.5846 0.5672 \n", + "3 14 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.15 0.5833 0.5661 \n", + "4 21 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5826 0.5699 \n", + "5 15 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5819 0.5640 \n", + "6 13 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5806 0.5587 \n", + "7 2 0.0001 8 4 adam plateau 0.05 0.0 0.15 0.5800 0.5686 \n", + "8 8 0.0001 8 4 adam plateau 0.10 0.0 0.15 0.5800 0.5686 \n", + "9 7 0.0001 8 4 adam plateau 0.10 0.0 0.10 0.5795 0.5693 \n", + "10 1 0.0001 8 4 adam plateau 0.05 0.0 0.10 0.5795 0.5693 \n", + "11 50 0.0001 16 4 adam plateau 0.05 0.0 0.15 0.5787 0.5662 \n", + "12 56 0.0001 16 4 adam plateau 0.10 0.0 0.15 0.5787 0.5662 \n", + "13 45 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5775 0.5637 \n", + "14 55 0.0001 16 4 adam plateau 0.10 0.0 0.10 0.5766 0.5717 \n", + "15 49 0.0001 16 4 adam plateau 0.05 0.0 0.10 0.5766 0.5717 \n", + "16 61 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5760 0.5620 \n", + "17 81 0.0001 16 5 adam plateau 0.10 0.0 0.20 0.5749 0.5684 \n", + "18 75 0.0001 16 5 adam plateau 0.05 0.0 0.20 0.5749 0.5684 \n", + "19 31 0.0001 8 5 adam plateau 0.10 0.0 0.10 0.5745 0.5659 \n", + "20 25 0.0001 8 5 adam plateau 0.05 0.0 0.10 0.5745 0.5659 \n", + "21 37 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5745 0.5712 \n", + "22 44 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5740 0.5744 \n", + "23 19 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.10 0.5736 0.5606 \n", + "24 63 0.0001 16 4 
adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5734 0.5624 \n", + "\n", + " gap \n", + "0 0.0168 \n", + "1 0.0174 \n", + "2 0.0174 \n", + "3 0.0172 \n", + "4 0.0127 \n", + "5 0.0179 \n", + "6 0.0219 \n", + "7 0.0113 \n", + "8 0.0113 \n", + "9 0.0103 \n", + "10 0.0103 \n", + "11 0.0125 \n", + "12 0.0125 \n", + "13 0.0138 \n", + "14 0.0049 \n", + "15 0.0049 \n", + "16 0.0140 \n", + "17 0.0066 \n", + "18 0.0066 \n", + "19 0.0086 \n", + "20 0.0086 \n", + "21 0.0034 \n", + "22 -0.0004 \n", + "23 0.0130 \n", + "24 0.0110 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/90.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "00b62770-f38f-405c-9c5c-630d4afd7d26", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratioweight_decaydropoutdevtestgap
0490.0001164adamplateau0.050.00.100.58600.57020.0158
1550.0001164adamplateau0.100.00.100.58600.57020.0158
2560.0001164adamplateau0.100.00.150.58270.57710.0056
3500.0001164adamplateau0.050.00.150.58270.57710.0056
420.000184adamplateau0.050.00.150.58160.57810.0034
580.000184adamplateau0.100.00.150.58160.57810.0034
6510.0001164adamplateau0.050.00.200.58050.57200.0085
7570.0001164adamplateau0.100.00.200.58050.57200.0085
870.000184adamplateau0.100.00.100.57930.57150.0079
910.000184adamplateau0.050.00.100.57930.57150.0079
10790.0001165adamplateau0.100.00.100.57890.56920.0097
11730.0001165adamplateau0.050.00.100.57890.56920.0097
1230.000184adamplateau0.050.00.200.57820.57340.0048
1390.000184adamplateau0.100.00.200.57820.57340.0048
14450.000185adamhuggingface_cosine_with_restarts0.100.00.200.57800.57770.0003
15390.000185adamhuggingface_cosine_with_restarts0.050.00.200.57780.57510.0026
16630.0001164adamhuggingface_cosine_with_restarts0.050.00.200.57670.57180.0048
17620.0001164adamhuggingface_cosine_with_restarts0.050.00.150.57660.57080.0059
18740.0001165adamplateau0.050.00.150.57560.56750.0080
19800.0001165adamplateau0.100.00.150.57560.56750.0080
20870.0001165adamhuggingface_cosine_with_restarts0.050.00.200.57540.56850.0069
21930.0001165adamhuggingface_cosine_with_restarts0.100.00.200.57520.57440.0009
22670.0001164adamhuggingface_cosine_with_restarts0.100.00.100.57400.56940.0045
23850.0001165adamhuggingface_cosine_with_restarts0.050.00.100.57370.57070.0030
24140.000184adamhuggingface_cosine_with_restarts0.050.00.150.57360.55650.0171
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio weight_decay dropout dev test \\\n", + "0 49 0.0001 16 4 adam plateau 0.05 0.0 0.10 0.5860 0.5702 \n", + "1 55 0.0001 16 4 adam plateau 0.10 0.0 0.10 0.5860 0.5702 \n", + "2 56 0.0001 16 4 adam plateau 0.10 0.0 0.15 0.5827 0.5771 \n", + "3 50 0.0001 16 4 adam plateau 0.05 0.0 0.15 0.5827 0.5771 \n", + "4 2 0.0001 8 4 adam plateau 0.05 0.0 0.15 0.5816 0.5781 \n", + "5 8 0.0001 8 4 adam plateau 0.10 0.0 0.15 0.5816 0.5781 \n", + "6 51 0.0001 16 4 adam plateau 0.05 0.0 0.20 0.5805 0.5720 \n", + "7 57 0.0001 16 4 adam plateau 0.10 0.0 0.20 0.5805 0.5720 \n", + "8 7 0.0001 8 4 adam plateau 0.10 0.0 0.10 0.5793 0.5715 \n", + "9 1 0.0001 8 4 adam plateau 0.05 0.0 0.10 0.5793 0.5715 \n", + "10 79 0.0001 16 5 adam plateau 0.10 0.0 0.10 0.5789 0.5692 \n", + "11 73 0.0001 16 5 adam plateau 0.05 0.0 0.10 0.5789 0.5692 \n", + "12 3 0.0001 8 4 adam plateau 0.05 0.0 0.20 0.5782 0.5734 \n", + "13 9 0.0001 8 4 adam plateau 0.10 0.0 0.20 0.5782 0.5734 \n", + "14 45 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5780 0.5777 \n", + "15 39 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5778 0.5751 \n", + "16 63 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5767 0.5718 \n", + "17 62 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.15 0.5766 0.5708 \n", + "18 74 0.0001 16 5 adam plateau 0.05 0.0 0.15 0.5756 0.5675 \n", + "19 80 0.0001 16 5 adam plateau 0.10 0.0 0.15 0.5756 0.5675 \n", + "20 87 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5754 0.5685 \n", + "21 93 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5752 0.5744 \n", + "22 67 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.10 0.5740 0.5694 \n", + "23 85 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5737 0.5707 \n", + "24 14 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 
0.0 0.15 0.5736 0.5565 \n", + "\n", + " gap \n", + "0 0.0158 \n", + "1 0.0158 \n", + "2 0.0056 \n", + "3 0.0056 \n", + "4 0.0034 \n", + "5 0.0034 \n", + "6 0.0085 \n", + "7 0.0085 \n", + "8 0.0079 \n", + "9 0.0079 \n", + "10 0.0097 \n", + "11 0.0097 \n", + "12 0.0048 \n", + "13 0.0048 \n", + "14 0.0003 \n", + "15 0.0026 \n", + "16 0.0048 \n", + "17 0.0059 \n", + "18 0.0080 \n", + "19 0.0080 \n", + "20 0.0069 \n", + "21 0.0009 \n", + "22 0.0045 \n", + "23 0.0030 \n", + "24 0.0171 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/100.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb9186a9-f0bc-406d-b2c3-724d7b5f9d43", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f9fbc1c-ab41-4dca-bbad-6a0eadd28f7f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75806125-04ac-4e18-968e-4632b92d1d16", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ac6d74a-3625-4c46-b1a1-7d70a7ef5446", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99e64f6f-eeb4-4f1f-9bd0-b2cb230dc1da", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "467b04df-a408-4808-941d-5b0f2ebbf217", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodropoutdevtestgap
050.000184adamhuggingface_cosine_with_restarts0.050.100.57100.55750.0135
1140.000185adamhuggingface_cosine_with_restarts0.050.150.57070.56700.0037
2130.000185adamhuggingface_cosine_with_restarts0.050.100.56980.56500.0048
390.000185adamplateau0.050.100.56910.55650.0126
4110.000185adamplateau0.100.100.56910.55650.0126
5270.0001165adamplateau0.100.100.56900.56780.0011
6250.0001165adamplateau0.050.100.56900.56780.0011
770.000184adamhuggingface_cosine_with_restarts0.100.100.56710.55740.0097
8320.0001165adamhuggingface_cosine_with_restarts0.100.150.56640.56340.0031
9240.0001164adamhuggingface_cosine_with_restarts0.100.150.56630.56290.0034
10290.0001165adamhuggingface_cosine_with_restarts0.050.100.56620.55900.0072
11310.0001165adamhuggingface_cosine_with_restarts0.100.100.56610.56050.0056
12280.0001165adamplateau0.100.150.56560.5668-0.0012
13260.0001165adamplateau0.050.150.56560.5668-0.0012
14160.000185adamhuggingface_cosine_with_restarts0.100.150.56460.55520.0094
15150.000185adamhuggingface_cosine_with_restarts0.100.100.56400.55960.0044
16300.0001165adamhuggingface_cosine_with_restarts0.050.150.56380.55880.0050
17120.000185adamplateau0.100.150.56370.55340.0102
18100.000185adamplateau0.050.150.56370.55340.0102
1980.000184adamhuggingface_cosine_with_restarts0.100.150.56320.55610.0072
20210.0001164adamhuggingface_cosine_with_restarts0.050.100.56210.55220.0099
2120.000184adamplateau0.050.150.56150.55150.0100
2240.000184adamplateau0.100.150.56150.55150.0100
2360.000184adamhuggingface_cosine_with_restarts0.050.150.56130.55130.0100
2430.000184adamplateau0.100.100.56060.55250.0081
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dropout dev test gap\n", + "0 5 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5710 0.5575 0.0135\n", + "1 14 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5707 0.5670 0.0037\n", + "2 13 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5698 0.5650 0.0048\n", + "3 9 0.0001 8 5 adam plateau 0.05 0.10 0.5691 0.5565 0.0126\n", + "4 11 0.0001 8 5 adam plateau 0.10 0.10 0.5691 0.5565 0.0126\n", + "5 27 0.0001 16 5 adam plateau 0.10 0.10 0.5690 0.5678 0.0011\n", + "6 25 0.0001 16 5 adam plateau 0.05 0.10 0.5690 0.5678 0.0011\n", + "7 7 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5671 0.5574 0.0097\n", + "8 32 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5664 0.5634 0.0031\n", + "9 24 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5663 0.5629 0.0034\n", + "10 29 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5662 0.5590 0.0072\n", + "11 31 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.10 0.5661 0.5605 0.0056\n", + "12 28 0.0001 16 5 adam plateau 0.10 0.15 0.5656 0.5668 -0.0012\n", + "13 26 0.0001 16 5 adam plateau 0.05 0.15 0.5656 0.5668 -0.0012\n", + "14 16 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5646 0.5552 0.0094\n", + "15 15 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.10 0.5640 0.5596 0.0044\n", + "16 30 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5638 0.5588 0.0050\n", + "17 12 0.0001 8 5 adam plateau 0.10 0.15 0.5637 0.5534 0.0102\n", + "18 10 0.0001 8 5 adam plateau 0.05 0.15 0.5637 0.5534 0.0102\n", + "19 8 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5632 0.5561 0.0072\n", + "20 21 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5621 0.5522 0.0099\n", + "21 2 0.0001 8 4 adam plateau 0.05 0.15 0.5615 0.5515 0.0100\n", + "22 4 0.0001 8 4 adam plateau 0.10 0.15 
0.5615 0.5515 0.0100\n", + "23 6 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5613 0.5513 0.0100\n", + "24 3 0.0001 8 4 adam plateau 0.10 0.10 0.5606 0.5525 0.0081" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/bi/10.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "efba3e34-f64b-4962-9c8b-ef70294badb1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodropoutdevtestgap
070.000184adamhuggingface_cosine_with_restarts0.100.100.58040.56890.0116
1210.0001164adamhuggingface_cosine_with_restarts0.050.100.57910.57090.0082
2220.0001164adamhuggingface_cosine_with_restarts0.050.150.57390.56510.0088
3180.0001164adamplateau0.050.150.57370.56790.0058
4200.0001164adamplateau0.100.150.57370.56790.0058
5150.000185adamhuggingface_cosine_with_restarts0.100.100.57310.56380.0092
6130.000185adamhuggingface_cosine_with_restarts0.050.100.57310.56730.0058
750.000184adamhuggingface_cosine_with_restarts0.050.100.57300.56890.0041
880.000184adamhuggingface_cosine_with_restarts0.100.150.57240.56310.0093
9300.0001165adamhuggingface_cosine_with_restarts0.050.150.57190.57020.0017
1040.000184adamplateau0.100.150.57170.56620.0055
1120.000184adamplateau0.050.150.57170.56620.0055
12170.0001164adamplateau0.050.100.57120.56800.0032
13260.0001165adamplateau0.050.150.57120.56260.0086
14190.0001164adamplateau0.100.100.57120.56800.0032
15280.0001165adamplateau0.100.150.57120.56260.0086
1690.000185adamplateau0.050.100.57080.55330.0175
17110.000185adamplateau0.100.100.57080.55330.0175
18230.0001164adamhuggingface_cosine_with_restarts0.100.100.57060.5706-0.0001
19160.000185adamhuggingface_cosine_with_restarts0.100.150.57040.56490.0055
2060.000184adamhuggingface_cosine_with_restarts0.050.150.56970.56570.0041
21290.0001165adamhuggingface_cosine_with_restarts0.050.100.56960.56610.0035
22250.0001165adamplateau0.050.100.56950.56040.0092
23270.0001165adamplateau0.100.100.56950.56040.0092
24100.000185adamplateau0.050.150.56940.55250.0170
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dropout dev test gap\n", + "0 7 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5804 0.5689 0.0116\n", + "1 21 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5791 0.5709 0.0082\n", + "2 22 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5739 0.5651 0.0088\n", + "3 18 0.0001 16 4 adam plateau 0.05 0.15 0.5737 0.5679 0.0058\n", + "4 20 0.0001 16 4 adam plateau 0.10 0.15 0.5737 0.5679 0.0058\n", + "5 15 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.10 0.5731 0.5638 0.0092\n", + "6 13 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5731 0.5673 0.0058\n", + "7 5 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5730 0.5689 0.0041\n", + "8 8 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5724 0.5631 0.0093\n", + "9 30 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5719 0.5702 0.0017\n", + "10 4 0.0001 8 4 adam plateau 0.10 0.15 0.5717 0.5662 0.0055\n", + "11 2 0.0001 8 4 adam plateau 0.05 0.15 0.5717 0.5662 0.0055\n", + "12 17 0.0001 16 4 adam plateau 0.05 0.10 0.5712 0.5680 0.0032\n", + "13 26 0.0001 16 5 adam plateau 0.05 0.15 0.5712 0.5626 0.0086\n", + "14 19 0.0001 16 4 adam plateau 0.10 0.10 0.5712 0.5680 0.0032\n", + "15 28 0.0001 16 5 adam plateau 0.10 0.15 0.5712 0.5626 0.0086\n", + "16 9 0.0001 8 5 adam plateau 0.05 0.10 0.5708 0.5533 0.0175\n", + "17 11 0.0001 8 5 adam plateau 0.10 0.10 0.5708 0.5533 0.0175\n", + "18 23 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5706 0.5706 -0.0001\n", + "19 16 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5704 0.5649 0.0055\n", + "20 6 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5697 0.5657 0.0041\n", + "21 29 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5696 0.5661 0.0035\n", + "22 25 0.0001 16 5 adam plateau 0.05 0.10 0.5695 0.5604 0.0092\n", + 
"23 27 0.0001 16 5 adam plateau 0.10 0.10 0.5695 0.5604 0.0092\n", + "24 10 0.0001 8 5 adam plateau 0.05 0.15 0.5694 0.5525 0.0170" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/bi/20.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5c48bc72-cac8-4119-96ad-a1f09cfed996", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodropoutdevtestgap
0200.0001164adamplateau0.100.150.56960.56310.0065
1180.0001164adamplateau0.050.150.56960.56310.0065
2190.0001164adamplateau0.100.100.56540.56360.0017
3170.0001164adamplateau0.050.100.56540.56360.0017
4260.0001165adamplateau0.050.150.56190.55940.0025
5280.0001165adamplateau0.100.150.56190.55940.0025
6110.000185adamplateau0.100.100.56170.55490.0069
790.000185adamplateau0.050.100.56170.55490.0069
8150.000185adamhuggingface_cosine_with_restarts0.100.100.56090.55470.0062
9270.0001165adamplateau0.100.100.56070.55840.0023
10250.0001165adamplateau0.050.100.56070.55840.0023
11300.0001165adamhuggingface_cosine_with_restarts0.050.150.56010.5603-0.0001
12120.000185adamplateau0.100.150.55960.55510.0045
13100.000185adamplateau0.050.150.55960.55510.0045
14140.000185adamhuggingface_cosine_with_restarts0.050.150.55930.55670.0026
1510.000184adamplateau0.050.100.55890.55530.0036
1630.000184adamplateau0.100.100.55890.55530.0036
17160.000185adamhuggingface_cosine_with_restarts0.100.150.55880.55470.0041
1850.000184adamhuggingface_cosine_with_restarts0.050.100.55880.55510.0037
19130.000185adamhuggingface_cosine_with_restarts0.050.100.55860.55200.0066
20210.0001164adamhuggingface_cosine_with_restarts0.050.100.55830.5599-0.0017
21320.0001165adamhuggingface_cosine_with_restarts0.100.150.55780.5591-0.0012
2260.000184adamhuggingface_cosine_with_restarts0.050.150.55760.5588-0.0012
2380.000184adamhuggingface_cosine_with_restarts0.100.150.55740.55220.0052
24310.0001165adamhuggingface_cosine_with_restarts0.100.100.55660.55230.0043
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dropout dev test gap\n", + "0 20 0.0001 16 4 adam plateau 0.10 0.15 0.5696 0.5631 0.0065\n", + "1 18 0.0001 16 4 adam plateau 0.05 0.15 0.5696 0.5631 0.0065\n", + "2 19 0.0001 16 4 adam plateau 0.10 0.10 0.5654 0.5636 0.0017\n", + "3 17 0.0001 16 4 adam plateau 0.05 0.10 0.5654 0.5636 0.0017\n", + "4 26 0.0001 16 5 adam plateau 0.05 0.15 0.5619 0.5594 0.0025\n", + "5 28 0.0001 16 5 adam plateau 0.10 0.15 0.5619 0.5594 0.0025\n", + "6 11 0.0001 8 5 adam plateau 0.10 0.10 0.5617 0.5549 0.0069\n", + "7 9 0.0001 8 5 adam plateau 0.05 0.10 0.5617 0.5549 0.0069\n", + "8 15 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.10 0.5609 0.5547 0.0062\n", + "9 27 0.0001 16 5 adam plateau 0.10 0.10 0.5607 0.5584 0.0023\n", + "10 25 0.0001 16 5 adam plateau 0.05 0.10 0.5607 0.5584 0.0023\n", + "11 30 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5601 0.5603 -0.0001\n", + "12 12 0.0001 8 5 adam plateau 0.10 0.15 0.5596 0.5551 0.0045\n", + "13 10 0.0001 8 5 adam plateau 0.05 0.15 0.5596 0.5551 0.0045\n", + "14 14 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5593 0.5567 0.0026\n", + "15 1 0.0001 8 4 adam plateau 0.05 0.10 0.5589 0.5553 0.0036\n", + "16 3 0.0001 8 4 adam plateau 0.10 0.10 0.5589 0.5553 0.0036\n", + "17 16 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5588 0.5547 0.0041\n", + "18 5 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5588 0.5551 0.0037\n", + "19 13 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5586 0.5520 0.0066\n", + "20 21 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5583 0.5599 -0.0017\n", + "21 32 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5578 0.5591 -0.0012\n", + "22 6 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5576 0.5588 -0.0012\n", + "23 8 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.15 
0.5574 0.5522 0.0052\n", + "24 31 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.10 0.5566 0.5523 0.0043" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/bi/30.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a93f7c97-3e3c-4e6f-8bcb-454480aae5f8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodropoutdevtestgap
0190.0001164adamplateau0.100.100.58220.56710.0151
1170.0001164adamplateau0.050.100.58220.56710.0151
220.000184adamplateau0.050.150.57290.56430.0086
340.000184adamplateau0.100.150.57290.56430.0086
4200.0001164adamplateau0.100.150.57270.56010.0126
5180.0001164adamplateau0.050.150.57270.56010.0126
660.000184adamhuggingface_cosine_with_restarts0.050.150.56980.56260.0072
780.000184adamhuggingface_cosine_with_restarts0.100.150.56970.56230.0074
8110.000185adamplateau0.100.100.56910.56220.0069
990.000185adamplateau0.050.100.56910.56220.0069
10220.0001164adamhuggingface_cosine_with_restarts0.050.150.56810.56410.0039
11240.0001164adamhuggingface_cosine_with_restarts0.100.150.56760.56490.0027
12260.0001165adamplateau0.050.150.56750.56290.0045
13280.0001165adamplateau0.100.150.56750.56290.0045
1430.000184adamplateau0.100.100.56750.56130.0062
1510.000184adamplateau0.050.100.56750.56130.0062
1650.000184adamhuggingface_cosine_with_restarts0.050.100.56630.55750.0088
17250.0001165adamplateau0.050.100.56550.56180.0037
18270.0001165adamplateau0.100.100.56550.56180.0037
1970.000184adamhuggingface_cosine_with_restarts0.100.100.56440.56250.0019
20230.0001164adamhuggingface_cosine_with_restarts0.100.100.56410.5656-0.0015
21320.0001165adamhuggingface_cosine_with_restarts0.100.150.56340.55650.0069
22210.0001164adamhuggingface_cosine_with_restarts0.050.100.56320.5638-0.0006
23300.0001165adamhuggingface_cosine_with_restarts0.050.150.56270.56070.0020
24120.000185adamplateau0.100.150.56220.55850.0038
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dropout dev test gap\n", + "0 19 0.0001 16 4 adam plateau 0.10 0.10 0.5822 0.5671 0.0151\n", + "1 17 0.0001 16 4 adam plateau 0.05 0.10 0.5822 0.5671 0.0151\n", + "2 2 0.0001 8 4 adam plateau 0.05 0.15 0.5729 0.5643 0.0086\n", + "3 4 0.0001 8 4 adam plateau 0.10 0.15 0.5729 0.5643 0.0086\n", + "4 20 0.0001 16 4 adam plateau 0.10 0.15 0.5727 0.5601 0.0126\n", + "5 18 0.0001 16 4 adam plateau 0.05 0.15 0.5727 0.5601 0.0126\n", + "6 6 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5698 0.5626 0.0072\n", + "7 8 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5697 0.5623 0.0074\n", + "8 11 0.0001 8 5 adam plateau 0.10 0.10 0.5691 0.5622 0.0069\n", + "9 9 0.0001 8 5 adam plateau 0.05 0.10 0.5691 0.5622 0.0069\n", + "10 22 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5681 0.5641 0.0039\n", + "11 24 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5676 0.5649 0.0027\n", + "12 26 0.0001 16 5 adam plateau 0.05 0.15 0.5675 0.5629 0.0045\n", + "13 28 0.0001 16 5 adam plateau 0.10 0.15 0.5675 0.5629 0.0045\n", + "14 3 0.0001 8 4 adam plateau 0.10 0.10 0.5675 0.5613 0.0062\n", + "15 1 0.0001 8 4 adam plateau 0.05 0.10 0.5675 0.5613 0.0062\n", + "16 5 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5663 0.5575 0.0088\n", + "17 25 0.0001 16 5 adam plateau 0.05 0.10 0.5655 0.5618 0.0037\n", + "18 27 0.0001 16 5 adam plateau 0.10 0.10 0.5655 0.5618 0.0037\n", + "19 7 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5644 0.5625 0.0019\n", + "20 23 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5641 0.5656 -0.0015\n", + "21 32 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5634 0.5565 0.0069\n", + "22 21 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5632 0.5638 -0.0006\n", + "23 30 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5627 
0.5607 0.0020\n", + "24 12 0.0001 8 5 adam plateau 0.10 0.15 0.5622 0.5585 0.0038" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/bi/40.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a488eb61-89a5-4c79-82a6-3980a5621fdd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodropoutdevtestgap
0190.0001164adamplateau0.100.100.57230.56760.0047
1170.0001164adamplateau0.050.100.57230.56760.0047
2200.0001164adamplateau0.100.150.57210.56910.0031
3180.0001164adamplateau0.050.150.57210.56910.0031
4300.0001165adamhuggingface_cosine_with_restarts0.050.150.57070.56540.0053
5210.0001164adamhuggingface_cosine_with_restarts0.050.100.57010.56300.0071
6290.0001165adamhuggingface_cosine_with_restarts0.050.100.57000.56950.0005
7240.0001164adamhuggingface_cosine_with_restarts0.100.150.56950.56290.0066
8270.0001165adamplateau0.100.100.56780.5712-0.0034
9250.0001165adamplateau0.050.100.56780.5712-0.0034
10130.000185adamhuggingface_cosine_with_restarts0.050.100.56640.55710.0092
1120.000184adamplateau0.050.150.56630.56300.0033
1240.000184adamplateau0.100.150.56630.56300.0033
13160.000185adamhuggingface_cosine_with_restarts0.100.150.56560.56300.0026
14110.000185adamplateau0.100.100.56500.55840.0066
1590.000185adamplateau0.050.100.56500.55840.0066
16230.0001164adamhuggingface_cosine_with_restarts0.100.100.56480.55950.0053
1750.000184adamhuggingface_cosine_with_restarts0.050.100.56470.55860.0061
18260.0001165adamplateau0.050.150.56430.55750.0067
19280.0001165adamplateau0.100.150.56430.55750.0067
2060.000184adamhuggingface_cosine_with_restarts0.050.150.56380.56080.0031
21140.000185adamhuggingface_cosine_with_restarts0.050.150.56330.56240.0009
22100.000185adamplateau0.050.150.56320.56030.0028
23120.000185adamplateau0.100.150.56320.56030.0028
24320.0001165adamhuggingface_cosine_with_restarts0.100.150.56320.5644-0.0012
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dropout dev test gap\n", + "0 19 0.0001 16 4 adam plateau 0.10 0.10 0.5723 0.5676 0.0047\n", + "1 17 0.0001 16 4 adam plateau 0.05 0.10 0.5723 0.5676 0.0047\n", + "2 20 0.0001 16 4 adam plateau 0.10 0.15 0.5721 0.5691 0.0031\n", + "3 18 0.0001 16 4 adam plateau 0.05 0.15 0.5721 0.5691 0.0031\n", + "4 30 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5707 0.5654 0.0053\n", + "5 21 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5701 0.5630 0.0071\n", + "6 29 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5700 0.5695 0.0005\n", + "7 24 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5695 0.5629 0.0066\n", + "8 27 0.0001 16 5 adam plateau 0.10 0.10 0.5678 0.5712 -0.0034\n", + "9 25 0.0001 16 5 adam plateau 0.05 0.10 0.5678 0.5712 -0.0034\n", + "10 13 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5664 0.5571 0.0092\n", + "11 2 0.0001 8 4 adam plateau 0.05 0.15 0.5663 0.5630 0.0033\n", + "12 4 0.0001 8 4 adam plateau 0.10 0.15 0.5663 0.5630 0.0033\n", + "13 16 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5656 0.5630 0.0026\n", + "14 11 0.0001 8 5 adam plateau 0.10 0.10 0.5650 0.5584 0.0066\n", + "15 9 0.0001 8 5 adam plateau 0.05 0.10 0.5650 0.5584 0.0066\n", + "16 23 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5648 0.5595 0.0053\n", + "17 5 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5647 0.5586 0.0061\n", + "18 26 0.0001 16 5 adam plateau 0.05 0.15 0.5643 0.5575 0.0067\n", + "19 28 0.0001 16 5 adam plateau 0.10 0.15 0.5643 0.5575 0.0067\n", + "20 6 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5638 0.5608 0.0031\n", + "21 14 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5633 0.5624 0.0009\n", + "22 10 0.0001 8 5 adam plateau 0.05 0.15 0.5632 0.5603 0.0028\n", + "23 12 0.0001 8 5 adam plateau 0.10 0.15 
0.5632 0.5603 0.0028\n", + "24 32 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5632 0.5644 -0.0012" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/bi/50.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "1e7603d5-f30b-4304-b6b6-a59daf1f5356", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stepoptimizerlrweight_decaynum_transformer_headstr_layer_numberhidden_dimout_featuresdevtestgap
021adam0.000100.001652562560.58620.58200.0043
1262lion0.000010.01852565120.58470.57280.0120
2226lion0.000010.00852565120.58470.57300.0117
3128adam0.000010.011645125120.58450.57310.0114
492adam0.000010.001645125120.58400.57680.0072
5104adam0.000010.003245125120.58340.57430.0091
65adam0.000100.00842562560.58340.57400.0094
7127adam0.000010.011645122560.58330.57590.0074
880adam0.000010.00845125120.58260.57390.0087
9139adam0.000010.013245122560.58200.57360.0085
10103adam0.000010.003245122560.58200.57650.0055
1134adam0.000100.003252565120.58150.57880.0027
12108adam0.000010.003255125120.58140.57960.0018
13245lion0.000010.003242562560.58130.57580.0055
14233lion0.000010.001642562560.58120.57230.0088
1533adam0.000100.003252562560.58110.57920.0020
16281lion0.000010.013242562560.58080.57590.0050
17230lion0.000010.001632565120.58060.57140.0092
1849adam0.000100.011632562560.58050.57290.0076
19266lion0.000010.011632565120.58030.57180.0085
20116adam0.000010.01845125120.58020.57160.0085
219adam0.000100.00852562560.58010.57620.0039
22115adam0.000010.01845122560.57990.57190.0080
2322adam0.000100.001652565120.57980.57140.0084
2479adam0.000010.00845122560.57970.57590.0038
\n", + "
" + ], + "text/plain": [ + " step optimizer lr weight_decay num_transformer_heads tr_layer_number hidden_dim out_features dev test gap\n", + "0 21 adam 0.00010 0.00 16 5 256 256 0.5862 0.5820 0.0043\n", + "1 262 lion 0.00001 0.01 8 5 256 512 0.5847 0.5728 0.0120\n", + "2 226 lion 0.00001 0.00 8 5 256 512 0.5847 0.5730 0.0117\n", + "3 128 adam 0.00001 0.01 16 4 512 512 0.5845 0.5731 0.0114\n", + "4 92 adam 0.00001 0.00 16 4 512 512 0.5840 0.5768 0.0072\n", + "5 104 adam 0.00001 0.00 32 4 512 512 0.5834 0.5743 0.0091\n", + "6 5 adam 0.00010 0.00 8 4 256 256 0.5834 0.5740 0.0094\n", + "7 127 adam 0.00001 0.01 16 4 512 256 0.5833 0.5759 0.0074\n", + "8 80 adam 0.00001 0.00 8 4 512 512 0.5826 0.5739 0.0087\n", + "9 139 adam 0.00001 0.01 32 4 512 256 0.5820 0.5736 0.0085\n", + "10 103 adam 0.00001 0.00 32 4 512 256 0.5820 0.5765 0.0055\n", + "11 34 adam 0.00010 0.00 32 5 256 512 0.5815 0.5788 0.0027\n", + "12 108 adam 0.00001 0.00 32 5 512 512 0.5814 0.5796 0.0018\n", + "13 245 lion 0.00001 0.00 32 4 256 256 0.5813 0.5758 0.0055\n", + "14 233 lion 0.00001 0.00 16 4 256 256 0.5812 0.5723 0.0088\n", + "15 33 adam 0.00010 0.00 32 5 256 256 0.5811 0.5792 0.0020\n", + "16 281 lion 0.00001 0.01 32 4 256 256 0.5808 0.5759 0.0050\n", + "17 230 lion 0.00001 0.00 16 3 256 512 0.5806 0.5714 0.0092\n", + "18 49 adam 0.00010 0.01 16 3 256 256 0.5805 0.5729 0.0076\n", + "19 266 lion 0.00001 0.01 16 3 256 512 0.5803 0.5718 0.0085\n", + "20 116 adam 0.00001 0.01 8 4 512 512 0.5802 0.5716 0.0085\n", + "21 9 adam 0.00010 0.00 8 5 256 256 0.5801 0.5762 0.0039\n", + "22 115 adam 0.00001 0.01 8 4 512 256 0.5799 0.5719 0.0080\n", + "23 22 adam 0.00010 0.00 16 5 256 512 0.5798 0.5714 0.0084\n", + "24 79 adam 0.00001 0.00 8 4 512 256 0.5797 0.5759 0.0038" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/5862_адам лучший.txt\",25)\n", + "\n", + "from IPython.display import display\n", + 
"pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "22d74f6b-4d7f-4a1a-a98b-6754c286712d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrmomentumnum_transformer_headstr_layer_numberhidden_dimout_featuresdevtestgap
050.00100.51642562560.58620.58410.0021
180.00100.9842562560.58570.56820.0175
2230.00010.91642562560.58070.57640.0043
320.00100.5842562560.58010.57070.0094
4110.00100.91642562560.57910.57320.0059
5200.00010.9842562560.57500.57380.0013
6240.00010.91652562560.56990.5715-0.0016
730.00100.5852562560.56960.5705-0.0009
8100.00100.91632562560.56880.55940.0094
9120.00100.91652562560.56720.5726-0.0054
1090.00100.9852562560.56570.5705-0.0049
11210.00010.9852562560.56520.5696-0.0045
1260.00100.51652562560.56430.5718-0.0075
1370.00100.9832562560.56120.55860.0026
1440.00100.51632562560.55800.5624-0.0044
15140.00010.5842562560.55690.5644-0.0075
16170.00010.51642562560.55670.55310.0036
17150.00010.5852562560.55520.54990.0053
1810.00100.5832562560.54960.5565-0.0069
19180.00010.51652562560.54770.5486-0.0009
20130.00010.5832562560.53610.5440-0.0078
21190.00010.9832562560.53590.5513-0.0154
22220.00010.91632562560.53390.5474-0.0135
23160.00010.51632562560.53130.5505-0.0192
\n", + "
" + ], + "text/plain": [ + " step lr momentum num_transformer_heads tr_layer_number hidden_dim out_features dev test gap\n", + "0 5 0.0010 0.5 16 4 256 256 0.5862 0.5841 0.0021\n", + "1 8 0.0010 0.9 8 4 256 256 0.5857 0.5682 0.0175\n", + "2 23 0.0001 0.9 16 4 256 256 0.5807 0.5764 0.0043\n", + "3 2 0.0010 0.5 8 4 256 256 0.5801 0.5707 0.0094\n", + "4 11 0.0010 0.9 16 4 256 256 0.5791 0.5732 0.0059\n", + "5 20 0.0001 0.9 8 4 256 256 0.5750 0.5738 0.0013\n", + "6 24 0.0001 0.9 16 5 256 256 0.5699 0.5715 -0.0016\n", + "7 3 0.0010 0.5 8 5 256 256 0.5696 0.5705 -0.0009\n", + "8 10 0.0010 0.9 16 3 256 256 0.5688 0.5594 0.0094\n", + "9 12 0.0010 0.9 16 5 256 256 0.5672 0.5726 -0.0054\n", + "10 9 0.0010 0.9 8 5 256 256 0.5657 0.5705 -0.0049\n", + "11 21 0.0001 0.9 8 5 256 256 0.5652 0.5696 -0.0045\n", + "12 6 0.0010 0.5 16 5 256 256 0.5643 0.5718 -0.0075\n", + "13 7 0.0010 0.9 8 3 256 256 0.5612 0.5586 0.0026\n", + "14 4 0.0010 0.5 16 3 256 256 0.5580 0.5624 -0.0044\n", + "15 14 0.0001 0.5 8 4 256 256 0.5569 0.5644 -0.0075\n", + "16 17 0.0001 0.5 16 4 256 256 0.5567 0.5531 0.0036\n", + "17 15 0.0001 0.5 8 5 256 256 0.5552 0.5499 0.0053\n", + "18 1 0.0010 0.5 8 3 256 256 0.5496 0.5565 -0.0069\n", + "19 18 0.0001 0.5 16 5 256 256 0.5477 0.5486 -0.0009\n", + "20 13 0.0001 0.5 8 3 256 256 0.5361 0.5440 -0.0078\n", + "21 19 0.0001 0.9 8 3 256 256 0.5359 0.5513 -0.0154\n", + "22 22 0.0001 0.9 16 3 256 256 0.5339 0.5474 -0.0135\n", + "23 16 0.0001 0.5 16 3 256 256 0.5313 0.5505 -0.0192" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/sgd_2.txt\",25)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(25))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "35ae96f7-7ceb-46da-9411-ceba258a98ac", + "metadata": {}, + "outputs": [ + { + "data": { 
+ "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stepoptimizerlrnum_transformer_headstr_layer_numberhidden_dimout_featuresscheduler_typewarmup_ratiodevtestgap
068adam0.0000185256256plateau0.100.57680.5797-0.0029
167adam0.0000185256256plateau0.050.57680.5797-0.0029
292adam0.00001165256256plateau0.100.57420.5771-0.0028
391adam0.00001165256256plateau0.050.57420.5771-0.0028
465adam0.0000185256256huggingface_cosine_with_restarts0.050.57370.5766-0.0029
513adam0.0001084256512huggingface_cosine_with_restarts0.050.57490.5765-0.0015
634adam0.00010164256256huggingface_cosine_with_restarts0.100.57740.57600.0014
766adam0.0000185256256huggingface_cosine_with_restarts0.100.57650.57600.0005
833adam0.00010164256256huggingface_cosine_with_restarts0.050.57590.57570.0002
946adam0.00010165256512huggingface_cosine_with_restarts0.100.57840.57520.0032
1037adam0.00010164256512huggingface_cosine_with_restarts0.050.57620.57440.0018
1118adam0.0001085256256huggingface_cosine_with_restarts0.100.57300.57240.0006
1290adam0.00001165256256huggingface_cosine_with_restarts0.100.57280.57200.0008
1326adam0.00010163256256huggingface_cosine_with_restarts0.100.57810.57130.0068
1489adam0.00001165256256huggingface_cosine_with_restarts0.050.57100.5711-0.0001
1551adam0.0000183256256plateau0.050.57610.57010.0060
1652adam0.0000183256256plateau0.100.57610.57010.0060
179adam0.0001084256256huggingface_cosine_with_restarts0.050.57810.57000.0080
1819adam0.0001085256256plateau0.050.57430.56980.0045
1920adam0.0001085256256plateau0.100.57430.56980.0045
2017adam0.0001085256256huggingface_cosine_with_restarts0.050.56700.5697-0.0027
2173adam0.00001163256256huggingface_cosine_with_restarts0.050.57570.56970.0060
2244adam0.00010165256256plateau0.100.57790.56940.0085
2343adam0.00010165256256plateau0.050.57790.56940.0085
2482adam0.00001164256256huggingface_cosine_with_restarts0.100.57800.56930.0087
2581adam0.00001164256256huggingface_cosine_with_restarts0.050.57730.56920.0082
2641adam0.00010165256256huggingface_cosine_with_restarts0.050.57200.56890.0031
2716adam0.0001084256512plateau0.100.57830.56870.0096
2815adam0.0001084256512plateau0.050.57830.56870.0096
2942adam0.00010165256256huggingface_cosine_with_restarts0.100.57460.56860.0060
3085adam0.00001164256512huggingface_cosine_with_restarts0.050.57330.56850.0048
3138adam0.00010164256512huggingface_cosine_with_restarts0.100.57280.56820.0046
3294adam0.00001165256512huggingface_cosine_with_restarts0.100.57540.56760.0078
3370adam0.0000185256512huggingface_cosine_with_restarts0.100.57130.56750.0038
3486adam0.00001164256512huggingface_cosine_with_restarts0.100.57080.56750.0033
3550adam0.0000183256256huggingface_cosine_with_restarts0.100.57710.56740.0097
3630adam0.00010163256512huggingface_cosine_with_restarts0.100.57110.56740.0037
3758adam0.0000184256256huggingface_cosine_with_restarts0.100.57290.56730.0056
3848adam0.00010165256512plateau0.100.57150.56660.0049
3947adam0.00010165256512plateau0.050.57150.56660.0049
4035adam0.00010164256256plateau0.050.57670.56620.0105
4136adam0.00010164256256plateau0.100.57670.56620.0105
4210adam0.0001084256256huggingface_cosine_with_restarts0.100.57340.56610.0073
4383adam0.00001164256256plateau0.050.57420.56600.0081
4484adam0.00001164256256plateau0.100.57420.56600.0081
4557adam0.0000184256256huggingface_cosine_with_restarts0.050.57170.56580.0058
4675adam0.00001163256256plateau0.050.57170.56560.0061
4771adam0.0000185256512plateau0.050.57490.56560.0094
4872adam0.0000185256512plateau0.100.57490.56560.0094
4949adam0.0000183256256huggingface_cosine_with_restarts0.050.57860.56560.0130
\n", + "
" + ], + "text/plain": [ + " step optimizer lr num_transformer_heads tr_layer_number hidden_dim out_features scheduler_type warmup_ratio dev test \\\n", + "0 68 adam 0.00001 8 5 256 256 plateau 0.10 0.5768 0.5797 \n", + "1 67 adam 0.00001 8 5 256 256 plateau 0.05 0.5768 0.5797 \n", + "2 92 adam 0.00001 16 5 256 256 plateau 0.10 0.5742 0.5771 \n", + "3 91 adam 0.00001 16 5 256 256 plateau 0.05 0.5742 0.5771 \n", + "4 65 adam 0.00001 8 5 256 256 huggingface_cosine_with_restarts 0.05 0.5737 0.5766 \n", + "5 13 adam 0.00010 8 4 256 512 huggingface_cosine_with_restarts 0.05 0.5749 0.5765 \n", + "6 34 adam 0.00010 16 4 256 256 huggingface_cosine_with_restarts 0.10 0.5774 0.5760 \n", + "7 66 adam 0.00001 8 5 256 256 huggingface_cosine_with_restarts 0.10 0.5765 0.5760 \n", + "8 33 adam 0.00010 16 4 256 256 huggingface_cosine_with_restarts 0.05 0.5759 0.5757 \n", + "9 46 adam 0.00010 16 5 256 512 huggingface_cosine_with_restarts 0.10 0.5784 0.5752 \n", + "10 37 adam 0.00010 16 4 256 512 huggingface_cosine_with_restarts 0.05 0.5762 0.5744 \n", + "11 18 adam 0.00010 8 5 256 256 huggingface_cosine_with_restarts 0.10 0.5730 0.5724 \n", + "12 90 adam 0.00001 16 5 256 256 huggingface_cosine_with_restarts 0.10 0.5728 0.5720 \n", + "13 26 adam 0.00010 16 3 256 256 huggingface_cosine_with_restarts 0.10 0.5781 0.5713 \n", + "14 89 adam 0.00001 16 5 256 256 huggingface_cosine_with_restarts 0.05 0.5710 0.5711 \n", + "15 51 adam 0.00001 8 3 256 256 plateau 0.05 0.5761 0.5701 \n", + "16 52 adam 0.00001 8 3 256 256 plateau 0.10 0.5761 0.5701 \n", + "17 9 adam 0.00010 8 4 256 256 huggingface_cosine_with_restarts 0.05 0.5781 0.5700 \n", + "18 19 adam 0.00010 8 5 256 256 plateau 0.05 0.5743 0.5698 \n", + "19 20 adam 0.00010 8 5 256 256 plateau 0.10 0.5743 0.5698 \n", + "20 17 adam 0.00010 8 5 256 256 huggingface_cosine_with_restarts 0.05 0.5670 0.5697 \n", + "21 73 adam 0.00001 16 3 256 256 huggingface_cosine_with_restarts 0.05 0.5757 0.5697 \n", + "22 44 adam 0.00010 16 5 256 256 plateau 
0.10 0.5779 0.5694 \n", + "23 43 adam 0.00010 16 5 256 256 plateau 0.05 0.5779 0.5694 \n", + "24 82 adam 0.00001 16 4 256 256 huggingface_cosine_with_restarts 0.10 0.5780 0.5693 \n", + "25 81 adam 0.00001 16 4 256 256 huggingface_cosine_with_restarts 0.05 0.5773 0.5692 \n", + "26 41 adam 0.00010 16 5 256 256 huggingface_cosine_with_restarts 0.05 0.5720 0.5689 \n", + "27 16 adam 0.00010 8 4 256 512 plateau 0.10 0.5783 0.5687 \n", + "28 15 adam 0.00010 8 4 256 512 plateau 0.05 0.5783 0.5687 \n", + "29 42 adam 0.00010 16 5 256 256 huggingface_cosine_with_restarts 0.10 0.5746 0.5686 \n", + "30 85 adam 0.00001 16 4 256 512 huggingface_cosine_with_restarts 0.05 0.5733 0.5685 \n", + "31 38 adam 0.00010 16 4 256 512 huggingface_cosine_with_restarts 0.10 0.5728 0.5682 \n", + "32 94 adam 0.00001 16 5 256 512 huggingface_cosine_with_restarts 0.10 0.5754 0.5676 \n", + "33 70 adam 0.00001 8 5 256 512 huggingface_cosine_with_restarts 0.10 0.5713 0.5675 \n", + "34 86 adam 0.00001 16 4 256 512 huggingface_cosine_with_restarts 0.10 0.5708 0.5675 \n", + "35 50 adam 0.00001 8 3 256 256 huggingface_cosine_with_restarts 0.10 0.5771 0.5674 \n", + "36 30 adam 0.00010 16 3 256 512 huggingface_cosine_with_restarts 0.10 0.5711 0.5674 \n", + "37 58 adam 0.00001 8 4 256 256 huggingface_cosine_with_restarts 0.10 0.5729 0.5673 \n", + "38 48 adam 0.00010 16 5 256 512 plateau 0.10 0.5715 0.5666 \n", + "39 47 adam 0.00010 16 5 256 512 plateau 0.05 0.5715 0.5666 \n", + "40 35 adam 0.00010 16 4 256 256 plateau 0.05 0.5767 0.5662 \n", + "41 36 adam 0.00010 16 4 256 256 plateau 0.10 0.5767 0.5662 \n", + "42 10 adam 0.00010 8 4 256 256 huggingface_cosine_with_restarts 0.10 0.5734 0.5661 \n", + "43 83 adam 0.00001 16 4 256 256 plateau 0.05 0.5742 0.5660 \n", + "44 84 adam 0.00001 16 4 256 256 plateau 0.10 0.5742 0.5660 \n", + "45 57 adam 0.00001 8 4 256 256 huggingface_cosine_with_restarts 0.05 0.5717 0.5658 \n", + "46 75 adam 0.00001 16 3 256 256 plateau 0.05 0.5717 0.5656 \n", + "47 71 adam 0.00001 8 
5 256 512 plateau 0.05 0.5749 0.5656 \n", + "48 72 adam 0.00001 8 5 256 512 plateau 0.10 0.5749 0.5656 \n", + "49 49 adam 0.00001 8 3 256 256 huggingface_cosine_with_restarts 0.05 0.5786 0.5656 \n", + "\n", + " gap \n", + "0 -0.0029 \n", + "1 -0.0029 \n", + "2 -0.0028 \n", + "3 -0.0028 \n", + "4 -0.0029 \n", + "5 -0.0015 \n", + "6 0.0014 \n", + "7 0.0005 \n", + "8 0.0002 \n", + "9 0.0032 \n", + "10 0.0018 \n", + "11 0.0006 \n", + "12 0.0008 \n", + "13 0.0068 \n", + "14 -0.0001 \n", + "15 0.0060 \n", + "16 0.0060 \n", + "17 0.0080 \n", + "18 0.0045 \n", + "19 0.0045 \n", + "20 -0.0027 \n", + "21 0.0060 \n", + "22 0.0085 \n", + "23 0.0085 \n", + "24 0.0087 \n", + "25 0.0082 \n", + "26 0.0031 \n", + "27 0.0096 \n", + "28 0.0096 \n", + "29 0.0060 \n", + "30 0.0048 \n", + "31 0.0046 \n", + "32 0.0078 \n", + "33 0.0038 \n", + "34 0.0033 \n", + "35 0.0097 \n", + "36 0.0037 \n", + "37 0.0056 \n", + "38 0.0049 \n", + "39 0.0049 \n", + "40 0.0105 \n", + "41 0.0105 \n", + "42 0.0073 \n", + "43 0.0081 \n", + "44 0.0081 \n", + "45 0.0058 \n", + "46 0.0061 \n", + "47 0.0094 \n", + "48 0.0094 \n", + "49 0.0130 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/biformer.txt\",50)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(50))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5832fe53-8308-4195-bb94-db7a86148076", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stepoptimizerlrnum_transformer_headstr_layer_numberhidden_dimout_featuresscheduler_typewarmup_ratiodevtestgap
028adam0.00010163256256plateau0.100.57500.5758-0.0008
127adam0.00010163256256plateau0.050.57500.5758-0.0008
225adam0.00010163256256huggingface_cosine_with_restarts0.050.57210.57160.0005
356adam0.0000183256512plateau0.100.57170.57050.0012
455adam0.0000183256512plateau0.050.57170.57050.0012
517adam0.0001085256256huggingface_cosine_with_restarts0.050.56830.5690-0.0006
626adam0.00010163256256huggingface_cosine_with_restarts0.100.57580.56870.0071
737adam0.00010164256512huggingface_cosine_with_restarts0.050.56740.5676-0.0002
844adam0.00010165256256plateau0.100.56810.56710.0010
943adam0.00010165256256plateau0.050.56810.56710.0010
1031adam0.00010163256512plateau0.050.56240.5659-0.0035
1132adam0.00010163256512plateau0.100.56240.5659-0.0035
122adam0.0001083256256huggingface_cosine_with_restarts0.100.56610.56560.0005
1380adam0.00001163256512plateau0.100.56890.56500.0040
1479adam0.00001163256512plateau0.050.56890.56500.0040
1553adam0.0000183256512huggingface_cosine_with_restarts0.050.56650.56490.0016
1677adam0.00001163256512huggingface_cosine_with_restarts0.050.56900.56490.0041
171adam0.0001083256256huggingface_cosine_with_restarts0.050.56390.5648-0.0009
1878adam0.00001163256512huggingface_cosine_with_restarts0.100.56560.56450.0011
1962adam0.0000184256512huggingface_cosine_with_restarts0.100.56490.56440.0005
2046adam0.00010165256512huggingface_cosine_with_restarts0.100.55960.5638-0.0042
2114adam0.0001084256512huggingface_cosine_with_restarts0.100.57220.56310.0091
2295adam0.00001165256512plateau0.050.56540.56310.0023
2396adam0.00001165256512plateau0.100.56540.56310.0023
2450adam0.0000183256256huggingface_cosine_with_restarts0.100.57020.56290.0073
2573adam0.00001163256256huggingface_cosine_with_restarts0.050.56740.56280.0046
266adam0.0001083256512huggingface_cosine_with_restarts0.100.57080.56260.0082
2761adam0.0000184256512huggingface_cosine_with_restarts0.050.56490.56240.0025
2874adam0.00001163256256huggingface_cosine_with_restarts0.100.56760.56230.0053
2991adam0.00001165256256plateau0.050.55860.5622-0.0036
3092adam0.00001165256256plateau0.100.55860.5622-0.0036
318adam0.0001083256512plateau0.100.56690.56160.0053
3238adam0.00010164256512huggingface_cosine_with_restarts0.100.56400.56160.0024
337adam0.0001083256512plateau0.050.56690.56160.0053
3454adam0.0000183256512huggingface_cosine_with_restarts0.100.56460.56150.0031
3593adam0.00001165256512huggingface_cosine_with_restarts0.050.56730.56120.0061
364adam0.0001083256256plateau0.100.56650.56100.0054
373adam0.0001083256256plateau0.050.56650.56100.0054
3849adam0.0000183256256huggingface_cosine_with_restarts0.050.57060.56090.0097
3975adam0.00001163256256plateau0.050.56560.56060.0050
4076adam0.00001163256256plateau0.100.56560.56060.0050
4142adam0.00010165256256huggingface_cosine_with_restarts0.100.56660.56030.0064
4289adam0.00001165256256huggingface_cosine_with_restarts0.050.56160.56000.0017
4310adam0.0001084256256huggingface_cosine_with_restarts0.100.56510.55970.0053
4470adam0.0000185256512huggingface_cosine_with_restarts0.100.56910.55940.0097
4534adam0.00010164256256huggingface_cosine_with_restarts0.100.56500.55890.0061
4647adam0.00010165256512plateau0.050.56130.55870.0026
4748adam0.00010165256512plateau0.100.56130.55870.0026
4839adam0.00010164256512plateau0.050.56120.55860.0026
4940adam0.00010164256512plateau0.100.56120.55860.0026
\n", + "
" + ], + "text/plain": [ + " step optimizer lr num_transformer_heads tr_layer_number hidden_dim out_features scheduler_type warmup_ratio dev test \\\n", + "0 28 adam 0.00010 16 3 256 256 plateau 0.10 0.5750 0.5758 \n", + "1 27 adam 0.00010 16 3 256 256 plateau 0.05 0.5750 0.5758 \n", + "2 25 adam 0.00010 16 3 256 256 huggingface_cosine_with_restarts 0.05 0.5721 0.5716 \n", + "3 56 adam 0.00001 8 3 256 512 plateau 0.10 0.5717 0.5705 \n", + "4 55 adam 0.00001 8 3 256 512 plateau 0.05 0.5717 0.5705 \n", + "5 17 adam 0.00010 8 5 256 256 huggingface_cosine_with_restarts 0.05 0.5683 0.5690 \n", + "6 26 adam 0.00010 16 3 256 256 huggingface_cosine_with_restarts 0.10 0.5758 0.5687 \n", + "7 37 adam 0.00010 16 4 256 512 huggingface_cosine_with_restarts 0.05 0.5674 0.5676 \n", + "8 44 adam 0.00010 16 5 256 256 plateau 0.10 0.5681 0.5671 \n", + "9 43 adam 0.00010 16 5 256 256 plateau 0.05 0.5681 0.5671 \n", + "10 31 adam 0.00010 16 3 256 512 plateau 0.05 0.5624 0.5659 \n", + "11 32 adam 0.00010 16 3 256 512 plateau 0.10 0.5624 0.5659 \n", + "12 2 adam 0.00010 8 3 256 256 huggingface_cosine_with_restarts 0.10 0.5661 0.5656 \n", + "13 80 adam 0.00001 16 3 256 512 plateau 0.10 0.5689 0.5650 \n", + "14 79 adam 0.00001 16 3 256 512 plateau 0.05 0.5689 0.5650 \n", + "15 53 adam 0.00001 8 3 256 512 huggingface_cosine_with_restarts 0.05 0.5665 0.5649 \n", + "16 77 adam 0.00001 16 3 256 512 huggingface_cosine_with_restarts 0.05 0.5690 0.5649 \n", + "17 1 adam 0.00010 8 3 256 256 huggingface_cosine_with_restarts 0.05 0.5639 0.5648 \n", + "18 78 adam 0.00001 16 3 256 512 huggingface_cosine_with_restarts 0.10 0.5656 0.5645 \n", + "19 62 adam 0.00001 8 4 256 512 huggingface_cosine_with_restarts 0.10 0.5649 0.5644 \n", + "20 46 adam 0.00010 16 5 256 512 huggingface_cosine_with_restarts 0.10 0.5596 0.5638 \n", + "21 14 adam 0.00010 8 4 256 512 huggingface_cosine_with_restarts 0.10 0.5722 0.5631 \n", + "22 95 adam 0.00001 16 5 256 512 plateau 0.05 0.5654 0.5631 \n", + "23 96 adam 0.00001 16 
5 256 512 plateau 0.10 0.5654 0.5631 \n", + "24 50 adam 0.00001 8 3 256 256 huggingface_cosine_with_restarts 0.10 0.5702 0.5629 \n", + "25 73 adam 0.00001 16 3 256 256 huggingface_cosine_with_restarts 0.05 0.5674 0.5628 \n", + "26 6 adam 0.00010 8 3 256 512 huggingface_cosine_with_restarts 0.10 0.5708 0.5626 \n", + "27 61 adam 0.00001 8 4 256 512 huggingface_cosine_with_restarts 0.05 0.5649 0.5624 \n", + "28 74 adam 0.00001 16 3 256 256 huggingface_cosine_with_restarts 0.10 0.5676 0.5623 \n", + "29 91 adam 0.00001 16 5 256 256 plateau 0.05 0.5586 0.5622 \n", + "30 92 adam 0.00001 16 5 256 256 plateau 0.10 0.5586 0.5622 \n", + "31 8 adam 0.00010 8 3 256 512 plateau 0.10 0.5669 0.5616 \n", + "32 38 adam 0.00010 16 4 256 512 huggingface_cosine_with_restarts 0.10 0.5640 0.5616 \n", + "33 7 adam 0.00010 8 3 256 512 plateau 0.05 0.5669 0.5616 \n", + "34 54 adam 0.00001 8 3 256 512 huggingface_cosine_with_restarts 0.10 0.5646 0.5615 \n", + "35 93 adam 0.00001 16 5 256 512 huggingface_cosine_with_restarts 0.05 0.5673 0.5612 \n", + "36 4 adam 0.00010 8 3 256 256 plateau 0.10 0.5665 0.5610 \n", + "37 3 adam 0.00010 8 3 256 256 plateau 0.05 0.5665 0.5610 \n", + "38 49 adam 0.00001 8 3 256 256 huggingface_cosine_with_restarts 0.05 0.5706 0.5609 \n", + "39 75 adam 0.00001 16 3 256 256 plateau 0.05 0.5656 0.5606 \n", + "40 76 adam 0.00001 16 3 256 256 plateau 0.10 0.5656 0.5606 \n", + "41 42 adam 0.00010 16 5 256 256 huggingface_cosine_with_restarts 0.10 0.5666 0.5603 \n", + "42 89 adam 0.00001 16 5 256 256 huggingface_cosine_with_restarts 0.05 0.5616 0.5600 \n", + "43 10 adam 0.00010 8 4 256 256 huggingface_cosine_with_restarts 0.10 0.5651 0.5597 \n", + "44 70 adam 0.00001 8 5 256 512 huggingface_cosine_with_restarts 0.10 0.5691 0.5594 \n", + "45 34 adam 0.00010 16 4 256 256 huggingface_cosine_with_restarts 0.10 0.5650 0.5589 \n", + "46 47 adam 0.00010 16 5 256 512 plateau 0.05 0.5613 0.5587 \n", + "47 48 adam 0.00010 16 5 256 512 plateau 0.10 0.5613 0.5587 \n", + "48 39 adam 
0.00010 16 4 256 512 plateau 0.05 0.5612 0.5586 \n", + "49 40 adam 0.00010 16 4 256 512 plateau 0.10 0.5612 0.5586 \n", + "\n", + " gap \n", + "0 -0.0008 \n", + "1 -0.0008 \n", + "2 0.0005 \n", + "3 0.0012 \n", + "4 0.0012 \n", + "5 -0.0006 \n", + "6 0.0071 \n", + "7 -0.0002 \n", + "8 0.0010 \n", + "9 0.0010 \n", + "10 -0.0035 \n", + "11 -0.0035 \n", + "12 0.0005 \n", + "13 0.0040 \n", + "14 0.0040 \n", + "15 0.0016 \n", + "16 0.0041 \n", + "17 -0.0009 \n", + "18 0.0011 \n", + "19 0.0005 \n", + "20 -0.0042 \n", + "21 0.0091 \n", + "22 0.0023 \n", + "23 0.0023 \n", + "24 0.0073 \n", + "25 0.0046 \n", + "26 0.0082 \n", + "27 0.0025 \n", + "28 0.0053 \n", + "29 -0.0036 \n", + "30 -0.0036 \n", + "31 0.0053 \n", + "32 0.0024 \n", + "33 0.0053 \n", + "34 0.0031 \n", + "35 0.0061 \n", + "36 0.0054 \n", + "37 0.0054 \n", + "38 0.0097 \n", + "39 0.0050 \n", + "40 0.0050 \n", + "41 0.0064 \n", + "42 0.0017 \n", + "43 0.0053 \n", + "44 0.0097 \n", + "45 0.0061 \n", + "46 0.0026 \n", + "47 0.0026 \n", + "48 0.0026 \n", + "49 0.0026 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/BiForm_wtb.txt\",50)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(50))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0d46eb44-6c34-4cc9-8322-6eb476eb827c", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headssmoothing_probabilitymodel_namedevtestgap
0450.0000180.0BiFormer0.57680.5797-0.0029
1490.0000180.2BiFormer0.57530.5779-0.0026
2470.0000180.1BiFormer0.57450.5773-0.0028
3670.00001160.0BiFormer0.57420.5771-0.0028
4250.00010160.1BiFormer0.57790.57660.0013
5270.00010160.2BiFormer0.56950.5752-0.0057
6710.00001160.2BiFormer0.57040.5745-0.0041
7690.00001160.1BiFormer0.57130.5741-0.0029
8510.0000180.3BiFormer0.57510.57350.0016
9770.00001160.5BiFormer0.56460.5722-0.0076
10550.0000180.5BiFormer0.57080.5719-0.0011
11290.00010160.3BiFormer0.56960.5710-0.0014
12730.00001160.3BiFormer0.57080.57080.0000
13110.0001080.5BiFormer0.56750.5706-0.0031
14310.00010160.4BiFormer0.57200.57050.0016
15750.00001160.4BiFormer0.56740.5702-0.0028
1630.0001080.1BiFormer0.57410.56980.0042
1710.0001080.0BiFormer0.57430.56980.0045
18230.00010160.0BiFormer0.57790.56940.0085
19530.0000180.4BiFormer0.57430.56910.0052
20500.0000180.2BiFormerWithProb0.57220.56830.0039
21720.00001160.2BiFormerWithProb0.57330.56820.0051
22520.0000180.3BiFormerWithProb0.56730.5677-0.0004
23570.0000180.6BiFormer0.56740.5677-0.0003
24260.00010160.1BiFormerWithProb0.57290.56730.0056
25740.00001160.3BiFormerWithProb0.56960.56710.0025
26240.00010160.0BiFormerWithProb0.56810.56710.0010
27790.00001160.6BiFormer0.56230.5663-0.0040
2850.0001080.2BiFormer0.56870.56590.0028
29810.00001160.7BiFormer0.55960.5655-0.0059
30280.00010160.2BiFormerWithProb0.56490.5655-0.0006
31700.00001160.1BiFormerWithProb0.56840.56380.0046
32590.0000180.7BiFormer0.56820.56360.0046
33130.0001080.6BiFormer0.55930.5634-0.0042
3440.0001080.1BiFormerWithProb0.56450.56300.0014
3590.0001080.4BiFormer0.56840.56290.0055
36170.0001080.8BiFormer0.56230.56220.0001
37680.00001160.0BiFormerWithProb0.55860.5622-0.0036
38150.0001080.7BiFormer0.56250.56160.0009
3970.0001080.3BiFormer0.56890.56080.0081
40370.00010160.7BiFormer0.55900.5607-0.0017
41100.0001080.4BiFormerWithProb0.56360.56070.0029
42330.00010160.5BiFormer0.56270.56050.0022
43610.0000180.8BiFormer0.56100.56000.0010
44760.00001160.4BiFormerWithProb0.56520.55990.0053
45190.0001080.9BiFormer0.56150.55920.0024
46630.0000180.9BiFormer0.55730.5590-0.0017
47830.00001160.8BiFormer0.55660.5585-0.0019
4880.0001080.3BiFormerWithProb0.56260.55850.0041
4960.0001080.2BiFormerWithProb0.56120.55750.0037
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads smoothing_probability model_name dev test gap\n", + "0 45 0.00001 8 0.0 BiFormer 0.5768 0.5797 -0.0029\n", + "1 49 0.00001 8 0.2 BiFormer 0.5753 0.5779 -0.0026\n", + "2 47 0.00001 8 0.1 BiFormer 0.5745 0.5773 -0.0028\n", + "3 67 0.00001 16 0.0 BiFormer 0.5742 0.5771 -0.0028\n", + "4 25 0.00010 16 0.1 BiFormer 0.5779 0.5766 0.0013\n", + "5 27 0.00010 16 0.2 BiFormer 0.5695 0.5752 -0.0057\n", + "6 71 0.00001 16 0.2 BiFormer 0.5704 0.5745 -0.0041\n", + "7 69 0.00001 16 0.1 BiFormer 0.5713 0.5741 -0.0029\n", + "8 51 0.00001 8 0.3 BiFormer 0.5751 0.5735 0.0016\n", + "9 77 0.00001 16 0.5 BiFormer 0.5646 0.5722 -0.0076\n", + "10 55 0.00001 8 0.5 BiFormer 0.5708 0.5719 -0.0011\n", + "11 29 0.00010 16 0.3 BiFormer 0.5696 0.5710 -0.0014\n", + "12 73 0.00001 16 0.3 BiFormer 0.5708 0.5708 0.0000\n", + "13 11 0.00010 8 0.5 BiFormer 0.5675 0.5706 -0.0031\n", + "14 31 0.00010 16 0.4 BiFormer 0.5720 0.5705 0.0016\n", + "15 75 0.00001 16 0.4 BiFormer 0.5674 0.5702 -0.0028\n", + "16 3 0.00010 8 0.1 BiFormer 0.5741 0.5698 0.0042\n", + "17 1 0.00010 8 0.0 BiFormer 0.5743 0.5698 0.0045\n", + "18 23 0.00010 16 0.0 BiFormer 0.5779 0.5694 0.0085\n", + "19 53 0.00001 8 0.4 BiFormer 0.5743 0.5691 0.0052\n", + "20 50 0.00001 8 0.2 BiFormerWithProb 0.5722 0.5683 0.0039\n", + "21 72 0.00001 16 0.2 BiFormerWithProb 0.5733 0.5682 0.0051\n", + "22 52 0.00001 8 0.3 BiFormerWithProb 0.5673 0.5677 -0.0004\n", + "23 57 0.00001 8 0.6 BiFormer 0.5674 0.5677 -0.0003\n", + "24 26 0.00010 16 0.1 BiFormerWithProb 0.5729 0.5673 0.0056\n", + "25 74 0.00001 16 0.3 BiFormerWithProb 0.5696 0.5671 0.0025\n", + "26 24 0.00010 16 0.0 BiFormerWithProb 0.5681 0.5671 0.0010\n", + "27 79 0.00001 16 0.6 BiFormer 0.5623 0.5663 -0.0040\n", + "28 5 0.00010 8 0.2 BiFormer 0.5687 0.5659 0.0028\n", + "29 81 0.00001 16 0.7 BiFormer 0.5596 0.5655 -0.0059\n", + "30 28 0.00010 16 0.2 BiFormerWithProb 0.5649 0.5655 -0.0006\n", + "31 70 0.00001 16 0.1 BiFormerWithProb 
0.5684 0.5638 0.0046\n", + "32 59 0.00001 8 0.7 BiFormer 0.5682 0.5636 0.0046\n", + "33 13 0.00010 8 0.6 BiFormer 0.5593 0.5634 -0.0042\n", + "34 4 0.00010 8 0.1 BiFormerWithProb 0.5645 0.5630 0.0014\n", + "35 9 0.00010 8 0.4 BiFormer 0.5684 0.5629 0.0055\n", + "36 17 0.00010 8 0.8 BiFormer 0.5623 0.5622 0.0001\n", + "37 68 0.00001 16 0.0 BiFormerWithProb 0.5586 0.5622 -0.0036\n", + "38 15 0.00010 8 0.7 BiFormer 0.5625 0.5616 0.0009\n", + "39 7 0.00010 8 0.3 BiFormer 0.5689 0.5608 0.0081\n", + "40 37 0.00010 16 0.7 BiFormer 0.5590 0.5607 -0.0017\n", + "41 10 0.00010 8 0.4 BiFormerWithProb 0.5636 0.5607 0.0029\n", + "42 33 0.00010 16 0.5 BiFormer 0.5627 0.5605 0.0022\n", + "43 61 0.00001 8 0.8 BiFormer 0.5610 0.5600 0.0010\n", + "44 76 0.00001 16 0.4 BiFormerWithProb 0.5652 0.5599 0.0053\n", + "45 19 0.00010 8 0.9 BiFormer 0.5615 0.5592 0.0024\n", + "46 63 0.00001 8 0.9 BiFormer 0.5573 0.5590 -0.0017\n", + "47 83 0.00001 16 0.8 BiFormer 0.5566 0.5585 -0.0019\n", + "48 8 0.00010 8 0.3 BiFormerWithProb 0.5626 0.5585 0.0041\n", + "49 6 0.00010 8 0.2 BiFormerWithProb 0.5612 0.5575 0.0037" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/smoothing.txt\",50)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(50))" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7b4e6f8c-4428-4c5c-a8ac-b1780372ee98", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrhidden_dimout_featuresmamba_d_statemamba_ker_sizemamba_layer_numberscheduler_typewarmup_ratiomodel_namedevtestgap
0760.00010256512844huggingface_cosine_with_restarts0.10BiMambaWithProb0.57420.5765-0.0024
13860.00001512256843huggingface_cosine_with_restarts0.05BiMambaWithProb0.57740.57090.0065
22600.00001256256843huggingface_cosine_with_restarts0.10BiMambaWithProb0.56530.5666-0.0012
32370.000105125121644plateau0.05BiMamba0.56120.5660-0.0048
42390.000105125121644plateau0.10BiMamba0.56120.5660-0.0048
51310.00010512256843huggingface_cosine_with_restarts0.10BiMamba0.56220.5649-0.0026
61200.000102565121653plateau0.10BiMambaWithProb0.57030.56430.0060
71180.000102565121653plateau0.05BiMambaWithProb0.57030.56430.0060
840.00010256256843huggingface_cosine_with_restarts0.10BiMambaWithProb0.56760.56430.0033
91100.000102565121644plateau0.05BiMambaWithProb0.56090.5636-0.0027
101120.000102565121644plateau0.10BiMambaWithProb0.56090.5636-0.0027
111940.00010512512843huggingface_cosine_with_restarts0.05BiMambaWithProb0.56410.56320.0008
123880.00001512256843huggingface_cosine_with_restarts0.10BiMambaWithProb0.57010.56300.0071
131500.00010512256853plateau0.05BiMambaWithProb0.56240.5629-0.0006
141520.00010512256853plateau0.10BiMambaWithProb0.56240.5629-0.0006
153380.00001256512853huggingface_cosine_with_restarts0.05BiMambaWithProb0.56200.5626-0.0006
163400.00001256512853huggingface_cosine_with_restarts0.10BiMambaWithProb0.56450.56260.0019
174020.00001512256853huggingface_cosine_with_restarts0.05BiMambaWithProb0.56100.5622-0.0011
181460.00010512256853huggingface_cosine_with_restarts0.05BiMambaWithProb0.56470.56130.0034
192580.00001256256843huggingface_cosine_with_restarts0.05BiMambaWithProb0.56340.56120.0022
202160.00010512512853plateau0.10BiMambaWithProb0.56430.56080.0035
212140.00010512512853plateau0.05BiMambaWithProb0.56430.56080.0035
224520.00001512512843huggingface_cosine_with_restarts0.10BiMambaWithProb0.56300.56080.0022
234040.00001512256853huggingface_cosine_with_restarts0.10BiMambaWithProb0.56140.56070.0007
241060.000102565121644huggingface_cosine_with_restarts0.05BiMambaWithProb0.56460.56030.0043
254500.00001512512843huggingface_cosine_with_restarts0.05BiMambaWithProb0.56680.56020.0066
261990.00010512512843plateau0.10BiMamba0.55910.5596-0.0004
272260.000105125121643huggingface_cosine_with_restarts0.05BiMambaWithProb0.56540.55960.0058
281970.00010512512843plateau0.05BiMamba0.55910.5596-0.0004
29250.00010256256854huggingface_cosine_with_restarts0.05BiMamba0.55690.5591-0.0022
301960.00010512512843huggingface_cosine_with_restarts0.10BiMambaWithProb0.56110.55900.0022
312120.00010512512853huggingface_cosine_with_restarts0.10BiMambaWithProb0.57000.55900.0109
323850.00001512256843huggingface_cosine_with_restarts0.05BiMamba0.55960.55860.0010
33460.000102562561644plateau0.05BiMambaWithProb0.56380.55810.0057
34480.000102562561644plateau0.10BiMambaWithProb0.56380.55810.0057
352420.000105125121653huggingface_cosine_with_restarts0.05BiMambaWithProb0.56110.55770.0034
361660.000105122561643plateau0.05BiMambaWithProb0.55960.55770.0020
371680.000105122561643plateau0.10BiMambaWithProb0.55960.55770.0020
381620.000105122561643huggingface_cosine_with_restarts0.05BiMambaWithProb0.56210.55740.0047
392660.00001256256844huggingface_cosine_with_restarts0.05BiMambaWithProb0.55430.5573-0.0030
401240.000102565121654huggingface_cosine_with_restarts0.10BiMambaWithProb0.55510.5573-0.0022
41530.000102562561653plateau0.05BiMamba0.55770.55720.0005
42550.000102562561653plateau0.10BiMamba0.55770.55720.0005
431160.000102565121653huggingface_cosine_with_restarts0.10BiMambaWithProb0.55820.55670.0015
443320.00001256512844huggingface_cosine_with_restarts0.10BiMambaWithProb0.55540.5567-0.0013
4520.00010256256843huggingface_cosine_with_restarts0.05BiMambaWithProb0.55280.5566-0.0037
461300.00010512256843huggingface_cosine_with_restarts0.05BiMambaWithProb0.56360.55650.0071
473930.00001512256844huggingface_cosine_with_restarts0.05BiMamba0.55920.55610.0032
481320.00010512256843huggingface_cosine_with_restarts0.10BiMambaWithProb0.56300.55590.0071
493150.000012562561654huggingface_cosine_with_restarts0.10BiMamba0.55630.55580.0005
\n", + "
" + ], + "text/plain": [ + " step lr hidden_dim out_features mamba_d_state mamba_ker_size mamba_layer_number scheduler_type warmup_ratio \\\n", + "0 76 0.00010 256 512 8 4 4 huggingface_cosine_with_restarts 0.10 \n", + "1 386 0.00001 512 256 8 4 3 huggingface_cosine_with_restarts 0.05 \n", + "2 260 0.00001 256 256 8 4 3 huggingface_cosine_with_restarts 0.10 \n", + "3 237 0.00010 512 512 16 4 4 plateau 0.05 \n", + "4 239 0.00010 512 512 16 4 4 plateau 0.10 \n", + "5 131 0.00010 512 256 8 4 3 huggingface_cosine_with_restarts 0.10 \n", + "6 120 0.00010 256 512 16 5 3 plateau 0.10 \n", + "7 118 0.00010 256 512 16 5 3 plateau 0.05 \n", + "8 4 0.00010 256 256 8 4 3 huggingface_cosine_with_restarts 0.10 \n", + "9 110 0.00010 256 512 16 4 4 plateau 0.05 \n", + "10 112 0.00010 256 512 16 4 4 plateau 0.10 \n", + "11 194 0.00010 512 512 8 4 3 huggingface_cosine_with_restarts 0.05 \n", + "12 388 0.00001 512 256 8 4 3 huggingface_cosine_with_restarts 0.10 \n", + "13 150 0.00010 512 256 8 5 3 plateau 0.05 \n", + "14 152 0.00010 512 256 8 5 3 plateau 0.10 \n", + "15 338 0.00001 256 512 8 5 3 huggingface_cosine_with_restarts 0.05 \n", + "16 340 0.00001 256 512 8 5 3 huggingface_cosine_with_restarts 0.10 \n", + "17 402 0.00001 512 256 8 5 3 huggingface_cosine_with_restarts 0.05 \n", + "18 146 0.00010 512 256 8 5 3 huggingface_cosine_with_restarts 0.05 \n", + "19 258 0.00001 256 256 8 4 3 huggingface_cosine_with_restarts 0.05 \n", + "20 216 0.00010 512 512 8 5 3 plateau 0.10 \n", + "21 214 0.00010 512 512 8 5 3 plateau 0.05 \n", + "22 452 0.00001 512 512 8 4 3 huggingface_cosine_with_restarts 0.10 \n", + "23 404 0.00001 512 256 8 5 3 huggingface_cosine_with_restarts 0.10 \n", + "24 106 0.00010 256 512 16 4 4 huggingface_cosine_with_restarts 0.05 \n", + "25 450 0.00001 512 512 8 4 3 huggingface_cosine_with_restarts 0.05 \n", + "26 199 0.00010 512 512 8 4 3 plateau 0.10 \n", + "27 226 0.00010 512 512 16 4 3 huggingface_cosine_with_restarts 0.05 \n", + "28 197 0.00010 512 512 8 4 3 
plateau 0.05 \n", + "29 25 0.00010 256 256 8 5 4 huggingface_cosine_with_restarts 0.05 \n", + "30 196 0.00010 512 512 8 4 3 huggingface_cosine_with_restarts 0.10 \n", + "31 212 0.00010 512 512 8 5 3 huggingface_cosine_with_restarts 0.10 \n", + "32 385 0.00001 512 256 8 4 3 huggingface_cosine_with_restarts 0.05 \n", + "33 46 0.00010 256 256 16 4 4 plateau 0.05 \n", + "34 48 0.00010 256 256 16 4 4 plateau 0.10 \n", + "35 242 0.00010 512 512 16 5 3 huggingface_cosine_with_restarts 0.05 \n", + "36 166 0.00010 512 256 16 4 3 plateau 0.05 \n", + "37 168 0.00010 512 256 16 4 3 plateau 0.10 \n", + "38 162 0.00010 512 256 16 4 3 huggingface_cosine_with_restarts 0.05 \n", + "39 266 0.00001 256 256 8 4 4 huggingface_cosine_with_restarts 0.05 \n", + "40 124 0.00010 256 512 16 5 4 huggingface_cosine_with_restarts 0.10 \n", + "41 53 0.00010 256 256 16 5 3 plateau 0.05 \n", + "42 55 0.00010 256 256 16 5 3 plateau 0.10 \n", + "43 116 0.00010 256 512 16 5 3 huggingface_cosine_with_restarts 0.10 \n", + "44 332 0.00001 256 512 8 4 4 huggingface_cosine_with_restarts 0.10 \n", + "45 2 0.00010 256 256 8 4 3 huggingface_cosine_with_restarts 0.05 \n", + "46 130 0.00010 512 256 8 4 3 huggingface_cosine_with_restarts 0.05 \n", + "47 393 0.00001 512 256 8 4 4 huggingface_cosine_with_restarts 0.05 \n", + "48 132 0.00010 512 256 8 4 3 huggingface_cosine_with_restarts 0.10 \n", + "49 315 0.00001 256 256 16 5 4 huggingface_cosine_with_restarts 0.10 \n", + "\n", + " model_name dev test gap \n", + "0 BiMambaWithProb 0.5742 0.5765 -0.0024 \n", + "1 BiMambaWithProb 0.5774 0.5709 0.0065 \n", + "2 BiMambaWithProb 0.5653 0.5666 -0.0012 \n", + "3 BiMamba 0.5612 0.5660 -0.0048 \n", + "4 BiMamba 0.5612 0.5660 -0.0048 \n", + "5 BiMamba 0.5622 0.5649 -0.0026 \n", + "6 BiMambaWithProb 0.5703 0.5643 0.0060 \n", + "7 BiMambaWithProb 0.5703 0.5643 0.0060 \n", + "8 BiMambaWithProb 0.5676 0.5643 0.0033 \n", + "9 BiMambaWithProb 0.5609 0.5636 -0.0027 \n", + "10 BiMambaWithProb 0.5609 0.5636 -0.0027 \n", + "11 
BiMambaWithProb 0.5641 0.5632 0.0008 \n", + "12 BiMambaWithProb 0.5701 0.5630 0.0071 \n", + "13 BiMambaWithProb 0.5624 0.5629 -0.0006 \n", + "14 BiMambaWithProb 0.5624 0.5629 -0.0006 \n", + "15 BiMambaWithProb 0.5620 0.5626 -0.0006 \n", + "16 BiMambaWithProb 0.5645 0.5626 0.0019 \n", + "17 BiMambaWithProb 0.5610 0.5622 -0.0011 \n", + "18 BiMambaWithProb 0.5647 0.5613 0.0034 \n", + "19 BiMambaWithProb 0.5634 0.5612 0.0022 \n", + "20 BiMambaWithProb 0.5643 0.5608 0.0035 \n", + "21 BiMambaWithProb 0.5643 0.5608 0.0035 \n", + "22 BiMambaWithProb 0.5630 0.5608 0.0022 \n", + "23 BiMambaWithProb 0.5614 0.5607 0.0007 \n", + "24 BiMambaWithProb 0.5646 0.5603 0.0043 \n", + "25 BiMambaWithProb 0.5668 0.5602 0.0066 \n", + "26 BiMamba 0.5591 0.5596 -0.0004 \n", + "27 BiMambaWithProb 0.5654 0.5596 0.0058 \n", + "28 BiMamba 0.5591 0.5596 -0.0004 \n", + "29 BiMamba 0.5569 0.5591 -0.0022 \n", + "30 BiMambaWithProb 0.5611 0.5590 0.0022 \n", + "31 BiMambaWithProb 0.5700 0.5590 0.0109 \n", + "32 BiMamba 0.5596 0.5586 0.0010 \n", + "33 BiMambaWithProb 0.5638 0.5581 0.0057 \n", + "34 BiMambaWithProb 0.5638 0.5581 0.0057 \n", + "35 BiMambaWithProb 0.5611 0.5577 0.0034 \n", + "36 BiMambaWithProb 0.5596 0.5577 0.0020 \n", + "37 BiMambaWithProb 0.5596 0.5577 0.0020 \n", + "38 BiMambaWithProb 0.5621 0.5574 0.0047 \n", + "39 BiMambaWithProb 0.5543 0.5573 -0.0030 \n", + "40 BiMambaWithProb 0.5551 0.5573 -0.0022 \n", + "41 BiMamba 0.5577 0.5572 0.0005 \n", + "42 BiMamba 0.5577 0.5572 0.0005 \n", + "43 BiMambaWithProb 0.5582 0.5567 0.0015 \n", + "44 BiMambaWithProb 0.5554 0.5567 -0.0013 \n", + "45 BiMambaWithProb 0.5528 0.5566 -0.0037 \n", + "46 BiMambaWithProb 0.5636 0.5565 0.0071 \n", + "47 BiMamba 0.5592 0.5561 0.0032 \n", + "48 BiMambaWithProb 0.5630 0.5559 0.0071 \n", + "49 BiMamba 0.5563 0.5558 0.0005 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/mambas.txt\",50)\n", + "\n", + "from 
IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(50))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "45434e0f-2af9-491a-bfc9-54cc3a91fca9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numberhidden_dim_gateddevtestgap
0130.000101641280.54180.5439-0.0022
1110.000101632560.54190.5430-0.0010
2160.000101651280.54320.54260.0005
3120.000101635120.55210.54070.0114
470.00010851280.54450.53950.0050
510.00010831280.53700.5394-0.0025
640.00010841280.54260.53680.0058
750.00010842560.54370.53680.0070
820.00010832560.53460.5366-0.0020
9100.000101631280.53220.5347-0.0025
1090.00010855120.54380.53430.0094
1180.00010852560.54190.53430.0076
12150.000101645120.53460.53190.0027
1360.00010845120.53380.53010.0037
14180.000101655120.52870.52850.0002
1530.00010835120.53540.52780.0076
16140.000101642560.53350.52720.0063
17170.000101652560.52160.51370.0080
18250.00001851280.51410.50380.0104
19290.000011632560.49450.4995-0.0049
20280.000011631280.50150.49640.0051
21220.00001841280.49930.49520.0042
22210.00001835120.49020.48310.0071
23340.000011651280.48210.47950.0026
24190.00001831280.48940.47930.0101
25200.00001832560.47560.4779-0.0023
26240.00001845120.47670.4777-0.0010
27300.000011635120.48620.47680.0094
28260.00001852560.48150.47450.0069
29230.00001842560.47410.47390.0003
30310.000011641280.47150.47050.0011
31360.000011655120.46690.46270.0042
32320.000011642560.46100.46060.0003
33330.000011645120.45300.45290.0001
34270.00001855120.45690.44770.0092
35350.000011652560.44480.44220.0025
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number hidden_dim_gated dev test gap\n", + "0 13 0.00010 16 4 128 0.5418 0.5439 -0.0022\n", + "1 11 0.00010 16 3 256 0.5419 0.5430 -0.0010\n", + "2 16 0.00010 16 5 128 0.5432 0.5426 0.0005\n", + "3 12 0.00010 16 3 512 0.5521 0.5407 0.0114\n", + "4 7 0.00010 8 5 128 0.5445 0.5395 0.0050\n", + "5 1 0.00010 8 3 128 0.5370 0.5394 -0.0025\n", + "6 4 0.00010 8 4 128 0.5426 0.5368 0.0058\n", + "7 5 0.00010 8 4 256 0.5437 0.5368 0.0070\n", + "8 2 0.00010 8 3 256 0.5346 0.5366 -0.0020\n", + "9 10 0.00010 16 3 128 0.5322 0.5347 -0.0025\n", + "10 9 0.00010 8 5 512 0.5438 0.5343 0.0094\n", + "11 8 0.00010 8 5 256 0.5419 0.5343 0.0076\n", + "12 15 0.00010 16 4 512 0.5346 0.5319 0.0027\n", + "13 6 0.00010 8 4 512 0.5338 0.5301 0.0037\n", + "14 18 0.00010 16 5 512 0.5287 0.5285 0.0002\n", + "15 3 0.00010 8 3 512 0.5354 0.5278 0.0076\n", + "16 14 0.00010 16 4 256 0.5335 0.5272 0.0063\n", + "17 17 0.00010 16 5 256 0.5216 0.5137 0.0080\n", + "18 25 0.00001 8 5 128 0.5141 0.5038 0.0104\n", + "19 29 0.00001 16 3 256 0.4945 0.4995 -0.0049\n", + "20 28 0.00001 16 3 128 0.5015 0.4964 0.0051\n", + "21 22 0.00001 8 4 128 0.4993 0.4952 0.0042\n", + "22 21 0.00001 8 3 512 0.4902 0.4831 0.0071\n", + "23 34 0.00001 16 5 128 0.4821 0.4795 0.0026\n", + "24 19 0.00001 8 3 128 0.4894 0.4793 0.0101\n", + "25 20 0.00001 8 3 256 0.4756 0.4779 -0.0023\n", + "26 24 0.00001 8 4 512 0.4767 0.4777 -0.0010\n", + "27 30 0.00001 16 3 512 0.4862 0.4768 0.0094\n", + "28 26 0.00001 8 5 256 0.4815 0.4745 0.0069\n", + "29 23 0.00001 8 4 256 0.4741 0.4739 0.0003\n", + "30 31 0.00001 16 4 128 0.4715 0.4705 0.0011\n", + "31 36 0.00001 16 5 512 0.4669 0.4627 0.0042\n", + "32 32 0.00001 16 4 256 0.4610 0.4606 0.0003\n", + "33 33 0.00001 16 4 512 0.4530 0.4529 0.0001\n", + "34 27 0.00001 8 5 512 0.4569 0.4477 0.0092\n", + "35 35 0.00001 16 5 256 0.4448 0.4422 0.0025" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + 
"df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/bigated.txt\",50)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(50))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "041a82f6-a5db-4033-8e89-f4ecc24479f1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numbernum_graph_headsdevtestgap
0350.000018580.57980.57740.0023
1110.000108580.57440.57010.0042
2450.0000116520.56930.5699-0.0005
3250.000018320.57260.56950.0031
4260.000018340.57370.56880.0049
5470.0000116580.57720.56880.0084
6230.0001016580.56870.56750.0012
790.000108520.56880.56690.0020
8170.0001016420.56850.56650.0020
9310.000018480.56770.56530.0024
10430.0000116480.56540.56460.0008
11140.0001016340.56210.5644-0.0022
12420.0000116440.57300.56400.0090
13280.0000183160.56570.56380.0019
1450.000108420.57220.56350.0086
15300.000018440.57290.56340.0095
16330.000018520.56250.56220.0004
17370.0000116320.56920.56220.0070
18460.0000116540.57300.56150.0114
19380.0000116340.56600.56110.0049
2080.0001084160.56470.56090.0038
2130.000108380.56850.56060.0079
22400.00001163160.56640.56050.0059
23440.00001164160.56690.56030.0066
24150.0001016380.56350.55960.0040
25100.000108540.57060.55960.0110
26290.000018420.56610.55910.0069
2760.000108440.56250.55890.0036
28200.00010164160.56420.55860.0056
29130.0001016320.56310.55840.0047
30210.0001016520.56100.55810.0028
3110.000108320.55850.55800.0005
32320.0000184160.56240.55790.0045
3370.000108480.56310.55770.0054
34410.0000116420.56610.55750.0086
35160.00010163160.56810.55750.0106
36270.000018380.55530.5563-0.0010
37220.0001016540.56290.55470.0082
38240.00010165160.55710.55450.0027
39190.0001016480.55900.55370.0053
40180.0001016440.56450.55370.0108
41390.0000116380.55420.55350.0006
42120.0001085160.55970.55340.0063
43480.00001165160.55420.55330.0009
44360.0000185160.55930.55300.0063
4520.000108340.56400.55260.0114
4640.0001083160.55560.55080.0048
47340.000018540.56530.55080.0145
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number num_graph_heads dev test gap\n", + "0 35 0.00001 8 5 8 0.5798 0.5774 0.0023\n", + "1 11 0.00010 8 5 8 0.5744 0.5701 0.0042\n", + "2 45 0.00001 16 5 2 0.5693 0.5699 -0.0005\n", + "3 25 0.00001 8 3 2 0.5726 0.5695 0.0031\n", + "4 26 0.00001 8 3 4 0.5737 0.5688 0.0049\n", + "5 47 0.00001 16 5 8 0.5772 0.5688 0.0084\n", + "6 23 0.00010 16 5 8 0.5687 0.5675 0.0012\n", + "7 9 0.00010 8 5 2 0.5688 0.5669 0.0020\n", + "8 17 0.00010 16 4 2 0.5685 0.5665 0.0020\n", + "9 31 0.00001 8 4 8 0.5677 0.5653 0.0024\n", + "10 43 0.00001 16 4 8 0.5654 0.5646 0.0008\n", + "11 14 0.00010 16 3 4 0.5621 0.5644 -0.0022\n", + "12 42 0.00001 16 4 4 0.5730 0.5640 0.0090\n", + "13 28 0.00001 8 3 16 0.5657 0.5638 0.0019\n", + "14 5 0.00010 8 4 2 0.5722 0.5635 0.0086\n", + "15 30 0.00001 8 4 4 0.5729 0.5634 0.0095\n", + "16 33 0.00001 8 5 2 0.5625 0.5622 0.0004\n", + "17 37 0.00001 16 3 2 0.5692 0.5622 0.0070\n", + "18 46 0.00001 16 5 4 0.5730 0.5615 0.0114\n", + "19 38 0.00001 16 3 4 0.5660 0.5611 0.0049\n", + "20 8 0.00010 8 4 16 0.5647 0.5609 0.0038\n", + "21 3 0.00010 8 3 8 0.5685 0.5606 0.0079\n", + "22 40 0.00001 16 3 16 0.5664 0.5605 0.0059\n", + "23 44 0.00001 16 4 16 0.5669 0.5603 0.0066\n", + "24 15 0.00010 16 3 8 0.5635 0.5596 0.0040\n", + "25 10 0.00010 8 5 4 0.5706 0.5596 0.0110\n", + "26 29 0.00001 8 4 2 0.5661 0.5591 0.0069\n", + "27 6 0.00010 8 4 4 0.5625 0.5589 0.0036\n", + "28 20 0.00010 16 4 16 0.5642 0.5586 0.0056\n", + "29 13 0.00010 16 3 2 0.5631 0.5584 0.0047\n", + "30 21 0.00010 16 5 2 0.5610 0.5581 0.0028\n", + "31 1 0.00010 8 3 2 0.5585 0.5580 0.0005\n", + "32 32 0.00001 8 4 16 0.5624 0.5579 0.0045\n", + "33 7 0.00010 8 4 8 0.5631 0.5577 0.0054\n", + "34 41 0.00001 16 4 2 0.5661 0.5575 0.0086\n", + "35 16 0.00010 16 3 16 0.5681 0.5575 0.0106\n", + "36 27 0.00001 8 3 8 0.5553 0.5563 -0.0010\n", + "37 22 0.00010 16 5 4 0.5629 0.5547 0.0082\n", + "38 24 0.00010 16 5 16 0.5571 0.5545 0.0027\n", + "39 
19 0.00010 16 4 8 0.5590 0.5537 0.0053\n", + "40 18 0.00010 16 4 4 0.5645 0.5537 0.0108\n", + "41 39 0.00001 16 3 8 0.5542 0.5535 0.0006\n", + "42 12 0.00010 8 5 16 0.5597 0.5534 0.0063\n", + "43 48 0.00001 16 5 16 0.5542 0.5533 0.0009\n", + "44 36 0.00001 8 5 16 0.5593 0.5530 0.0063\n", + "45 2 0.00010 8 3 4 0.5640 0.5526 0.0114\n", + "46 4 0.00010 8 3 16 0.5556 0.5508 0.0048\n", + "47 34 0.00001 8 5 4 0.5653 0.5508 0.0145" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/bigraph.txt\",50)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(50))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "42ebb696-9e6d-4ba1-acd4-b8802ecf0fac", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numbernum_graph_headshidden_dim_gateddevtestgap
0500.000018582560.58050.57560.0048
1240.0001016485120.57230.57000.0024
2670.0000116581280.56670.5687-0.0019
3120.000108545120.56450.5685-0.0040
4320.0001016582560.56580.5678-0.0020
5640.0000116541280.56620.5676-0.0014
6680.0000116582560.57790.56690.0110
7550.0000116441280.57060.56690.0037
8490.000018581280.56420.5659-0.0018
9310.0001016581280.57500.56540.0095
10230.0001016482560.57160.56510.0065
11220.0001016481280.57430.56500.0093
12530.0000185162560.57320.56330.0099
13260.00010164162560.56360.56300.0006
1460.000108485120.56240.5626-0.0002
15100.000108541280.56550.56250.0029
16700.00001165161280.56180.56160.0002
17360.00010165165120.57530.56100.0143
18270.00010164165120.55870.5609-0.0022
19460.000018541280.56600.56080.0052
20190.0001016441280.56160.56040.0012
21440.0000184162560.56860.56000.0086
22350.00010165162560.56430.55980.0045
23130.000108581280.56210.55960.0025
24710.00001165162560.56670.55890.0078
25690.0000116585120.56380.55860.0052
2670.0001084161280.56260.55790.0047
27300.0001016545120.56190.55780.0041
28510.000018585120.56270.55770.0051
29520.0000185161280.55810.55740.0007
30370.000018441280.56080.55720.0036
3140.000108481280.56590.55710.0088
32140.000108582560.56340.55710.0064
33380.000018442560.56110.55690.0043
34570.0000116445120.56260.55690.0058
35180.0001085165120.55550.5568-0.0013
36620.00001164162560.56020.55650.0038
37160.0001085161280.56320.55640.0069
38590.0000116482560.56950.55630.0133
39470.000018542560.56430.55570.0086
4020.000108442560.55890.55550.0034
41280.0001016541280.56390.55550.0085
42430.0000184161280.56590.55540.0105
43580.0000116481280.56300.55540.0077
4410.000108441280.56330.55530.0080
45650.0000116542560.55840.55520.0032
46720.00001165165120.55920.55510.0041
47330.0001016585120.56170.55490.0067
48420.000018485120.56150.55460.0069
49450.0000184165120.56380.55460.0092
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number num_graph_heads hidden_dim_gated dev test gap\n", + "0 50 0.00001 8 5 8 256 0.5805 0.5756 0.0048\n", + "1 24 0.00010 16 4 8 512 0.5723 0.5700 0.0024\n", + "2 67 0.00001 16 5 8 128 0.5667 0.5687 -0.0019\n", + "3 12 0.00010 8 5 4 512 0.5645 0.5685 -0.0040\n", + "4 32 0.00010 16 5 8 256 0.5658 0.5678 -0.0020\n", + "5 64 0.00001 16 5 4 128 0.5662 0.5676 -0.0014\n", + "6 68 0.00001 16 5 8 256 0.5779 0.5669 0.0110\n", + "7 55 0.00001 16 4 4 128 0.5706 0.5669 0.0037\n", + "8 49 0.00001 8 5 8 128 0.5642 0.5659 -0.0018\n", + "9 31 0.00010 16 5 8 128 0.5750 0.5654 0.0095\n", + "10 23 0.00010 16 4 8 256 0.5716 0.5651 0.0065\n", + "11 22 0.00010 16 4 8 128 0.5743 0.5650 0.0093\n", + "12 53 0.00001 8 5 16 256 0.5732 0.5633 0.0099\n", + "13 26 0.00010 16 4 16 256 0.5636 0.5630 0.0006\n", + "14 6 0.00010 8 4 8 512 0.5624 0.5626 -0.0002\n", + "15 10 0.00010 8 5 4 128 0.5655 0.5625 0.0029\n", + "16 70 0.00001 16 5 16 128 0.5618 0.5616 0.0002\n", + "17 36 0.00010 16 5 16 512 0.5753 0.5610 0.0143\n", + "18 27 0.00010 16 4 16 512 0.5587 0.5609 -0.0022\n", + "19 46 0.00001 8 5 4 128 0.5660 0.5608 0.0052\n", + "20 19 0.00010 16 4 4 128 0.5616 0.5604 0.0012\n", + "21 44 0.00001 8 4 16 256 0.5686 0.5600 0.0086\n", + "22 35 0.00010 16 5 16 256 0.5643 0.5598 0.0045\n", + "23 13 0.00010 8 5 8 128 0.5621 0.5596 0.0025\n", + "24 71 0.00001 16 5 16 256 0.5667 0.5589 0.0078\n", + "25 69 0.00001 16 5 8 512 0.5638 0.5586 0.0052\n", + "26 7 0.00010 8 4 16 128 0.5626 0.5579 0.0047\n", + "27 30 0.00010 16 5 4 512 0.5619 0.5578 0.0041\n", + "28 51 0.00001 8 5 8 512 0.5627 0.5577 0.0051\n", + "29 52 0.00001 8 5 16 128 0.5581 0.5574 0.0007\n", + "30 37 0.00001 8 4 4 128 0.5608 0.5572 0.0036\n", + "31 4 0.00010 8 4 8 128 0.5659 0.5571 0.0088\n", + "32 14 0.00010 8 5 8 256 0.5634 0.5571 0.0064\n", + "33 38 0.00001 8 4 4 256 0.5611 0.5569 0.0043\n", + "34 57 0.00001 16 4 4 512 0.5626 0.5569 0.0058\n", + "35 18 0.00010 8 5 16 512 
0.5555 0.5568 -0.0013\n", + "36 62 0.00001 16 4 16 256 0.5602 0.5565 0.0038\n", + "37 16 0.00010 8 5 16 128 0.5632 0.5564 0.0069\n", + "38 59 0.00001 16 4 8 256 0.5695 0.5563 0.0133\n", + "39 47 0.00001 8 5 4 256 0.5643 0.5557 0.0086\n", + "40 2 0.00010 8 4 4 256 0.5589 0.5555 0.0034\n", + "41 28 0.00010 16 5 4 128 0.5639 0.5555 0.0085\n", + "42 43 0.00001 8 4 16 128 0.5659 0.5554 0.0105\n", + "43 58 0.00001 16 4 8 128 0.5630 0.5554 0.0077\n", + "44 1 0.00010 8 4 4 128 0.5633 0.5553 0.0080\n", + "45 65 0.00001 16 5 4 256 0.5584 0.5552 0.0032\n", + "46 72 0.00001 16 5 16 512 0.5592 0.5551 0.0041\n", + "47 33 0.00010 16 5 8 512 0.5617 0.5549 0.0067\n", + "48 42 0.00001 8 4 8 512 0.5615 0.5546 0.0069\n", + "49 45 0.00001 8 4 16 512 0.5638 0.5546 0.0092" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/bigatedgraph.txt\",50)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(50))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "dfae755b-6a58-4c26-8c83-4884038b321e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrsmoothing_probabilitydevtestgap
0120.000010.00.57680.5797-0.0029
1140.000010.20.57530.5779-0.0026
2130.000010.10.57450.5773-0.0028
3150.000010.30.57510.57350.0016
4170.000010.50.57080.5719-0.0011
560.000100.50.56750.5706-0.0031
610.000100.00.57430.56980.0045
720.000100.10.57410.56980.0042
8160.000010.40.57430.56910.0052
9180.000010.60.56740.5677-0.0003
1030.000100.20.56870.56590.0028
11190.000010.70.56820.56360.0046
1270.000100.60.55930.5634-0.0042
1350.000100.40.56840.56290.0055
1490.000100.80.56230.56220.0001
1580.000100.70.56250.56160.0009
1640.000100.30.56890.56080.0081
17200.000010.80.56100.56000.0010
18100.000100.90.56150.55920.0024
19210.000010.90.55730.5590-0.0017
20220.000011.00.55580.5574-0.0016
21110.000101.00.55340.5564-0.0029
\n", + "
" + ], + "text/plain": [ + " step lr smoothing_probability dev test gap\n", + "0 12 0.00001 0.0 0.5768 0.5797 -0.0029\n", + "1 14 0.00001 0.2 0.5753 0.5779 -0.0026\n", + "2 13 0.00001 0.1 0.5745 0.5773 -0.0028\n", + "3 15 0.00001 0.3 0.5751 0.5735 0.0016\n", + "4 17 0.00001 0.5 0.5708 0.5719 -0.0011\n", + "5 6 0.00010 0.5 0.5675 0.5706 -0.0031\n", + "6 1 0.00010 0.0 0.5743 0.5698 0.0045\n", + "7 2 0.00010 0.1 0.5741 0.5698 0.0042\n", + "8 16 0.00001 0.4 0.5743 0.5691 0.0052\n", + "9 18 0.00001 0.6 0.5674 0.5677 -0.0003\n", + "10 3 0.00010 0.2 0.5687 0.5659 0.0028\n", + "11 19 0.00001 0.7 0.5682 0.5636 0.0046\n", + "12 7 0.00010 0.6 0.5593 0.5634 -0.0042\n", + "13 5 0.00010 0.4 0.5684 0.5629 0.0055\n", + "14 9 0.00010 0.8 0.5623 0.5622 0.0001\n", + "15 8 0.00010 0.7 0.5625 0.5616 0.0009\n", + "16 4 0.00010 0.3 0.5689 0.5608 0.0081\n", + "17 20 0.00001 0.8 0.5610 0.5600 0.0010\n", + "18 10 0.00010 0.9 0.5615 0.5592 0.0024\n", + "19 21 0.00001 0.9 0.5573 0.5590 -0.0017\n", + "20 22 0.00001 1.0 0.5558 0.5574 -0.0016\n", + "21 11 0.00010 1.0 0.5534 0.5564 -0.0029" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/smothing/phi.txt\",50)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(50))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "f4db3748-6be0-4ee6-abd1-c0e734efc8c8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrsmoothing_probabilitydevtestgap
0140.000010.20.58020.5808-0.0007
1120.000010.00.57680.5797-0.0029
2150.000010.30.57890.57820.0007
3160.000010.40.57590.5768-0.0008
4130.000010.10.57970.57620.0035
5170.000010.50.57450.5760-0.0015
6180.000010.60.56770.5710-0.0033
710.000100.00.57430.56980.0045
820.000100.10.56900.5696-0.0006
9190.000010.70.56440.5694-0.0050
10200.000010.80.56460.5690-0.0044
11210.000010.90.56510.5683-0.0032
1240.000100.30.57180.56620.0056
1330.000100.20.56560.56470.0009
14220.000011.00.55590.5610-0.0051
1550.000100.40.56500.56080.0042
1680.000100.70.56600.55950.0065
1790.000100.80.56360.55890.0047
1870.000100.60.55650.5587-0.0022
1960.000100.50.55960.55850.0011
20100.000100.90.55810.55610.0020
21110.000101.00.54270.5463-0.0036
\n", + "
" + ], + "text/plain": [ + " step lr smoothing_probability dev test gap\n", + "0 14 0.00001 0.2 0.5802 0.5808 -0.0007\n", + "1 12 0.00001 0.0 0.5768 0.5797 -0.0029\n", + "2 15 0.00001 0.3 0.5789 0.5782 0.0007\n", + "3 16 0.00001 0.4 0.5759 0.5768 -0.0008\n", + "4 13 0.00001 0.1 0.5797 0.5762 0.0035\n", + "5 17 0.00001 0.5 0.5745 0.5760 -0.0015\n", + "6 18 0.00001 0.6 0.5677 0.5710 -0.0033\n", + "7 1 0.00010 0.0 0.5743 0.5698 0.0045\n", + "8 2 0.00010 0.1 0.5690 0.5696 -0.0006\n", + "9 19 0.00001 0.7 0.5644 0.5694 -0.0050\n", + "10 20 0.00001 0.8 0.5646 0.5690 -0.0044\n", + "11 21 0.00001 0.9 0.5651 0.5683 -0.0032\n", + "12 4 0.00010 0.3 0.5718 0.5662 0.0056\n", + "13 3 0.00010 0.2 0.5656 0.5647 0.0009\n", + "14 22 0.00001 1.0 0.5559 0.5610 -0.0051\n", + "15 5 0.00010 0.4 0.5650 0.5608 0.0042\n", + "16 8 0.00010 0.7 0.5660 0.5595 0.0065\n", + "17 9 0.00010 0.8 0.5636 0.5589 0.0047\n", + "18 7 0.00010 0.6 0.5565 0.5587 -0.0022\n", + "19 6 0.00010 0.5 0.5596 0.5585 0.0011\n", + "20 10 0.00010 0.9 0.5581 0.5561 0.0020\n", + "21 11 0.00010 1.0 0.5427 0.5463 -0.0036" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/smothing/qwen.txt\",50)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(50))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "952affd8-2fea-481c-b8cc-3f53418b5472", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numbernum_graph_headsdevtestgap
030.0001016540.56320.56300.0002
110.000108540.56540.56160.0039
250.000018540.56560.55880.0068
360.000018580.56880.55800.0108
470.0000116540.56310.55740.0057
520.000108580.56190.55640.0054
680.0000116580.56530.55590.0094
740.0001016580.56310.55390.0092
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number num_graph_heads dev test gap\n", + "0 3 0.00010 16 5 4 0.5632 0.5630 0.0002\n", + "1 1 0.00010 8 5 4 0.5654 0.5616 0.0039\n", + "2 5 0.00001 8 5 4 0.5656 0.5588 0.0068\n", + "3 6 0.00001 8 5 8 0.5688 0.5580 0.0108\n", + "4 7 0.00001 16 5 4 0.5631 0.5574 0.0057\n", + "5 2 0.00010 8 5 8 0.5619 0.5564 0.0054\n", + "6 8 0.00001 16 5 8 0.5653 0.5559 0.0094\n", + "7 4 0.00010 16 5 8 0.5631 0.5539 0.0092" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/BiGraphFormerWithProb.txt\",50)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(50))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "c210111f-e1e1-4276-8e47-249bda79b189", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steplrnum_transformer_headstr_layer_numbernum_graph_headshidden_dim_gateddevtestgap
030.0001016582560.56700.56560.0015
120.000108585120.57060.56160.0091
240.0001016585120.55600.5573-0.0013
380.0000116585120.55690.55290.0040
410.000108582560.55720.55140.0059
560.000018585120.55200.54740.0046
650.000018582560.55500.54720.0078
770.0000116582560.54880.54180.0071
\n", + "
" + ], + "text/plain": [ + " step lr num_transformer_heads tr_layer_number num_graph_heads hidden_dim_gated dev test gap\n", + "0 3 0.00010 16 5 8 256 0.5670 0.5656 0.0015\n", + "1 2 0.00010 8 5 8 512 0.5706 0.5616 0.0091\n", + "2 4 0.00010 16 5 8 512 0.5560 0.5573 -0.0013\n", + "3 8 0.00001 16 5 8 512 0.5569 0.5529 0.0040\n", + "4 1 0.00010 8 5 8 256 0.5572 0.5514 0.0059\n", + "5 6 0.00001 8 5 8 512 0.5520 0.5474 0.0046\n", + "6 5 0.00001 8 5 8 256 0.5550 0.5472 0.0078\n", + "7 7 0.00001 16 5 8 256 0.5488 0.5418 0.0071" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/BiGatedGraphFormerWithProb.txt\",50)\n", + "\n", + "from IPython.display import display\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 160)\n", + "\n", + "display(df.head(50))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2e06ea3-d6ba-48d4-a013-15b1145d9a4a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f9a4eae-c094-40c8-a0ee-ecf7c884d523", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8d39259-0192-4df1-8eef-531dee7f45b8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40afd60f-78b2-4b26-b5e2-314dddfa6c3f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c90849c-69d8-484e-b6a4-96a8e6447bc7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3325846-ffbc-444f-8549-2fbe2f5d6fde", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03aaa192-92c8-4938-bc18-8eac086e4648", + "metadata": {}, + "outputs": [], + "source": [] 
+ }, + { + "cell_type": "code", + "execution_count": 21, + "id": "d32b1b7f-bcb6-45bd-930c-f93b4b40b3f8", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "PretrainedAudioEmbeddingExtractor.__init__() got an unexpected keyword argument 'model_name'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[21]\u001b[39m\u001b[32m, line 32\u001b[39m\n\u001b[32m 29\u001b[39m DEVICE = \u001b[33m\"\u001b[39m\u001b[33mcuda\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m torch.cuda.is_available() \u001b[38;5;28;01melse\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mcpu\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 30\u001b[39m SAMPLE_RATE = \u001b[32m16000\u001b[39m\n\u001b[32m---> \u001b[39m\u001b[32m32\u001b[39m audio_feat = \u001b[43mPretrainedAudioEmbeddingExtractor\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 33\u001b[39m \u001b[43m \u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mAUDIO_MODEL\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 34\u001b[39m \u001b[43m \u001b[49m\u001b[43mcheckpoint\u001b[49m\u001b[43m=\u001b[49m\u001b[43mAUDIO_CKPT\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 35\u001b[39m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m=\u001b[49m\u001b[43mDEVICE\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 36\u001b[39m \u001b[43m)\u001b[49m\n\u001b[32m 38\u001b[39m text_feat = PretrainedTextEmbeddingExtractor(\n\u001b[32m 39\u001b[39m model_name=TEXT_MODEL,\n\u001b[32m 40\u001b[39m checkpoint=TEXT_CKPT,\n\u001b[32m 41\u001b[39m device=DEVICE,\n\u001b[32m 42\u001b[39m )\n\u001b[32m 44\u001b[39m \u001b[38;5;66;03m# ---------- 4. 
Узнаём фактические размеры эмбеддингов ----------\u001b[39;00m\n", + "\u001b[31mTypeError\u001b[39m: PretrainedAudioEmbeddingExtractor.__init__() got an unexpected keyword argument 'model_name'" + ] + } + ], + "source": [ + "# ======================================================================\n", + "# Проверка синтетического корпуса MELD-S на «битые» эмбеддинги\n", + "# ======================================================================\n", + "\n", + "# ---------- 1. Импорты и базовые настройки ----------\n", + "import os, logging, traceback\n", + "import torch, torchaudio\n", + "import pandas as pd\n", + "from tqdm.auto import tqdm\n", + "\n", + "# --- если проект находится в другом каталоге, добавьте его в sys.path ---\n", + "# import sys; sys.path.append(r\"C:\\Prgrm\\ESWA_2025\")\n", + "\n", + "from data_loading.feature_extractor import (\n", + " PretrainedAudioEmbeddingExtractor,\n", + " PretrainedTextEmbeddingExtractor,\n", + ")\n", + "\n", + "# ---------- 2. Пути из вашего config.toml ----------\n", + "synthetic_path = r\"E:/MELD_S\"\n", + "synth_csv_path = os.path.join(synthetic_path, \"meld_s_train_labels.csv\")\n", + "synth_wav_dir = os.path.join(synthetic_path, \"wavs\")\n", + "\n", + "# ---------- 3. Создаём экстракторы ровно как в основном проекте ----------\n", + "AUDIO_MODEL = \"audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim\"\n", + "AUDIO_CKPT = \"best_audio_model_2.pt\" # путь относительно запуска\n", + "TEXT_MODEL = \"jinaai/jina-embeddings-v3\"\n", + "TEXT_CKPT = \"best_text_model.pth\"\n", + "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "SAMPLE_RATE = 16000\n", + "\n", + "audio_feat = PretrainedAudioEmbeddingExtractor(\n", + " model_name=AUDIO_MODEL,\n", + " checkpoint=AUDIO_CKPT,\n", + " device=DEVICE,\n", + ")\n", + "\n", + "text_feat = PretrainedTextEmbeddingExtractor(\n", + " model_name=TEXT_MODEL,\n", + " checkpoint=TEXT_CKPT,\n", + " device=DEVICE,\n", + ")\n", + "\n", + "# ---------- 4. 
Узнаём фактические размеры эмбеддингов ----------\n", + "with torch.no_grad():\n", + " dummy_wav = torch.zeros(1, SAMPLE_RATE) # секунда тишины\n", + " _, a_emb = audio_feat.extract(dummy_wav[0], SAMPLE_RATE)\n", + " AUDIO_DIM = a_emb[0].shape[-1]\n", + "\n", + " _, t_emb = text_feat.extract(\"hello world\")\n", + " TEXT_DIM = t_emb[0].shape[-1]\n", + "\n", + "# сколько логитов выдаёт каждый классификатор\n", + "NUM_EMOTIONS = 7 # [\"anger\", \"disgust\", ...] — как в config\n", + "PRED_DIM = NUM_EMOTIONS\n", + "\n", + "EXPECTED_ALL = AUDIO_DIM + TEXT_DIM + 2 * PRED_DIM\n", + "print(f\"AUDIO_DIM = {AUDIO_DIM}, TEXT_DIM = {TEXT_DIM}, \"\n", + " f\"TOTAL EXPECTED = {EXPECTED_ALL}\")\n", + "\n", + "# ---------- 5. Читаем CSV синтетики ----------\n", + "df = pd.read_csv(synth_csv_path)\n", + "print(f\"Всего строк в CSV: {len(df)}\")\n", + "\n", + "bad_rows, good_cnt = [], 0\n", + "\n", + "# ---------- 6. Проходим по записям ----------\n", + "for i, row in tqdm(df.iterrows(), total=len(df)):\n", + " video_name = row[\"video_name\"]\n", + " wav_path = os.path.join(synth_wav_dir, f\"{video_name}.wav\")\n", + " txt = row.get(\"text\", \"\")\n", + "\n", + " reason = None\n", + " try:\n", + " # 6.1 Проверяем, существует ли wav-файл\n", + " if not os.path.exists(wav_path):\n", + " reason = \"file_missing\"\n", + "\n", + " # 6.2 Получаем аудио-эмбеддинг\n", + " if reason is None:\n", + " wf, sr = torchaudio.load(wav_path)\n", + " if sr != SAMPLE_RATE:\n", + " wf = torchaudio.transforms.Resample(sr, SAMPLE_RATE)(wf)\n", + " a_pred, a_emb = audio_feat.extract(wf[0], SAMPLE_RATE)\n", + " a_emb = a_emb[0]\n", + " if a_emb.shape[-1] != AUDIO_DIM:\n", + " reason = f\"audio_dim_{a_emb.shape[-1]}\"\n", + "\n", + " # 6.3 Получаем текст-эмбеддинг\n", + " if reason is None:\n", + " t_pred, t_emb = text_feat.extract(txt)\n", + " t_emb = t_emb[0]\n", + " if t_emb.shape[-1] != TEXT_DIM:\n", + " reason = f\"text_dim_{t_emb.shape[-1]}\"\n", + "\n", + " # 6.4 Проверяем полную конкатенацию\n", 
+ " if reason is None:\n", + " full_vec = torch.cat([a_emb, t_emb, a_pred[0], t_pred[0]], dim=-1)\n", + " if full_vec.shape[-1] != EXPECTED_ALL:\n", + " reason = f\"concat_dim_{full_vec.shape[-1]}\"\n", + "\n", + " except Exception as e:\n", + " reason = \"exception_\" + e.__class__.__name__\n", + " logging.error(f\"{video_name}: {traceback.format_exc(limit=2)}\")\n", + "\n", + " # 6.5 Сохраняем результат\n", + " if reason:\n", + " bad_rows.append({\n", + " \"idx\": i,\n", + " \"video_name\": video_name,\n", + " \"reason\": reason,\n", + " \"wav_path\": wav_path,\n", + " \"text_len\": len(txt),\n", + " })\n", + " else:\n", + " good_cnt += 1\n", + "\n", + "# ---------- 7. Итоги ----------\n", + "print(f\"\\n✅ GOOD : {good_cnt}\")\n", + "print(f\"❌ BAD : {len(bad_rows)}\")\n", + "\n", + "bad_df = pd.DataFrame(bad_rows)\n", + "display(bad_df)\n", + "\n", + "# ---------- 8. (Необязательно) сохраняем список плохих файлов ----------\n", + "out_csv = os.path.join(synthetic_path, \"bad_synth_meld.csv\")\n", + "bad_df.to_csv(out_csv, index=False)\n", + "print(f\"\\nСписок «битых» примеров сохранён в: {out_csv}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a232f93d-7f7c-41d3-9204-74445e43d071", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}