File size: 89,673 Bytes

feb5be5

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ed4a9148-55d8-483f-888d-9939a06873f9",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ['HF_HOME'] = \"/scratch/tar3kh/models/cache\"\n",
    "import torch \n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline\n",
    "from datasets import load_dataset #datasets is huggingface's dataset package\n",
    "from peft import get_peft_model, LoraConfig, TaskType\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import PIL\n",
    "\n",
    "import lm_eval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "74f6aba0-fb07-4ba6-b3d5-f63900b3e4f5",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1731e4705d734f3b9f1cab292fcbc9fd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# First model under test: Llama 3.2 3B Instruct, loaded in bfloat16 with automatic device placement.\n",
    "llama_checkpoint = \"meta-llama/Llama-3.2-3B-Instruct\"\n",
    "tokenizer = AutoTokenizer.from_pretrained(llama_checkpoint)\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    llama_checkpoint,\n",
    "    device_map = \"auto\",\n",
    "    torch_dtype = torch.bfloat16,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0cb7397c-bcbe-4637-b973-1d98873d0f8a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Task registry that is handed to `lm_eval.simple_evaluate` in the evaluation cell.\n",
    "task_manager = lm_eval.tasks.TaskManager()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9ae14b7a-81bb-494c-856c-fa3f3ff0b1f0",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "`pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way.\n",
      "Passed an already-initialized model through `pretrained`, assuming single-process call to evaluate() or custom distributed integration\n",
      "100%|██████████| 30/30 [00:00<00:00, 629.43it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 633.84it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 633.37it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 623.71it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 630.66it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 638.69it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 624.79it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 641.17it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 642.51it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 642.24it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 640.21it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.55it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.32it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 646.81it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 626.84it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 308.57it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 379.50it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 631.24it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 635.51it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 644.93it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.68it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 644.05it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 102.55it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 624.02it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 628.20it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 627.93it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 627.78it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 491.80it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 619.18it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 637.35it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 632.35it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 633.64it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 642.34it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 640.85it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 615.70it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 636.56it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 629.97it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 626.97it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 633.91it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 643.70it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 629.93it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 635.28it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 634.87it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 645.25it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 638.43it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 645.74it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 624.12it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 642.42it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 636.23it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 627.26it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 643.17it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 636.89it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 641.15it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.27it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 637.52it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 634.23it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.96it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 69.18it/s]\n",
      "Running loglikelihood requests: 100%|██████████| 6840/6840 [01:08<00:00, 99.56it/s] \n",
      "Running generate_until requests:   0%|          | 0/30 [00:00<?, ?it/s]/scratch/tar3kh/llm_course_2/lib/python3.11/site-packages/transformers/generation/configuration_utils.py:631: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
      "  warnings.warn(\n",
      "/scratch/tar3kh/llm_course_2/lib/python3.11/site-packages/transformers/generation/configuration_utils.py:636: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
      "  warnings.warn(\n",
      "Running generate_until requests: 100%|██████████| 30/30 [01:49<00:00,  3.66s/it]\n",
      "fatal: not a git repository (or any parent up to mount point /sfs/gpfs)\n",
      "Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# Benchmark the loaded model on GSM8K (chain-of-thought) and MMLU.\n",
    "# `limit = 30` caps every (sub)task at 30 examples, so the scores are a quick\n",
    "# sanity check rather than full-benchmark numbers.\n",
    "results = lm_eval.simple_evaluate(\n",
    "    model = 'hf',\n",
    "    # Pass the already-loaded model together with its tokenizer. The key has to be spelled\n",
    "    # exactly `tokenizer`; a misspelled key is not picked up as the tokenizer argument.\n",
    "    model_args = {\"pretrained\": model, \"dtype\": \"bfloat16\", \"tokenizer\": tokenizer},\n",
    "    tasks = ['gsm8k_cot',  'mmlu'],\n",
    "    task_manager = task_manager,\n",
    "    log_samples = True, \n",
    "    batch_size = \"1\", \n",
    "    limit = 30, \n",
    "    random_seed = 42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f753cc30-d67e-4185-9d41-e56eaafa5dc8",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'gsm8k_cot': {'alias': 'gsm8k_cot',\n",
       "  'exact_match,strict-match': np.float64(0.5),\n",
       "  'exact_match_stderr,strict-match': 0.09284766908852593,\n",
       "  'exact_match,flexible-extract': np.float64(0.5),\n",
       "  'exact_match_stderr,flexible-extract': 0.09284766908852593},\n",
       " 'mmlu': {'acc,none': 0.6111111111111112,\n",
       "  'acc_stderr,none': np.float64(0.011219896029746483),\n",
       "  'alias': 'mmlu'},\n",
       " 'mmlu_humanities': {'acc,none': 0.6435897435897436,\n",
       "  'acc_stderr,none': np.float64(0.02350521124512561),\n",
       "  'alias': ' - humanities'},\n",
       " 'mmlu_formal_logic': {'alias': '  - formal_logic',\n",
       "  'acc,none': 0.3,\n",
       "  'acc_stderr,none': 0.0850962943396763},\n",
       " 'mmlu_high_school_european_history': {'alias': '  - high_school_european_history',\n",
       "  'acc,none': 0.6666666666666666,\n",
       "  'acc_stderr,none': 0.08753762190648169},\n",
       " 'mmlu_high_school_us_history': {'alias': '  - high_school_us_history',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.08948554539839962},\n",
       " 'mmlu_high_school_world_history': {'alias': '  - high_school_world_history',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_international_law': {'alias': '  - international_law',\n",
       "  'acc,none': 0.8333333333333334,\n",
       "  'acc_stderr,none': 0.06920456654478331},\n",
       " 'mmlu_jurisprudence': {'alias': '  - jurisprudence',\n",
       "  'acc,none': 0.7,\n",
       "  'acc_stderr,none': 0.0850962943396763},\n",
       " 'mmlu_logical_fallacies': {'alias': '  - logical_fallacies',\n",
       "  'acc,none': 0.6666666666666666,\n",
       "  'acc_stderr,none': 0.08753762190648169},\n",
       " 'mmlu_moral_disputes': {'alias': '  - moral_disputes',\n",
       "  'acc,none': 0.5666666666666667,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_moral_scenarios': {'alias': '  - moral_scenarios',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.08948554539839962},\n",
       " 'mmlu_philosophy': {'alias': '  - philosophy',\n",
       "  'acc,none': 0.7,\n",
       "  'acc_stderr,none': 0.08509629433967632},\n",
       " 'mmlu_prehistory': {'alias': '  - prehistory',\n",
       "  'acc,none': 0.6,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_professional_law': {'alias': '  - professional_law',\n",
       "  'acc,none': 0.43333333333333335,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_world_religions': {'alias': '  - world_religions',\n",
       "  'acc,none': 0.8333333333333334,\n",
       "  'acc_stderr,none': 0.06920456654478328},\n",
       " 'mmlu_other': {'acc,none': 0.6538461538461539,\n",
       "  'acc_stderr,none': np.float64(0.02283992657168969),\n",
       "  'alias': ' - other'},\n",
       " 'mmlu_business_ethics': {'alias': '  - business_ethics',\n",
       "  'acc,none': 0.7333333333333333,\n",
       "  'acc_stderr,none': 0.08211756827352532},\n",
       " 'mmlu_clinical_knowledge': {'alias': '  - clinical_knowledge',\n",
       "  'acc,none': 0.6,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_college_medicine': {'alias': '  - college_medicine',\n",
       "  'acc,none': 0.6666666666666666,\n",
       "  'acc_stderr,none': 0.08753762190648168},\n",
       " 'mmlu_global_facts': {'alias': '  - global_facts',\n",
       "  'acc,none': 0.3333333333333333,\n",
       "  'acc_stderr,none': 0.08753762190648168},\n",
       " 'mmlu_human_aging': {'alias': '  - human_aging',\n",
       "  'acc,none': 0.4666666666666667,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_management': {'alias': '  - management',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_marketing': {'alias': '  - marketing',\n",
       "  'acc,none': 0.9333333333333333,\n",
       "  'acc_stderr,none': 0.046320555585310084},\n",
       " 'mmlu_medical_genetics': {'alias': '  - medical_genetics',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.0785403232453173},\n",
       " 'mmlu_miscellaneous': {'alias': '  - miscellaneous',\n",
       "  'acc,none': 0.8333333333333334,\n",
       "  'acc_stderr,none': 0.06920456654478328},\n",
       " 'mmlu_nutrition': {'alias': '  - nutrition',\n",
       "  'acc,none': 0.8333333333333334,\n",
       "  'acc_stderr,none': 0.06920456654478331},\n",
       " 'mmlu_professional_accounting': {'alias': '  - professional_accounting',\n",
       "  'acc,none': 0.5,\n",
       "  'acc_stderr,none': 0.09284766908852593},\n",
       " 'mmlu_professional_medicine': {'alias': '  - professional_medicine',\n",
       "  'acc,none': 0.5333333333333333,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_virology': {'alias': '  - virology',\n",
       "  'acc,none': 0.5,\n",
       "  'acc_stderr,none': 0.09284766908852593},\n",
       " 'mmlu_social_sciences': {'acc,none': 0.6805555555555556,\n",
       "  'acc_stderr,none': np.float64(0.024243558039781773),\n",
       "  'alias': ' - social sciences'},\n",
       " 'mmlu_econometrics': {'alias': '  - econometrics',\n",
       "  'acc,none': 0.4,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_high_school_geography': {'alias': '  - high_school_geography',\n",
       "  'acc,none': 0.7333333333333333,\n",
       "  'acc_stderr,none': 0.08211756827352532},\n",
       " 'mmlu_high_school_government_and_politics': {'alias': '  - high_school_government_and_politics',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_high_school_macroeconomics': {'alias': '  - high_school_macroeconomics',\n",
       "  'acc,none': 0.6,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_high_school_microeconomics': {'alias': '  - high_school_microeconomics',\n",
       "  'acc,none': 0.6666666666666666,\n",
       "  'acc_stderr,none': 0.08753762190648169},\n",
       " 'mmlu_high_school_psychology': {'alias': '  - high_school_psychology',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_human_sexuality': {'alias': '  - human_sexuality',\n",
       "  'acc,none': 0.6666666666666666,\n",
       "  'acc_stderr,none': 0.08753762190648169},\n",
       " 'mmlu_professional_psychology': {'alias': '  - professional_psychology',\n",
       "  'acc,none': 0.7333333333333333,\n",
       "  'acc_stderr,none': 0.08211756827352529},\n",
       " 'mmlu_public_relations': {'alias': '  - public_relations',\n",
       "  'acc,none': 0.6,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_security_studies': {'alias': '  - security_studies',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.07854032324531726},\n",
       " 'mmlu_sociology': {'alias': '  - sociology',\n",
       "  'acc,none': 0.6,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_us_foreign_policy': {'alias': '  - us_foreign_policy',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_stem': {'acc,none': 0.5157894736842106,\n",
       "  'acc_stderr,none': np.float64(0.019891342584452104),\n",
       "  'alias': ' - stem'},\n",
       " 'mmlu_abstract_algebra': {'alias': '  - abstract_algebra',\n",
       "  'acc,none': 0.3333333333333333,\n",
       "  'acc_stderr,none': 0.0875376219064817},\n",
       " 'mmlu_anatomy': {'alias': '  - anatomy',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.0894855453983996},\n",
       " 'mmlu_astronomy': {'alias': '  - astronomy',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.0785403232453173},\n",
       " 'mmlu_college_biology': {'alias': '  - college_biology',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_college_chemistry': {'alias': '  - college_chemistry',\n",
       "  'acc,none': 0.4,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_college_computer_science': {'alias': '  - college_computer_science',\n",
       "  'acc,none': 0.43333333333333335,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_college_mathematics': {'alias': '  - college_mathematics',\n",
       "  'acc,none': 0.43333333333333335,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_college_physics': {'alias': '  - college_physics',\n",
       "  'acc,none': 0.36666666666666664,\n",
       "  'acc_stderr,none': 0.0894855453983996},\n",
       " 'mmlu_computer_security': {'alias': '  - computer_security',\n",
       "  'acc,none': 0.6666666666666666,\n",
       "  'acc_stderr,none': 0.08753762190648168},\n",
       " 'mmlu_conceptual_physics': {'alias': '  - conceptual_physics',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.08948554539839962},\n",
       " 'mmlu_electrical_engineering': {'alias': '  - electrical_engineering',\n",
       "  'acc,none': 0.5333333333333333,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_elementary_mathematics': {'alias': '  - elementary_mathematics',\n",
       "  'acc,none': 0.3333333333333333,\n",
       "  'acc_stderr,none': 0.0875376219064817},\n",
       " 'mmlu_high_school_biology': {'alias': '  - high_school_biology',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.07854032324531729},\n",
       " 'mmlu_high_school_chemistry': {'alias': '  - high_school_chemistry',\n",
       "  'acc,none': 0.5666666666666667,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_high_school_computer_science': {'alias': '  - high_school_computer_science',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.0785403232453173},\n",
       " 'mmlu_high_school_mathematics': {'alias': '  - high_school_mathematics',\n",
       "  'acc,none': 0.26666666666666666,\n",
       "  'acc_stderr,none': 0.08211756827352526},\n",
       " 'mmlu_high_school_physics': {'alias': '  - high_school_physics',\n",
       "  'acc,none': 0.36666666666666664,\n",
       "  'acc_stderr,none': 0.0894855453983996},\n",
       " 'mmlu_high_school_statistics': {'alias': '  - high_school_statistics',\n",
       "  'acc,none': 0.23333333333333334,\n",
       "  'acc_stderr,none': 0.07854032324531728},\n",
       " 'mmlu_machine_learning': {'alias': '  - machine_learning',\n",
       "  'acc,none': 0.5,\n",
       "  'acc_stderr,none': 0.09284766908852593}}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-task metrics (including the MMLU group aggregates) from the evaluation run above.\n",
    "results['results']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "408c9b77-ddc7-4100-8af3-205da92b8981",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Read the two question datasets from CSV files in the working directory.\n",
    "# Later cells read the `question` column of both frames.\n",
    "budget = pd.read_csv(filepath_or_buffer = \"budget_dataset.csv\")\n",
    "goals = pd.read_csv(filepath_or_buffer = \"goals_dataset.csv\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "4d2aac10-2e16-45e5-9c28-ecee34823332",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def format_qa_prompt(row):\n",
    "    \"\"\"Wrap a row's `question` field in the Q:/A: prompt template.\"\"\"\n",
    "    return f\"Q: {row['question']}\\n\\nA: \"\n",
    "\n",
    "\n",
    "# Same template for both datasets; it matches the prompt format used in the generation cells.\n",
    "budget['instruct_lora'] = budget.apply(format_qa_prompt, axis=1)\n",
    "goals['instruct_lora'] = goals.apply(format_qa_prompt, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "699a1799-2eb1-4e3d-92fd-d95e608d0a46",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8228990282024cdcbda7f17c4d8791aa",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/2500 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "981eeb5a57cb43d4a957db0cec7255fb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/500 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from datasets import load_dataset, Dataset  # Hugging Face `datasets` package\n",
    "\n",
    "# Shuffle reproducibly, then use the first 2500 rows for training and the rest for validation.\n",
    "budget = budget.sample(frac = 1, random_state = 42)\n",
    "\n",
    "# NOTE(review): this tokenizes the 'instruct' column, presumably supplied by the CSV; the\n",
    "# 'instruct_lora' column built in the previous cell is not used here -- TODO confirm intent.\n",
    "train_budget = Dataset.from_pandas(budget[:2500]).map(\n",
    "    lambda batch: tokenizer(batch['instruct']),\n",
    "    batched = True,\n",
    ")\n",
    "val_budget = Dataset.from_pandas(budget[2500:]).map(\n",
    "    lambda batch: tokenizer(batch['instruct']),\n",
    "    batched = True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "5771055b-6fd1-4116-8f31-e96bdf6b3f69",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b17dc4834f3541d2b2a23de0fe014e28",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/2500 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6cc811fbe0a94abaab671c32f0078bb6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/500 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Shuffle reproducibly, then use the first 2500 rows for training and the rest for validation.\n",
    "goals = goals.sample(frac = 1, random_state = 42)\n",
    "\n",
    "# NOTE(review): this tokenizes the 'instruct' column, presumably supplied by the CSV; the\n",
    "# 'instruct_lora' column built earlier is not used here -- TODO confirm intent.\n",
    "train_goals = Dataset.from_pandas(goals[:2500]).map(\n",
    "    lambda batch: tokenizer(batch['instruct']),\n",
    "    batched = True,\n",
    ")\n",
    "val_goals = Dataset.from_pandas(goals[2500:]).map(\n",
    "    lambda batch: tokenizer(batch['instruct']),\n",
    "    batched = True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "a36d39f3-6937-47df-b3da-091dbf8df46e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare the model and tokenizer for padding.\n",
    "# The tokenizer reuses its EOS token as the padding token.\n",
    "tokenizer.pad_token = tokenizer.eos_token\n",
    "# Mirror that id on the model config. The attribute must be spelled `pad_token_id`;\n",
    "# assigning to any other name only creates an extra attribute on the config object.\n",
    "model.config.pad_token_id = tokenizer.pad_token_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "3c846699-fdb9-4c49-aef3-7860cfe80712",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/scratch/tar3kh/llm_course_2/lib/python3.11/site-packages/transformers/generation/configuration_utils.py:631: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
      "  warnings.warn(\n",
      "/scratch/tar3kh/llm_course_2/lib/python3.11/site-packages/transformers/generation/configuration_utils.py:636: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
      "  warnings.warn(\n",
      "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Q: My short term goal is to save for a $1774 vacation in the next year, my medium term goal is to save for down payment for a new car, around 5227 in the next 2 or 3 years, and my long term goal is to save for a down payment for a house around 151861 in the next ten years, can you help me integrate these goals into my budget as well as where I should store these savings?\n",
      "\n",
      "A: 1. Short-term goal: Saving for a vacation in the next year. Allocate a specific amount each month towards this goal. For example, you can set aside $147 per month for 12 months to reach your goal of $1774. You can use a separate savings account specifically for this goal. Consider opening a high-yield savings account or a money market fund to earn interest on your savings.\n",
      "\n",
      "2. Medium-term goal: Saving for a down payment on a new car in 2-3 years. Allocate a specific amount each month towards this goal. For example, you can set aside $174 per month for 24-36 months to reach your goal of $5227. You can use a separate savings account specifically for this goal. Consider opening a high-yield savings account or a money market fund to earn interest on your savings.\n",
      "\n",
      "3. Long-term goal: Saving for a down payment on a house in 10 years. Allocate a specific amount each month towards this goal. For example, you can set aside $1549 per month for 120 months to reach your goal of $151861. You can use a separate savings account specifically for this goal. Consider opening a high-yield savings account or a money market fund to earn interest on your savings.\n",
      "\n",
      "To integrate these goals into your budget, consider the 50/30/20 rule: Allocate 50% of your income towards necessary expenses (housing, utilities, food, transportation, and minimum payments on debts), 30% towards discretionary spending (entertainment, hobbies, travel), and 20% towards saving and debt repayment. You can adjust this ratio based on your individual circumstances.\n",
      "\n",
      "To store these savings, consider the following options:\n",
      "\n",
      "*   High-yield savings account: Earns interest on your savings and is FDIC-insured, making it a low-risk option.\n",
      "*   Money market fund: Earns interest on your savings and provides liquidity, making it a good option for short-term goals.\n",
      "*   Certificates of Deposit (CDs): Earns interest on your savings and provides a fixed return, but you'll need to keep your money locked in the CD for a specified period.\n",
      "*   Individual Retirement Account (IRA): A tax-advantaged account that can be used for long-term savings, but may have penalties for early withdrawal.\n",
      "\n",
      "It's essential to review and adjust your budget regularly to ensure you're on track to meet your goals. Consider consulting with a financial advisor to create a personalized plan tailored to your needs and goals.\n"
     ]
    }
   ],
   "source": [
    "# Generate an answer for the first validation goals question with the currently loaded model.\n",
    "formatted_prompt = f\"Q: {val_goals[0]['question']}\\n\\nA: \"\n",
    "# Tokenize with `tokenizer(...)` instead of `tokenizer.encode(...)` so the attention mask is\n",
    "# returned as well: pad and EOS share one token id here, so `generate` cannot infer the mask.\n",
    "encoded = tokenizer(formatted_prompt, return_tensors = \"pt\").to(model.device)\n",
    "inputs = encoded[\"input_ids\"]\n",
    "output = model.generate(\n",
    "    inputs,\n",
    "    attention_mask = encoded[\"attention_mask\"],\n",
    "    max_new_tokens = 800,\n",
    "    pad_token_id = tokenizer.pad_token_id,\n",
    "    do_sample = False,  # greedy decoding\n",
    ")\n",
    "# `output[0]` holds the prompt tokens followed by the generated continuation.\n",
    "generated_text = tokenizer.decode(output[0], skip_special_tokens = True)\n",
    "print(generated_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "6e1bd005-9f89-4a0b-ac57-cd8f521037e8",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Q: I  have an income of about 53255 a year and my monthly expenses include 2208 a month in rent and utilities, a 700 car payment, $300 in food, and about 205 a month in other expenses. Using python, can you create for me a budget spreadsheet and export it to excel?\n",
      "\n",
      "A:  Here's a Python script that creates a budget spreadsheet and exports it to Excel:\n",
      "\n",
      "```python\n",
      "import pandas as pd\n",
      "\n",
      "# Define your income and expenses\n",
      "income = 53255\n",
      "rent_and_utilities = 2208\n",
      "car_payment = 700\n",
      "food = 300\n",
      "other_expenses = 205\n",
      "\n",
      "# Calculate your total monthly expenses\n",
      "total_expenses = rent_and_utilities + car_payment + food + other_expenses\n",
      "\n",
      "# Create a dictionary to store your income and expenses\n",
      "budget = {\n",
      "    'Income': [income],\n",
      "    'Fixed Expenses': [rent_and_utilities, car_payment, other_expenses],\n",
      "    'Variable Expenses': [food],\n",
      "    'Total Expenses': [total_expenses]\n",
      "}\n",
      "\n",
      "# Create a DataFrame from the dictionary\n",
      "df = pd.DataFrame(budget)\n",
      "\n",
      "# Print the DataFrame\n",
      "print(df)\n",
      "\n",
      "# Export the DataFrame to an Excel file\n",
      "df.to_excel('budget.xlsx', index=False)\n",
      "```\n",
      "\n",
      "This script will create a budget spreadsheet with the following columns:\n",
      "\n",
      "*   Income\n",
      "*   Fixed Expenses (including rent and utilities, car payment, and other expenses)\n",
      "*   Variable Expenses (including food)\n",
      "*   Total Expenses\n",
      "\n",
      "The script will also export the DataFrame to an Excel file named `budget.xlsx`.\n",
      "\n",
      "**Example Output:**\n",
      "\n",
      "| Income | Fixed Expenses | Variable Expenses | Total Expenses |\n",
      "| --- | --- | --- | --- |\n",
      "| 53255 | 3208 | 300 | 3508 |\n",
      "\n",
      "**Tips and Variations:**\n",
      "\n",
      "*   You can customize the script to include additional income and expenses by adding more columns to the `budget` dictionary and the `df` DataFrame.\n",
      "*   You can also use this script as a starting point to create a more detailed budget spreadsheet by adding more columns and rows to the `df` DataFrame.\n",
      "*   To make the script more user-friendly, you can add a prompt to ask the user to input their income and expenses, and then use those values to populate the `budget` dictionary and the `df` DataFrame.\n",
      "*   To make the script more automated, you can use a scheduling tool like `schedule` to run the script at regular intervals and update the budget spreadsheet accordingly.\n"
     ]
    }
   ],
   "source": [
    "# Generate an answer for the first validation budget question with the currently loaded model.\n",
    "formatted_prompt = f\"Q: {val_budget[0]['question']}\\n\\nA: \"\n",
    "# Tokenize with `tokenizer(...)` instead of `tokenizer.encode(...)` so the attention mask is\n",
    "# returned as well; passing it explicitly avoids the \"attention mask is not set\" warning\n",
    "# that `generate` emits when the pad token equals the EOS token.\n",
    "encoded = tokenizer(formatted_prompt, return_tensors = \"pt\").to(model.device)\n",
    "inputs = encoded[\"input_ids\"]\n",
    "output = model.generate(\n",
    "    inputs,\n",
    "    attention_mask = encoded[\"attention_mask\"],\n",
    "    max_new_tokens = 800,\n",
    "    pad_token_id = tokenizer.pad_token_id,\n",
    "    do_sample = False,  # greedy decoding\n",
    ")\n",
    "# `output[0]` holds the prompt tokens followed by the generated continuation.\n",
    "generated_text = tokenizer.decode(output[0], skip_special_tokens = True)\n",
    "print(generated_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "0ac9a8ce-4fa0-4630-b4d5-2a1fe19029ad",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import gc\n",
    "\n",
    "# Release the current model before the next one is loaded.\n",
    "# `del` only removes this name; run the garbage collector so objects kept alive solely by\n",
    "# reference cycles are destroyed, then return the cached CUDA blocks to the driver.\n",
    "del model\n",
    "gc.collect()\n",
    "torch.cuda.empty_cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "b637a1dc-5de4-434f-a199-488121e4fc92",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e7367950b76e48d78fe4ea8adcc11321",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(\"mistralai/Ministral-8B-Instruct-2410\")\n",
    "model = AutoModelForCausalLM.from_pretrained(\"mistralai/Ministral-8B-Instruct-2410\", device_map = \"auto\", torch_dtype = torch.bfloat16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "ad7349d5-e70d-4684-a85b-9bd937161805",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare the model and tokenizer \n",
    "tokenizer.pad_token = tokenizer.eos_token # set padding token to EOS token\n",
    "model.config.poad_token_id = tokenizer.pad_token_id # set the padding token for model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "aea53718-1062-41c7-87a9-d96ac3fc13e3",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "`pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way.\n",
      "Passed an already-initialized model through `pretrained`, assuming single-process call to evaluate() or custom distributed integration\n",
      "100%|██████████| 30/30 [00:00<00:00, 597.42it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 624.61it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 629.97it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 628.70it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 632.95it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 625.95it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.59it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.62it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 632.55it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 638.64it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 618.78it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 633.69it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 622.05it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 627.75it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 446.39it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 610.02it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 617.17it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 622.85it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 612.45it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 612.01it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 621.72it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 621.97it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 624.63it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 626.03it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 636.17it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 626.13it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 626.59it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 640.30it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 637.37it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.40it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 629.62it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 632.29it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 452.92it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 622.28it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.87it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 624.62it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 631.57it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.24it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 637.52it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.20it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 640.64it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 629.27it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 628.75it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 619.60it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 638.59it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 629.08it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 331.37it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 287.76it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 427.76it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 634.93it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 621.34it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 626.57it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 627.44it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 619.38it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 621.84it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 629.56it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 623.88it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 71.09it/s]\n",
      "Running loglikelihood requests: 100%|██████████| 6840/6840 [01:30<00:00, 75.91it/s]\n",
      "Running generate_until requests: 100%|██████████| 30/30 [02:34<00:00,  5.15s/it]\n",
      "fatal: not a git repository (or any parent up to mount point /sfs/gpfs)\n",
      "Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\n"
     ]
    }
   ],
   "source": [
    "\n",
    "results2 = lm_eval.simple_evaluate(\n",
    "    model = 'hf',\n",
    "    model_args = {\"pretrained\": model, \"dtype\": \"bfloat16\", \"toeknzier\": tokenizer},\n",
    "    tasks = ['gsm8k_cot',  'mmlu'],\n",
    "    task_manager = task_manager,\n",
    "    log_samples = True, \n",
    "    batch_size = \"1\", \n",
    "    limit = 30, \n",
    "    random_seed = 42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "dd0bd94d-5195-4203-a868-558ea77dfb32",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'gsm8k_cot': {'alias': 'gsm8k_cot',\n",
       "  'exact_match,strict-match': np.float64(0.6666666666666666),\n",
       "  'exact_match_stderr,strict-match': 0.08753762190648169,\n",
       "  'exact_match,flexible-extract': np.float64(0.7),\n",
       "  'exact_match_stderr,flexible-extract': 0.0850962943396763},\n",
       " 'mmlu': {'acc,none': 0.6450292397660818,\n",
       "  'acc_stderr,none': np.float64(0.011026946921383438),\n",
       "  'alias': 'mmlu'},\n",
       " 'mmlu_humanities': {'acc,none': 0.6666666666666666,\n",
       "  'acc_stderr,none': np.float64(0.022655549762135505),\n",
       "  'alias': ' - humanities'},\n",
       " 'mmlu_formal_logic': {'alias': '  - formal_logic',\n",
       "  'acc,none': 0.5,\n",
       "  'acc_stderr,none': 0.09284766908852593},\n",
       " 'mmlu_high_school_european_history': {'alias': '  - high_school_european_history',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.08948554539839962},\n",
       " 'mmlu_high_school_us_history': {'alias': '  - high_school_us_history',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_high_school_world_history': {'alias': '  - high_school_world_history',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_international_law': {'alias': '  - international_law',\n",
       "  'acc,none': 0.8666666666666667,\n",
       "  'acc_stderr,none': 0.06312427686319994},\n",
       " 'mmlu_jurisprudence': {'alias': '  - jurisprudence',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.07854032324531729},\n",
       " 'mmlu_logical_fallacies': {'alias': '  - logical_fallacies',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_moral_disputes': {'alias': '  - moral_disputes',\n",
       "  'acc,none': 0.6,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_moral_scenarios': {'alias': '  - moral_scenarios',\n",
       "  'acc,none': 0.26666666666666666,\n",
       "  'acc_stderr,none': 0.0821175682735253},\n",
       " 'mmlu_philosophy': {'alias': '  - philosophy',\n",
       "  'acc,none': 0.7,\n",
       "  'acc_stderr,none': 0.08509629433967632},\n",
       " 'mmlu_prehistory': {'alias': '  - prehistory',\n",
       "  'acc,none': 0.5333333333333333,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_professional_law': {'alias': '  - professional_law',\n",
       "  'acc,none': 0.5333333333333333,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_world_religions': {'alias': '  - world_religions',\n",
       "  'acc,none': 0.8666666666666667,\n",
       "  'acc_stderr,none': 0.06312427686319991},\n",
       " 'mmlu_other': {'acc,none': 0.6820512820512821,\n",
       "  'acc_stderr,none': np.float64(0.02296366746299997),\n",
       "  'alias': ' - other'},\n",
       " 'mmlu_business_ethics': {'alias': '  - business_ethics',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_clinical_knowledge': {'alias': '  - clinical_knowledge',\n",
       "  'acc,none': 0.6,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_college_medicine': {'alias': '  - college_medicine',\n",
       "  'acc,none': 0.6,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_global_facts': {'alias': '  - global_facts',\n",
       "  'acc,none': 0.43333333333333335,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_human_aging': {'alias': '  - human_aging',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.08948554539839962},\n",
       " 'mmlu_management': {'alias': '  - management',\n",
       "  'acc,none': 0.7333333333333333,\n",
       "  'acc_stderr,none': 0.0821175682735253},\n",
       " 'mmlu_marketing': {'alias': '  - marketing',\n",
       "  'acc,none': 0.8666666666666667,\n",
       "  'acc_stderr,none': 0.06312427686319994},\n",
       " 'mmlu_medical_genetics': {'alias': '  - medical_genetics',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.0785403232453173},\n",
       " 'mmlu_miscellaneous': {'alias': '  - miscellaneous',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_nutrition': {'alias': '  - nutrition',\n",
       "  'acc,none': 0.9,\n",
       "  'acc_stderr,none': 0.055708601453115535},\n",
       " 'mmlu_professional_accounting': {'alias': '  - professional_accounting',\n",
       "  'acc,none': 0.5666666666666667,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_professional_medicine': {'alias': '  - professional_medicine',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.08948554539839962},\n",
       " 'mmlu_virology': {'alias': '  - virology',\n",
       "  'acc,none': 0.5333333333333333,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_social_sciences': {'acc,none': 0.7166666666666667,\n",
       "  'acc_stderr,none': np.float64(0.023102765218675773),\n",
       "  'alias': ' - social sciences'},\n",
       " 'mmlu_econometrics': {'alias': '  - econometrics',\n",
       "  'acc,none': 0.43333333333333335,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_high_school_geography': {'alias': '  - high_school_geography',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.07854032324531726},\n",
       " 'mmlu_high_school_government_and_politics': {'alias': '  - high_school_government_and_politics',\n",
       "  'acc,none': 0.8666666666666667,\n",
       "  'acc_stderr,none': 0.06312427686319991},\n",
       " 'mmlu_high_school_macroeconomics': {'alias': '  - high_school_macroeconomics',\n",
       "  'acc,none': 0.5333333333333333,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_high_school_microeconomics': {'alias': '  - high_school_microeconomics',\n",
       "  'acc,none': 0.7333333333333333,\n",
       "  'acc_stderr,none': 0.08211756827352532},\n",
       " 'mmlu_high_school_psychology': {'alias': '  - high_school_psychology',\n",
       "  'acc,none': 0.7333333333333333,\n",
       "  'acc_stderr,none': 0.0821175682735253},\n",
       " 'mmlu_human_sexuality': {'alias': '  - human_sexuality',\n",
       "  'acc,none': 0.8333333333333334,\n",
       "  'acc_stderr,none': 0.06920456654478331},\n",
       " 'mmlu_professional_psychology': {'alias': '  - professional_psychology',\n",
       "  'acc,none': 0.7,\n",
       "  'acc_stderr,none': 0.08509629433967632},\n",
       " 'mmlu_public_relations': {'alias': '  - public_relations',\n",
       "  'acc,none': 0.6,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_security_studies': {'alias': '  - security_studies',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_sociology': {'alias': '  - sociology',\n",
       "  'acc,none': 0.7,\n",
       "  'acc_stderr,none': 0.0850962943396763},\n",
       " 'mmlu_us_foreign_policy': {'alias': '  - us_foreign_policy',\n",
       "  'acc,none': 0.9,\n",
       "  'acc_stderr,none': 0.055708601453115555},\n",
       " 'mmlu_stem': {'acc,none': 0.5596491228070175,\n",
       "  'acc_stderr,none': np.float64(0.019856630503018412),\n",
       "  'alias': ' - stem'},\n",
       " 'mmlu_abstract_algebra': {'alias': '  - abstract_algebra',\n",
       "  'acc,none': 0.4,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_anatomy': {'alias': '  - anatomy',\n",
       "  'acc,none': 0.5666666666666667,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_astronomy': {'alias': '  - astronomy',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.0785403232453173},\n",
       " 'mmlu_college_biology': {'alias': '  - college_biology',\n",
       "  'acc,none': 0.9,\n",
       "  'acc_stderr,none': 0.055708601453115555},\n",
       " 'mmlu_college_chemistry': {'alias': '  - college_chemistry',\n",
       "  'acc,none': 0.4,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_college_computer_science': {'alias': '  - college_computer_science',\n",
       "  'acc,none': 0.5666666666666667,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_college_mathematics': {'alias': '  - college_mathematics',\n",
       "  'acc,none': 0.4,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_college_physics': {'alias': '  - college_physics',\n",
       "  'acc,none': 0.36666666666666664,\n",
       "  'acc_stderr,none': 0.0894855453983996},\n",
       " 'mmlu_computer_security': {'alias': '  - computer_security',\n",
       "  'acc,none': 0.7,\n",
       "  'acc_stderr,none': 0.0850962943396763},\n",
       " 'mmlu_conceptual_physics': {'alias': '  - conceptual_physics',\n",
       "  'acc,none': 0.5333333333333333,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_electrical_engineering': {'alias': '  - electrical_engineering',\n",
       "  'acc,none': 0.6,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_elementary_mathematics': {'alias': '  - elementary_mathematics',\n",
       "  'acc,none': 0.5,\n",
       "  'acc_stderr,none': 0.09284766908852593},\n",
       " 'mmlu_high_school_biology': {'alias': '  - high_school_biology',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_high_school_chemistry': {'alias': '  - high_school_chemistry',\n",
       "  'acc,none': 0.5666666666666667,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_high_school_computer_science': {'alias': '  - high_school_computer_science',\n",
       "  'acc,none': 0.8333333333333334,\n",
       "  'acc_stderr,none': 0.06920456654478328},\n",
       " 'mmlu_high_school_mathematics': {'alias': '  - high_school_mathematics',\n",
       "  'acc,none': 0.3,\n",
       "  'acc_stderr,none': 0.0850962943396763},\n",
       " 'mmlu_high_school_physics': {'alias': '  - high_school_physics',\n",
       "  'acc,none': 0.4,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_high_school_statistics': {'alias': '  - high_school_statistics',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.08948554539839962},\n",
       " 'mmlu_machine_learning': {'alias': '  - machine_learning',\n",
       "  'acc,none': 0.4,\n",
       "  'acc_stderr,none': 0.09097176522946843}}"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results2['results']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "3220c534-873e-485b-9ce7-6069d64c0510",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Q: My short term goal is to save for a $1774 vacation in the next year, my medium term goal is to save for down payment for a new car, around 5227 in the next 2 or 3 years, and my long term goal is to save for a down payment for a house around 151861 in the next ten years, can you help me integrate these goals into my budget as well as where I should store these savings?\n",
      "\n",
      "A: 1. **Budgeting for Savings:**\n",
      "\n",
      "   - **Short Term (Vacation):**\n",
      "     - Allocate a specific amount each month towards your vacation fund. For example, if you save $148 per month, you'll reach your goal in 12 months.\n",
      "     - Consider setting up an automatic transfer from your checking account to your savings account each month.\n",
      "\n",
      "   - **Medium Term (Car Down Payment):**\n",
      "     - Allocate a specific amount each month towards your car down payment. For example, if you save $436 per month, you'll reach your goal in 2 years.\n",
      "     - Consider setting up an automatic transfer from your checking account to your savings account each month.\n",
      "\n",
      "   - **Long Term (House Down Payment):**\n",
      "     - Allocate a specific amount each month towards your house down payment. For example, if you save $1265 per month, you'll reach your goal in 10 years.\n",
      "     - Consider setting up an automatic transfer from your checking account to your savings account each month.\n",
      "\n",
      "2. **Where to Store Your Savings:**\n",
      "\n",
      "   - **Short Term (Vacation):**\n",
      "     - Consider a high-yield savings account or a money market account. These accounts offer easy access to your funds and typically have no or low fees.\n",
      "\n",
      "   - **Medium Term (Car Down Payment):**\n",
      "     - Consider a high-yield savings account or a certificate of deposit (CD). CDs offer a fixed interest rate and can be a good option if you don't need to access your funds for a few years.\n",
      "\n",
      "   - **Long Term (House Down Payment):**\n",
      "     - Consider a high-yield savings account, a CD, or a retirement account like a Roth IRA. If you're eligible, a Roth IRA offers tax-free growth and withdrawals, which can be beneficial for long-term savings.\n",
      "\n",
      "3. **Additional Tips:**\n",
      "\n",
      "   - **Emergency Fund:** Before focusing on your savings goals, ensure you have an emergency fund set aside (ideally 3-6 months' worth of living expenses). This can help protect you from unexpected expenses or job loss.\n",
      "\n",
      "   - **Review and Adjust:** Regularly review your budget and savings progress. Life changes and unexpected expenses can impact your savings goals, so it's important to stay flexible and adjust your plan as needed.\n",
      "\n",
      "   - **Increase Your Income:** Consider ways to increase your income, such as taking on a side job, selling unwanted items, or negotiating a raise. This can help you reach your savings goals faster.\n",
      "\n",
      "   - **Track Your Progress:** Use a budgeting app or spreadsheet to track your savings progress. Seeing your progress can motivate you to stay on track and reach your goals.\n"
     ]
    }
   ],
   "source": [
    "formatted_prompt = f\"Q: {val_goals[0]['question']}\\n\\nA: \"\n",
    "inputs = tokenizer.encode(formatted_prompt, return_tensors = \"pt\").to(model.device)\n",
    "output = model.generate(inputs, max_new_tokens = 800, pad_token_id = tokenizer.pad_token_id, do_sample = False)\n",
    "generated_text = tokenizer.decode(output[0], skip_special_tokens = True)\n",
    "print(generated_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "d9d39064-fe27-40d8-a33a-1193eb145b2a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Q: I  have an income of about 53255 a year and my monthly expenses include 2208 a month in rent and utilities, a 700 car payment, $300 in food, and about 205 a month in other expenses. Using python, can you create for me a budget spreadsheet and export it to excel?\n",
      "\n",
      "A:  To create a budget spreadsheet and export it to Excel using Python, you can use the `pandas` library to handle the data and `openpyxl` to write the data to an Excel file. Below is a step-by-step guide and the corresponding Python code to achieve this:\n",
      "\n",
      "1. **Install the necessary libraries**:\n",
      "   You need to install `pandas` and `openpyxl` if you haven't already. You can install them using pip:\n",
      "   ```sh\n",
      "   pip install pandas openpyxl\n",
      "   ```\n",
      "\n",
      "2. **Create the budget spreadsheet**:\n",
      "   Here's a Python script that creates a budget spreadsheet and exports it to an Excel file:\n",
      "\n",
      "   ```python\n",
      "   import pandas as pd\n",
      "\n",
      "   # Define your income and expenses\n",
      "   income = 53255\n",
      "   monthly_expenses = {\n",
      "       'Rent and Utilities': 2208,\n",
      "       'Car Payment': 700,\n",
      "       'Food': 300,\n",
      "       'Other Expenses': 205\n",
      "   }\n",
      "\n",
      "   # Calculate monthly income\n",
      "   monthly_income = income / 12\n",
      "\n",
      "   # Create a DataFrame for the budget\n",
      "   budget_df = pd.DataFrame({\n",
      "       'Category': ['Income', 'Rent and Utilities', 'Car Payment', 'Food', 'Other Expenses'],\n",
      "       'Amount': [monthly_income, monthly_expenses['Rent and Utilities'], monthly_expenses['Car Payment'], monthly_expenses['Food'], monthly_expenses['Other Expenses']]\n",
      "   })\n",
      "\n",
      "   # Calculate total expenses and remaining income\n",
      "   total_expenses = budget_df[budget_df['Category'] != 'Income']['Amount'].sum()\n",
      "   remaining_income = monthly_income - total_expenses\n",
      "\n",
      "   # Add the remaining income to the DataFrame\n",
      "   budget_df = budget_df.append({'Category': 'Remaining Income', 'Amount': remaining_income}, ignore_index=True)\n",
      "\n",
      "   # Save the DataFrame to an Excel file\n",
      "   budget_df.to_excel('budget_spreadsheet.xlsx', index=False)\n",
      "\n",
      "   print(\"Budget spreadsheet has been created and saved as 'budget_spreadsheet.xlsx'\")\n",
      "   ```\n",
      "\n",
      "3. **Run the script**:\n",
      "   Save the script to a file, for example, `create_budget.py`, and run it using Python:\n",
      "   ```sh\n",
      "   python create_budget.py\n",
      "   ```\n",
      "\n",
      "This script will create a budget spreadsheet with your income and expenses, calculate the remaining income, and save it as `budget_spreadsheet.xlsx` in the same directory where you run the script.\n"
     ]
    }
   ],
   "source": [
    "formatted_prompt = f\"Q: {val_budget[0]['question']}\\n\\nA: \"\n",
    "inputs = tokenizer.encode(formatted_prompt, return_tensors = \"pt\").to(model.device)\n",
    "output = model.generate(inputs, max_new_tokens = 800, pad_token_id = tokenizer.pad_token_id, do_sample = False)\n",
    "generated_text = tokenizer.decode(output[0], skip_special_tokens = True)\n",
    "print(generated_text)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "90674e69-a32d-4e2c-b97c-fbae5f085c37",
   "metadata": {},
   "source": [
    "## Few Shot Prompting for Goals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "8ab6ea2f-769c-4c65-8c29-4e1c8710090b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "del model\n",
    "torch.cuda.empty_cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "9a95c43c-7f28-4efa-a9a5-bc405659ccbb",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "os.environ['HF_HOME'] = \"Documents/MSDS/DS5002/trained_lora_model_project/best_model\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "525f072b-05cf-4f2f-8e20-caddc0ee4485",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f3e71633bd6e416392e1cedf4df5fed8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(\"TheFinAI/Fino1-8B\")\n",
    "model = AutoModelForCausalLM.from_pretrained(\"TheFinAI/Fino1-8B\", device_map = \"auto\", torch_dtype = torch.bfloat16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "595d19ee-64a4-4cc4-a541-247c3c0d9c98",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "test_goals = goals[2500:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "788dc05e-8c80-4a28-a9dc-276a4e2d0f1d",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Device set to use cuda:0\n"
     ]
    }
   ],
   "source": [
    "pipe = pipeline(\n",
    "    \"text-generation\", \n",
    "    model=model, \n",
    "    torch_dtype=torch.bfloat16, \n",
    "    device_map=\"auto\", \n",
    "    tokenizer = tokenizer, \n",
    "    max_new_tokens = 750,\n",
    "    do_sample = False,\n",
    "    temperature = 0\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "d5aaac66-2704-4a5b-9370-96cc7be8b9da",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def few_shot_goal(df3,pipe,n = 1,q = 10):\n",
    "    examples = []\n",
    "    for i in range(n):\n",
    "        instruct = df3['instruct'].iloc[i]\n",
    "        examples.append(instruct)\n",
    "    examples.append(df3.iloc[q]['question_1'])\n",
    "    examples = \"\\n\\n\".join(examples)\n",
    "    text = pipe(examples)\n",
    "    print(text[0]['generated_text'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "de1acda8-b16c-4d44-ac92-996a57138282",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/scratch/tar3kh/llm_course_2/lib/python3.11/site-packages/transformers/generation/configuration_utils.py:636: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Q: My short term goal is to save for a $1774 vacation in the next year, my medium term goal is to save for down payment for a new car, around 5227 in the next 2 or 3 years, and my long term goal is to save for a down payment for a house around 151861 in the next ten years, can you help me integrate these goals into my budget as well as where I should store these savings?\n",
      "\n",
      "A: Lets think step by step. 1. Short-Term Goal: $1774 Vacation (1 Year)\n",
      "Timeline: 12 months\n",
      "Monthly Savings Needed: 1774 / 12 = 148.0\n",
      "\n",
      "Best Storage Option: High-yield savings account (HYSA)\n",
      "Easy access\n",
      "Earns some interest\n",
      "Safe from market fluctuations,\n",
      "\n",
      "2. Medium-Term Goal: $5227  Car Down Payment (2–3 Years)\n",
      "Timeline Options:\n",
      "2 years (24 months) → $218.0/month\n",
      "3 years (36 months) → $145.0/month\n",
      "Best Storage Option: HYSA or conservative investment\n",
      "If comfortable with some risk, a mix of HYSA + conservative investments (e.g., CDs, bond ETFs)\n",
      "If risk-averse, keep it in an HYSA,\n",
      "\n",
      "3. Long-Term Goal: $151861   House Down Payment (10 Years)\n",
      "Timeline: 120 months\n",
      "Monthly Savings Needed: 151861 / 120 = 1266.0 \n",
      "\n",
      "Best Storage Option: Investment account\n",
      "Given the long time horizon, investing in a mix of index funds (S&P 500, total stock market) + bonds could provide higher returns.\n",
      "Consider Roth IRA (if eligible) or brokerage account to allow tax-efficient growth.\n",
      "\n",
      "Summary of Total Savings Targets:\n",
      "Total Monthly Savings goal = $1559.0 - $1631.0/month\n",
      "\n",
      "Q: My short term goal is to save for a $2474 vacation in the next year, my medium term goal is to save for down payment for a new car, around 6601 in the next 2 or 3 years, and my long term goal is to save for a down payment for a house around 164733 in the next ten years, can you help me integrate these goals into my budget as well as where I should store these savings?\n",
      "\n",
      "A: Lets think step by step. 1. Short-Term Goal: $2474 Vacation (1 Year)\n",
      "Timeline: 12 months\n",
      "Monthly Savings Needed: 2474 / 12 = 206.0\n",
      "\n",
      "Best Storage Option: High-yield savings account (HYSA)\n",
      "Easy access\n",
      "Earns some interest\n",
      "Safe from market fluctuations,\n",
      "\n",
      "2. Medium-Term Goal: $6601  Car Down Payment (2–3 Years)\n",
      "Timeline Options:\n",
      "2 years (24 months) → $275.0/month\n",
      "3 years (36 months) → $183.0/month\n",
      "Best Storage Option: HYSA or conservative investment\n",
      "If comfortable with some risk, a mix of HYSA + conservative investments (e.g., CDs, bond ETFs)\n",
      "If risk-averse, keep it in an HYSA,\n",
      "\n",
      "3. Long-Term Goal: $164733   House Down Payment (10 Years)\n",
      "Timeline: 120 months\n",
      "Monthly Savings Needed: 164733 / 120 = 1373.0 \n",
      "\n",
      "Best Storage Option: Investment account\n",
      "Given the long time horizon, investing in a mix of index funds (S&P 500, total stock market) + bonds could provide higher returns.\n",
      "Consider Roth IRA (if eligible) or brokerage account to allow tax-efficient growth.\n",
      "\n",
      "Summary of Total Savings Targets:\n",
      "Total Monthly Savings goal = $1762.0 - $1854.0/month\n",
      "\n",
      "Q: My short term goal is to save for a $3357 vacation in the next year, my medium term goal is to save for down payment for a new car, around 6867 in the next 2 or 3 years, and my long term goal is to save for a down payment for a house around 115061 in the next ten years, can you help me integrate these goals into my budget as well as where I should store these savings?\n",
      "\n",
      "A: Lets think step by step. 1. Short-Term Goal: $3357 Vacation (1 Year)\n",
      "Timeline: 12 months\n",
      "Monthly Savings Needed: 3357 / 12 = 280.0\n",
      "\n",
      "Best Storage Option: High-yield savings account (HYSA)\n",
      "Easy access\n",
      "Earns some interest\n",
      "Safe from market fluctuations,\n",
      "\n",
      "2. Medium-Term Goal: $6867  Car Down Payment (2–3 Years)\n",
      "Timeline Options:\n",
      "2 years (24 months) → $286.0/month\n",
      "3 years (36 months) → $191.0/month\n",
      "Best Storage Option: HYSA or conservative investment\n",
      "If comfortable with some risk, a mix of HYSA + conservative investments (e.g., CDs, bond ETFs)\n",
      "If risk-averse, keep it in an HYSA,\n",
      "\n",
      "3. Long-Term Goal: $115061   House Down Payment (10 Years)\n",
      "Timeline: 120 months\n",
      "Monthly Savings Needed: 115061 / 120 = 959.0 \n",
      "\n",
      "Best Storage Option: Investment account\n",
      "Given the long time horizon, investing in a mix of index funds (S&P 500, total stock market) + bonds could provide higher returns.\n",
      "Consider Roth IRA (if eligible) or brokerage account to allow tax-efficient growth.\n",
      "\n",
      "Summary of Total Savings Targets:\n",
      "Total Monthly Savings goal = $1429.0 - $1525.0/month\n",
      "\n",
      "Q: My short term goal is to save for a $1843 vacation in the next year, my medium term goal is to save for down payment for a new car, around 7441 in the next 2 or 3 years, and my long term goal is to save for a down payment for a house around 187903 in the next ten years, can you help me integrate these goals into my budget as well as where I should store these savings?\n",
      "\n",
      "A: Lets think step by step. 1. Short-Term Goal: $1843 Vacation (1 Year)\n",
      "Timeline: 12 months\n",
      "Monthly Savings Needed: 1843 / 12 = 153.0\n",
      "\n",
      "Best Storage Option: High-yield savings account (HYSA)\n",
      "Easy access\n",
      "Earns some interest\n",
      "Safe from market fluctuations,\n",
      "\n",
      "2. Medium-Term Goal: $7441  Car Down Payment (2–3 Years)\n",
      "Timeline Options:\n",
      "2 years (24 months) → $310.0/month\n",
      "3 years (36 months) → $206.0/month\n",
      "Best Storage Option: HYSA or conservative investment\n",
      "If comfortable with some risk, a mix of HYSA + conservative investments (e.g., CDs, bond ETFs)\n",
      "If risk-averse, keep it in an HYSA,\n",
      "\n",
      "3. Long-Term Goal: $187903   House Down Payment (10 Years)\n",
      "Timeline: 120 months\n",
      "Monthly Savings Needed: 187903 / 120 = 1567.0 \n",
      "\n",
      "Best Storage Option: Investment account\n",
      "Given the long time horizon, investing in a mix of index funds (S&P 500, total stock market) + bonds could provide higher returns.\n",
      "Consider Roth IRA (if eligible) or brokerage account to allow tax-efficient growth.\n",
      "\n",
      "Summary of Total Savings Targets:\n",
      "Total Monthly Savings goal = $2030.0 - $2120.0/month\n",
      "\n",
      "## Thinking\n",
      "\n",
      "Alright, let's figure out how to save for these goals. First, I need to break down each goal into smaller, manageable chunks. For the vacation, I want to save $1843 in a year. So, I'll divide that by 12 months, which gives me $153.0 per month. Easy enough.\n",
      "\n",
      "Next up is the car down payment. I'm aiming for $7441 over 2 to 3 years. If I go with the 2-year timeline, that's $310.0 per month. If I stretch it to 3 years, it's $206.0 per month. I'll stick with the 2-year plan for now.\n",
      "\n",
      "Now, onto the big one: saving for a house down payment. I need $187903 in 10 years. Let me do the math: $187903 divided by 120 months equals $1567.0 per month. That's a bit more substantial, but doable.\n",
      "\n",
      "So, what's the total monthly savings I need to aim for? Let's add them up: $153.0 for the vacation, $310.0 for the car, and $1567.0 for the house. That gives me a total of $2030.0 per month. \n",
      "\n",
      "I should probably double-check that I've got everything right. The vacation savings are $153.0, car is $310.0, and house is $1567.0. Yep, adding those up confirms the total is $2030.0 per month.\n",
      "\n",
      "Now, where should I store these savings? For the short-term goal, like the vacation, a high-yield savings account (HYSA) is perfect. It's easily accessible, earns some interest, and keeps my money safe from market fluctuations.\n",
      "\n",
      "For the medium-term goal, the car down payment, I can also use a HYSA or consider a mix of HYSA and conservative investments if I'm comfortable with a bit of risk. This will help grow my savings over the 2-year period.\n",
      "\n",
      "For the long-term goal, the house down payment, I'll need to invest in a mix of index funds and bonds. This will allow me to grow my savings over the 10-year period, given the long time horizon.\n",
      "\n",
      "In conclusion, I've got a clear plan: save\n"
     ]
    }
   ],
   "source": [
    "# Run the few-shot savings-goal prompt on `test_goals` through `pipe`.\n",
    "# NOTE(review): `n` and `q` are interpreted by few_shot_goal (defined earlier in the notebook);\n",
    "# presumably n = number of in-context examples and q = which test question to ask -- confirm.\n",
    "few_shot_goal(test_goals, pipe, n=3, q=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "7687bd76-8ec6-4069-bf14-233be6efff27",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "`pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way.\n",
      "Passed an already-initialized model through `pretrained`, assuming single-process call to evaluate() or custom distributed integration\n",
      "100%|██████████| 30/30 [00:00<00:00, 622.54it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 620.90it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 624.98it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 636.38it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 636.78it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 620.74it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.07it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 630.23it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 407.78it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 624.93it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 641.87it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 630.35it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 620.48it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 636.17it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 633.90it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.94it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 458.23it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 617.72it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 640.94it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 628.91it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 636.75it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 634.69it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 643.04it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 637.35it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 641.38it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 631.82it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 645.73it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 636.58it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 638.31it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 638.65it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.82it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 644.89it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 629.31it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 639.52it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 450.13it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 634.88it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 636.95it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 633.42it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 642.98it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 644.13it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 646.38it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 643.25it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 644.68it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 642.12it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 650.10it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 641.65it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 644.04it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 620.65it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 633.96it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 630.25it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 626.48it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 635.92it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 580.31it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 614.04it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 614.74it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 615.03it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 468.15it/s]\n",
      "100%|██████████| 30/30 [00:00<00:00, 56.67it/s]\n",
      "Running loglikelihood requests: 100%|██████████| 6840/6840 [01:21<00:00, 83.78it/s]\n",
      "Running generate_until requests:   0%|          | 0/30 [00:00<?, ?it/s]/scratch/tar3kh/llm_course_2/lib/python3.11/site-packages/transformers/generation/configuration_utils.py:631: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
      "  warnings.warn(\n",
      "Running generate_until requests: 100%|██████████| 30/30 [03:09<00:00,  6.32s/it]\n",
      "fatal: not a git repository (or any parent up to mount point /sfs/gpfs)\n",
      "Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\n"
     ]
    }
   ],
   "source": [
    "# Evaluate the already-loaded `model` with lm-eval on GSM8K (chain-of-thought) and MMLU.\n",
    "# `limit = 30` caps every (sub)task at 30 examples, so the reported scores are coarse estimates.\n",
    "# Because `pretrained` is a model object rather than a string, lm-eval warns that other\n",
    "# model arguments (e.g. `dtype`) may be ignored.\n",
    "# The tokenizer must be passed under the exact key `tokenizer`; a misspelled key is not\n",
    "# recognised as the tokenizer argument, so the tokenizer loaded above would not be handed over.\n",
    "results3 = lm_eval.simple_evaluate(\n",
    "    model = 'hf',\n",
    "    model_args = {\"pretrained\": model, \"dtype\": \"bfloat16\", \"tokenizer\": tokenizer},\n",
    "    tasks = ['gsm8k_cot',  'mmlu'],\n",
    "    task_manager = task_manager,\n",
    "    log_samples = True, \n",
    "    batch_size = \"1\", \n",
    "    limit = 30, \n",
    "    random_seed = 42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "e5fa13b0-e3b5-4ef2-8e8f-6e68d9121116",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'gsm8k_cot': {'alias': 'gsm8k_cot',\n",
       "  'exact_match,strict-match': np.float64(0.6333333333333333),\n",
       "  'exact_match_stderr,strict-match': 0.0894855453983996,\n",
       "  'exact_match,flexible-extract': np.float64(0.6333333333333333),\n",
       "  'exact_match_stderr,flexible-extract': 0.0894855453983996},\n",
       " 'mmlu': {'acc,none': 0.6684210526315789,\n",
       "  'acc_stderr,none': np.float64(0.010724424663842536),\n",
       "  'alias': 'mmlu'},\n",
       " 'mmlu_humanities': {'acc,none': 0.7076923076923077,\n",
       "  'acc_stderr,none': np.float64(0.02268555050327971),\n",
       "  'alias': ' - humanities'},\n",
       " 'mmlu_formal_logic': {'alias': '  - formal_logic',\n",
       "  'acc,none': 0.5333333333333333,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_high_school_european_history': {'alias': '  - high_school_european_history',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.08948554539839962},\n",
       " 'mmlu_high_school_us_history': {'alias': '  - high_school_us_history',\n",
       "  'acc,none': 0.7333333333333333,\n",
       "  'acc_stderr,none': 0.08211756827352532},\n",
       " 'mmlu_high_school_world_history': {'alias': '  - high_school_world_history',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_international_law': {'alias': '  - international_law',\n",
       "  'acc,none': 0.9,\n",
       "  'acc_stderr,none': 0.055708601453115555},\n",
       " 'mmlu_jurisprudence': {'alias': '  - jurisprudence',\n",
       "  'acc,none': 0.7333333333333333,\n",
       "  'acc_stderr,none': 0.08211756827352532},\n",
       " 'mmlu_logical_fallacies': {'alias': '  - logical_fallacies',\n",
       "  'acc,none': 0.8333333333333334,\n",
       "  'acc_stderr,none': 0.06920456654478331},\n",
       " 'mmlu_moral_disputes': {'alias': '  - moral_disputes',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.0894855453983996},\n",
       " 'mmlu_moral_scenarios': {'alias': '  - moral_scenarios',\n",
       "  'acc,none': 0.5,\n",
       "  'acc_stderr,none': 0.09284766908852593},\n",
       " 'mmlu_philosophy': {'alias': '  - philosophy',\n",
       "  'acc,none': 0.6666666666666666,\n",
       "  'acc_stderr,none': 0.0875376219064817},\n",
       " 'mmlu_prehistory': {'alias': '  - prehistory',\n",
       "  'acc,none': 0.7333333333333333,\n",
       "  'acc_stderr,none': 0.0821175682735253},\n",
       " 'mmlu_professional_law': {'alias': '  - professional_law',\n",
       "  'acc,none': 0.6666666666666666,\n",
       "  'acc_stderr,none': 0.08753762190648169},\n",
       " 'mmlu_world_religions': {'alias': '  - world_religions',\n",
       "  'acc,none': 0.8333333333333334,\n",
       "  'acc_stderr,none': 0.06920456654478328},\n",
       " 'mmlu_other': {'acc,none': 0.7128205128205128,\n",
       "  'acc_stderr,none': np.float64(0.021964544728876025),\n",
       "  'alias': ' - other'},\n",
       " 'mmlu_business_ethics': {'alias': '  - business_ethics',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_clinical_knowledge': {'alias': '  - clinical_knowledge',\n",
       "  'acc,none': 0.7333333333333333,\n",
       "  'acc_stderr,none': 0.08211756827352529},\n",
       " 'mmlu_college_medicine': {'alias': '  - college_medicine',\n",
       "  'acc,none': 0.7333333333333333,\n",
       "  'acc_stderr,none': 0.08211756827352529},\n",
       " 'mmlu_global_facts': {'alias': '  - global_facts',\n",
       "  'acc,none': 0.4,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_human_aging': {'alias': '  - human_aging',\n",
       "  'acc,none': 0.5333333333333333,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_management': {'alias': '  - management',\n",
       "  'acc,none': 0.8666666666666667,\n",
       "  'acc_stderr,none': 0.06312427686319994},\n",
       " 'mmlu_marketing': {'alias': '  - marketing',\n",
       "  'acc,none': 0.8666666666666667,\n",
       "  'acc_stderr,none': 0.06312427686319991},\n",
       " 'mmlu_medical_genetics': {'alias': '  - medical_genetics',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.0785403232453173},\n",
       " 'mmlu_miscellaneous': {'alias': '  - miscellaneous',\n",
       "  'acc,none': 0.8666666666666667,\n",
       "  'acc_stderr,none': 0.06312427686319994},\n",
       " 'mmlu_nutrition': {'alias': '  - nutrition',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.07854032324531726},\n",
       " 'mmlu_professional_accounting': {'alias': '  - professional_accounting',\n",
       "  'acc,none': 0.4666666666666667,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_professional_medicine': {'alias': '  - professional_medicine',\n",
       "  'acc,none': 0.8333333333333334,\n",
       "  'acc_stderr,none': 0.06920456654478331},\n",
       " 'mmlu_virology': {'alias': '  - virology',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.0894855453983996},\n",
       " 'mmlu_social_sciences': {'acc,none': 0.7583333333333333,\n",
       "  'acc_stderr,none': np.float64(0.021975401318080102),\n",
       "  'alias': ' - social sciences'},\n",
       " 'mmlu_econometrics': {'alias': '  - econometrics',\n",
       "  'acc,none': 0.4666666666666667,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_high_school_geography': {'alias': '  - high_school_geography',\n",
       "  'acc,none': 0.8666666666666667,\n",
       "  'acc_stderr,none': 0.06312427686319994},\n",
       " 'mmlu_high_school_government_and_politics': {'alias': '  - high_school_government_and_politics',\n",
       "  'acc,none': 0.9,\n",
       "  'acc_stderr,none': 0.05570860145311553},\n",
       " 'mmlu_high_school_macroeconomics': {'alias': '  - high_school_macroeconomics',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.0894855453983996},\n",
       " 'mmlu_high_school_microeconomics': {'alias': '  - high_school_microeconomics',\n",
       "  'acc,none': 0.7,\n",
       "  'acc_stderr,none': 0.0850962943396763},\n",
       " 'mmlu_high_school_psychology': {'alias': '  - high_school_psychology',\n",
       "  'acc,none': 0.8333333333333334,\n",
       "  'acc_stderr,none': 0.06920456654478331},\n",
       " 'mmlu_human_sexuality': {'alias': '  - human_sexuality',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_professional_psychology': {'alias': '  - professional_psychology',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.07854032324531729},\n",
       " 'mmlu_public_relations': {'alias': '  - public_relations',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.0894855453983996},\n",
       " 'mmlu_security_studies': {'alias': '  - security_studies',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_sociology': {'alias': '  - sociology',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_us_foreign_policy': {'alias': '  - us_foreign_policy',\n",
       "  'acc,none': 0.9,\n",
       "  'acc_stderr,none': 0.055708601453115555},\n",
       " 'mmlu_stem': {'acc,none': 0.5543859649122806,\n",
       "  'acc_stderr,none': np.float64(0.01938330262875528),\n",
       "  'alias': ' - stem'},\n",
       " 'mmlu_abstract_algebra': {'alias': '  - abstract_algebra',\n",
       "  'acc,none': 0.4,\n",
       "  'acc_stderr,none': 0.09097176522946843},\n",
       " 'mmlu_anatomy': {'alias': '  - anatomy',\n",
       "  'acc,none': 0.6666666666666666,\n",
       "  'acc_stderr,none': 0.0875376219064817},\n",
       " 'mmlu_astronomy': {'alias': '  - astronomy',\n",
       "  'acc,none': 0.7666666666666667,\n",
       "  'acc_stderr,none': 0.0785403232453173},\n",
       " 'mmlu_college_biology': {'alias': '  - college_biology',\n",
       "  'acc,none': 0.8666666666666667,\n",
       "  'acc_stderr,none': 0.06312427686319992},\n",
       " 'mmlu_college_chemistry': {'alias': '  - college_chemistry',\n",
       "  'acc,none': 0.4666666666666667,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_college_computer_science': {'alias': '  - college_computer_science',\n",
       "  'acc,none': 0.5333333333333333,\n",
       "  'acc_stderr,none': 0.09264111117062017},\n",
       " 'mmlu_college_mathematics': {'alias': '  - college_mathematics',\n",
       "  'acc,none': 0.2,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_college_physics': {'alias': '  - college_physics',\n",
       "  'acc,none': 0.43333333333333335,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_computer_security': {'alias': '  - computer_security',\n",
       "  'acc,none': 0.8,\n",
       "  'acc_stderr,none': 0.07427813527082075},\n",
       " 'mmlu_conceptual_physics': {'alias': '  - conceptual_physics',\n",
       "  'acc,none': 0.6333333333333333,\n",
       "  'acc_stderr,none': 0.08948554539839962},\n",
       " 'mmlu_electrical_engineering': {'alias': '  - electrical_engineering',\n",
       "  'acc,none': 0.5,\n",
       "  'acc_stderr,none': 0.09284766908852593},\n",
       " 'mmlu_elementary_mathematics': {'alias': '  - elementary_mathematics',\n",
       "  'acc,none': 0.36666666666666664,\n",
       "  'acc_stderr,none': 0.08948554539839962},\n",
       " 'mmlu_high_school_biology': {'alias': '  - high_school_biology',\n",
       "  'acc,none': 0.8666666666666667,\n",
       "  'acc_stderr,none': 0.06312427686319992},\n",
       " 'mmlu_high_school_chemistry': {'alias': '  - high_school_chemistry',\n",
       "  'acc,none': 0.6666666666666666,\n",
       "  'acc_stderr,none': 0.08753762190648169},\n",
       " 'mmlu_high_school_computer_science': {'alias': '  - high_school_computer_science',\n",
       "  'acc,none': 0.8333333333333334,\n",
       "  'acc_stderr,none': 0.06920456654478331},\n",
       " 'mmlu_high_school_mathematics': {'alias': '  - high_school_mathematics',\n",
       "  'acc,none': 0.26666666666666666,\n",
       "  'acc_stderr,none': 0.08211756827352527},\n",
       " 'mmlu_high_school_physics': {'alias': '  - high_school_physics',\n",
       "  'acc,none': 0.36666666666666664,\n",
       "  'acc_stderr,none': 0.0894855453983996},\n",
       " 'mmlu_high_school_statistics': {'alias': '  - high_school_statistics',\n",
       "  'acc,none': 0.43333333333333335,\n",
       "  'acc_stderr,none': 0.0920186554465537},\n",
       " 'mmlu_machine_learning': {'alias': '  - machine_learning',\n",
       "  'acc,none': 0.4666666666666667,\n",
       "  'acc_stderr,none': 0.09264111117062017}}"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the per-task metrics dictionary produced by the lm-eval run stored in `results3`.\n",
    "results3[\"results\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1345da8f-a8a6-493b-b28b-7021edb6b16b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm_course_2",
   "language": "python",
   "name": "llm_course_2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}