File size: 14,841 Bytes
174101d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "849d6959-9a62-48a8-99ef-546326471ded",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ['HF_HOME'] = \"/scratch/tar3kh/models/cache\"\n",
    "import torch \n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline\n",
    "from datasets import load_dataset #datasets is huggingface's dataset package\n",
    "from peft import get_peft_model, LoraConfig, TaskType\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import PIL\n",
    "\n",
    "import lm_eval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2e1b8342-5173-4769-befe-f7b223b55bdb",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4bbdb7ede23f4270936e8dd44eddaf80",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a243af727b72473f92b73655c02e99b2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2a5cff447f3d432e8c2cd09e5f300eb1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8152bab4fba2498ab1c329c218bc5a69",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "36ca4d5a7a934c06b0633212e77f0549",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "88435538f5cb43e992ac4ed4735b1fb0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c03c2b9db40b4e84b2fa1a5e34b29fca",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0facfe4509ad491a8f3faf5630924ab1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(\"TheFinAI/Fino1-8B\")\n",
    "model = AutoModelForCausalLM.from_pretrained(\"ThinkTim21/FinPlan-1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6c71bde4-febf-420d-af03-fc66884fca74",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Prepare the model and tokenizer \n",
    "tokenizer.pad_token = tokenizer.eos_token # set padding token to EOS token\n",
    "model.config.pad_token_id = tokenizer.pad_token_id # set the padding token for model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "58d314e6-5d05-40f4-abbd-33745b98a1b7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "budget = pd.read_csv(\"budget_dataset.csv\")\n",
    "goals = pd.read_csv(\"goals_dataset.csv\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a2639954-4a6a-421c-8353-c12a1825833e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "budget['instruct_lora'] = budget.apply(\n",
    "    lambda row: f\"Q: {row['question']}\\n\\nA: \",\n",
    "    axis=1\n",
    ")\n",
    "\n",
    "goals['instruct_lora'] = goals.apply(\n",
    "    lambda row: f\"Q: {row['question']}\\n\\nA: \",\n",
    "    axis=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "381c73e0-65ac-457e-9af9-8b6be214284f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "48c03562b88f4f8c833c4c16c2b62725",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/2500 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "384114c4a98e4c9cb4bdaaefeb07bedf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/500 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from datasets import load_dataset, Dataset #datasets is huggingface's dataset package\n",
    "budget = budget.sample(frac = 1, random_state = 42) # randomly shuffle DF\n",
    "train_budget = budget[:2500]\n",
    "val_budget = budget[2500:]\n",
    "train_budget = Dataset.from_pandas(train_budget)\n",
    "val_budget = Dataset.from_pandas(val_budget)\n",
    "train_budget = train_budget.map(lambda samples: tokenizer(samples['instruct']), batched = True)\n",
    "val_budget = val_budget.map(lambda samples: tokenizer(samples['instruct']), batched = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "383bb5d8-122a-41f3-9955-25357326c6d8",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9182c49cc3b941c9814b050ff0bec026",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/2500 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bd3814a7314842e6bac6f6fc201bc36a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/500 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "goals = goals.sample(frac = 1, random_state = 42) # randomly shuffle DF\n",
    "train_goals = goals[:2500]\n",
    "val_goals = goals[2500:]\n",
    "train_goals = Dataset.from_pandas(train_goals)\n",
    "val_goals = Dataset.from_pandas(val_goals)\n",
    "train_goals = train_goals.map(lambda samples: tokenizer(samples['instruct']), batched = True)\n",
    "val_goals = val_goals.map(lambda samples: tokenizer(samples['instruct']), batched = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "4ae2b8aa-55bb-4e9c-b93b-c7040c7d3ed3",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/scratch/tar3kh/llm_course_2/lib/python3.11/site-packages/transformers/generation/configuration_utils.py:631: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
      "  warnings.warn(\n",
      "/scratch/tar3kh/llm_course_2/lib/python3.11/site-packages/transformers/generation/configuration_utils.py:636: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Q: My short term goal is to save for a $1774 vacation in the next year, my medium term goal is to save for down payment for a new car, around 5227 in the next 2 or 3 years, and my long term goal is to save for a down payment for a house around 151861 in the next ten years, can you help me integrate these goals into my budget as well as where I should store these savings?\n",
      "\n",
      "A:  Thinking\n",
      "\n",
      "Alright, let's figure out how to make these savings goals work with your budget. First, I want to make sure I understand what you're aiming for. You've got three goals: saving for a vacation, a new car, and eventually a house. Let's break each down.\n",
      "\n",
      "For the vacation, you're looking to save $1774 in the next year. That's a pretty manageable goal, especially if you break it down into monthly chunks. Let's see, $1774 divided by 12 months gives you about $147.83 per month. Not too bad, right?\n",
      "\n",
      "Next up is saving for a new car. You're aiming for $5227 in 2-3 years. That's a bit longer, so let's spread it out. If you divide $5227 by 24 months, you get about $218.08 per month. That's a bit more than the vacation savings, but still doable.\n",
      "\n",
      "Now, onto the big one: saving for a house down payment. You're looking at $151,861 in ten years. That's a long-term goal, so let's think about it in terms of monthly contributions. If you divide $151,861 by 120 months, you get about $1265.09 per month. That's a significant amount, but it's spread out over ten years, so it's manageable.\n",
      "\n",
      "Okay, now let's think about where to store these savings. For the vacation fund, you could just keep it in a separate savings account at your bank. It's easy to access and won't earn much interest, but it's perfect for short-term goals.\n",
      "\n",
      "For the car fund, you might consider a high-yield savings account. It'll earn a bit more interest than a regular savings account, and it's still easily accessible.\n",
      "\n",
      "The house fund is a different story. Since it's a long-term goal, you might want to consider a dedicated savings account or even a certificate of deposit (CD) with a longer term. This will help you avoid dipping into the funds for other expenses and earn a bit more interest over time.\n",
      "\n",
      "Let's recap. For the vacation, keep it simple with a regular savings account. For the car, a high-yield savings account should work. And for the house, consider a dedicated savings account or a CD. Now, let's make sure you're on track to meet these goals by regularly reviewing your progress and adjusting as needed.\n",
      "\n",
      "Oh, and don't forget to automate these transfers so you don't miss a payment. Set up monthly transfers from your checking account to each of these savings accounts, and you'll be on your way to achieving these goals in no time!\n",
      "\n",
      "## Final Response\n",
      "\n",
      "To integrate these savings goals into your budget, let's break down each goal into manageable monthly contributions. \n",
      "\n",
      "1. **Vacation Savings**: Save $1774 in the next year. Divide this by 12 months: approximately $147.83 per month.\n",
      "2. **Car Savings**: Save $5227 in 2-3 years. Divide this by 24 months: approximately $218.08 per month.\n",
      "3. **House Savings**: Save $151,861 in ten years. Divide this by 120 months: approximately $1265.09 per month.\n",
      "\n",
      "For storing these savings, consider the following:\n",
      "\n",
      "- **Vacation Fund**: Keep in a separate savings account at your bank for easy access.\n",
      "- **Car Fund**: Use a high-yield savings account to earn a bit more interest.\n",
      "- **House Fund**: Consider a dedicated savings account or a certificate of deposit (CD) with a longer term to avoid dipping into the funds and earn more interest over time.\n",
      "\n",
      "To ensure you're on track, automate monthly transfers from your checking account to each of these savings accounts. Regularly review your progress and adjust as needed to meet these goals. By following this plan, you'll be well on your way to achieving your savings objectives.\n"
     ]
    }
   ],
   "source": [
    "formatted_prompt = f\"Q: {val_goals[0]['question']}\\n\\nA: \"\n",
    "inputs = tokenizer.encode(formatted_prompt, return_tensors = \"pt\").to(model.device)\n",
    "output = model.generate(inputs, max_new_tokens = 800, pad_token_id = tokenizer.pad_token_id, do_sample = False)\n",
    "generated_text = tokenizer.decode(output[0], skip_special_tokens = True)\n",
    "print(generated_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "706fdd9f-ea8d-4ef5-9bd2-a4709dfb75cf",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "formatted_prompt = f\"Q: {val_budget[0]['question']}\\n\\nA: \"\n",
    "inputs = tokenizer.encode(formatted_prompt, return_tensors = \"pt\").to(model.device)\n",
    "output = model.generate(inputs, max_new_tokens = 800, pad_token_id = tokenizer.pad_token_id, do_sample = False)\n",
    "generated_text = tokenizer.decode(output[0], skip_special_tokens = True)\n",
    "print(generated_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9444cd08-5469-432f-adba-5cf95068d5b5",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm_course_2",
   "language": "python",
   "name": "llm_course_2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}