{ "cells": [
{ "cell_type": "code", "execution_count": 1, "id": "e119e71a-f88a-4d5c-90fb-e60b84c4f42c", "metadata": {}, "outputs": [],
"source": [
"from transformers import (\n",
"    AutoModelForCausalLM,\n",
"    AutoTokenizer,\n",
")\n",
"from peft import PeftModel, PeftConfig\n",
"import torch\n",
"import gradio as gr\n",
"\n",
"# Put the whole model on the current GPU when one is available; otherwise fall back to CPU\n",
"d_map = {\"\": torch.cuda.current_device()} if torch.cuda.is_available() else None\n",
"local_model_path = \"outputs/checkpoint-100\"  # Path to the saved LoRA adapter checkpoint"
] },
{ "cell_type": "code", "execution_count": 2, "id": "ba591ab9-5029-46e8-b9a9-428de3896e62", "metadata": {}, "outputs": [],
"source": [
"# Read the adapter's PEFT config to find the base model it was trained from\n",
"config = PeftConfig.from_pretrained(local_model_path)\n",
"\n",
"# Load the base model in half precision\n",
"model = AutoModelForCausalLM.from_pretrained(\n",
"    config.base_model_name_or_path,\n",
"    return_dict=True,\n",
"    torch_dtype=torch.float16,\n",
"    device_map=d_map,\n",
"    trust_remote_code=True\n",
")\n",
"tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, trust_remote_code=True)"
] },
{ "cell_type": "code", "execution_count": 4, "id": "c1d36c14-0bfc-4215-8576-bb390a3a6114", "metadata": {},
"outputs": [ { "data": { "text/plain": [
"PeftModelForCausalLM(\n",
"  (base_model): LoraModel(\n",
"    (model): MistralForCausalLM(\n",
"      (model): MistralModel(\n",
"        (embed_tokens): Embedding(32000, 4096)\n",
"        (layers): ModuleList(\n",
"          (0-31): 32 x MistralDecoderLayer(\n",
"            (self_attn): MistralAttention(\n",
"              (q_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=4096, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=4096, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (k_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=1024, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=1024, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (v_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=1024, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=1024, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (o_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=4096, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=4096, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (rotary_emb): MistralRotaryEmbedding()\n",
"            )\n",
"            (mlp): MistralMLP(\n",
"              (gate_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=14336, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=14336, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (up_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=14336, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=14336, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (down_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=14336, out_features=4096, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=14336, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=4096, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (act_fn): SiLU()\n",
"            )\n",
"            (input_layernorm): MistralRMSNorm()\n",
"            (post_attention_layernorm): MistralRMSNorm()\n",
"          )\n",
"        )\n",
"        (norm): MistralRMSNorm()\n",
"      )\n",
"      (lm_head): Linear(in_features=4096, out_features=32000, bias=False)\n",
"    )\n",
"  )\n",
")"
] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Attach the LoRA adapter weights to the base model\n",
"mergedModel = PeftModel.from_pretrained(model, local_model_path)\n",
"# Optionally fold the adapters into the base weights instead:\n",
"# mergedModel = mergedModel.merge_and_unload()\n",
"mergedModel.eval()"
] },
{ "cell_type": "code", "execution_count": 7, "id": "09fa4575-0dec-4e62-a43f-77e57f68c4a9", "metadata": {}, "outputs": [],
"source": [
"def inference(query: str, model, tokenizer, temp=1.0, limit=200) -> str:\n",
"    # Run on whatever device the model weights were loaded onto\n",
"    device = model.device\n",
"\n",
"    prompt_template = \"\"\"\n",
"    Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
"    ### Question:\n",
"    {query}\n",
"\n",
"    ### Answer:\n",
"    \"\"\"\n",
"    prompt = prompt_template.format(query=query)\n",
"\n",
"    encodeds = tokenizer(prompt, return_tensors=\"pt\", add_special_tokens=True)\n",
"    model_inputs = encodeds.to(device)\n",
"\n",
"    # Sample up to `limit` new tokens; pad with EOS so generation stops cleanly\n",
"    with torch.no_grad():\n",
"        generated_ids = model.generate(\n",
"            **model_inputs,\n",
"            max_new_tokens=int(limit),\n",
"            temperature=temp,\n",
"            do_sample=True,\n",
"            pad_token_id=tokenizer.eos_token_id,\n",
"        )\n",
"    decoded = tokenizer.batch_decode(generated_ids)\n",
"    return decoded[0]"
] },
{ "cell_type": "code", "execution_count": 9, "id": "ba47700b-0787-4677-a5a1-c1a1b4063fe2", "metadata": {},
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"Running on local URL: http://127.0.0.1:7864\n",
"Running on public URL: https://b9a01680128206a745.gradio.live\n",
"\n",
"This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
] },
{ "data": { "text/html": [ "" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "metadata": {}, "output_type": "display_data" } ],
"source": [