{ "cells": [
{ "cell_type": "code", "execution_count": 1, "id": "e119e71a-f88a-4d5c-90fb-e60b84c4f42c", "metadata": {}, "outputs": [],
"source": [
"from transformers import (\n",
"    AutoModelForCausalLM,\n",
"    AutoTokenizer,\n",
")\n",
"from peft import PeftModel, PeftConfig\n",
"import torch\n",
"import gradio as gr\n",
"\n",
"# Put the whole model on the current GPU when one is available; otherwise fall back to CPU\n",
"d_map = {\"\": torch.cuda.current_device()} if torch.cuda.is_available() else None\n",
"local_model_path = \"outputs/checkpoint-100\"  # Path to the saved LoRA adapter checkpoint"
] },
{ "cell_type": "code", "execution_count": 2, "id": "ba591ab9-5029-46e8-b9a9-428de3896e62", "metadata": {}, "outputs": [],
"source": [
"# Read the adapter's PEFT config to find the base model it was trained from\n",
"config = PeftConfig.from_pretrained(local_model_path)\n",
"\n",
"# Load the base model in half precision\n",
"model = AutoModelForCausalLM.from_pretrained(\n",
"    config.base_model_name_or_path,\n",
"    return_dict=True,\n",
"    torch_dtype=torch.float16,\n",
"    device_map=d_map,\n",
"    trust_remote_code=True\n",
")\n",
"tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, trust_remote_code=True)"
] },
{ "cell_type": "code", "execution_count": 4, "id": "c1d36c14-0bfc-4215-8576-bb390a3a6114", "metadata": {},
"outputs": [ { "data": { "text/plain": [
"PeftModelForCausalLM(\n",
"  (base_model): LoraModel(\n",
"    (model): MistralForCausalLM(\n",
"      (model): MistralModel(\n",
"        (embed_tokens): Embedding(32000, 4096)\n",
"        (layers): ModuleList(\n",
"          (0-31): 32 x MistralDecoderLayer(\n",
"            (self_attn): MistralAttention(\n",
"              (q_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=4096, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=4096, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (k_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=1024, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=1024, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (v_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=1024, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=1024, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (o_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=4096, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=4096, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (rotary_emb): MistralRotaryEmbedding()\n",
"            )\n",
"            (mlp): MistralMLP(\n",
"              (gate_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=14336, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=14336, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (up_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=4096, out_features=14336, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=4096, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=14336, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (down_proj): lora.Linear(\n",
"                (base_layer): Linear(in_features=14336, out_features=4096, bias=False)\n",
"                (lora_dropout): ModuleDict(\n",
"                  (default): Dropout(p=0.05, inplace=False)\n",
"                )\n",
"                (lora_A): ModuleDict(\n",
"                  (default): Linear(in_features=14336, out_features=8, bias=False)\n",
"                )\n",
"                (lora_B): ModuleDict(\n",
"                  (default): Linear(in_features=8, out_features=4096, bias=False)\n",
"                )\n",
"                (lora_embedding_A): ParameterDict()\n",
"                (lora_embedding_B): ParameterDict()\n",
"              )\n",
"              (act_fn): SiLU()\n",
"            )\n",
"            (input_layernorm): MistralRMSNorm()\n",
"            (post_attention_layernorm): MistralRMSNorm()\n",
"          )\n",
"        )\n",
"        (norm): MistralRMSNorm()\n",
"      )\n",
"      (lm_head): Linear(in_features=4096, out_features=32000, bias=False)\n",
"    )\n",
"  )\n",
")"
] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Attach the LoRA adapter weights to the base model\n",
"mergedModel = PeftModel.from_pretrained(model, local_model_path)\n",
"# Optionally fold the adapters into the base weights instead:\n",
"# mergedModel = mergedModel.merge_and_unload()\n",
"mergedModel.eval()"
] },
{ "cell_type": "code", "execution_count": 7, "id": "09fa4575-0dec-4e62-a43f-77e57f68c4a9", "metadata": {}, "outputs": [],
"source": [
"def inference(query: str, model, tokenizer, temp=1.0, limit=200) -> str:\n",
"    # Run on whatever device the model weights were loaded onto\n",
"    device = model.device\n",
"\n",
"    prompt_template = \"\"\"\n",
"    Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
"    ### Question:\n",
"    {query}\n",
"\n",
"    ### Answer:\n",
"    \"\"\"\n",
"    prompt = prompt_template.format(query=query)\n",
"\n",
"    encodeds = tokenizer(prompt, return_tensors=\"pt\", add_special_tokens=True)\n",
"    model_inputs = encodeds.to(device)\n",
"\n",
"    # Sample up to `limit` new tokens; pad with EOS so generation stops cleanly\n",
"    with torch.no_grad():\n",
"        generated_ids = model.generate(\n",
"            **model_inputs,\n",
"            max_new_tokens=int(limit),\n",
"            temperature=temp,\n",
"            do_sample=True,\n",
"            pad_token_id=tokenizer.eos_token_id,\n",
"        )\n",
"    decoded = tokenizer.batch_decode(generated_ids)\n",
"    return decoded[0]"
] },
{ "cell_type": "code", "execution_count": 9, "id": "ba47700b-0787-4677-a5a1-c1a1b4063fe2", "metadata": {},
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"Running on local URL: http://127.0.0.1:7864\n",
"Running on public URL: https://b9a01680128206a745.gradio.live\n",
"\n",
"This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
] },
{ "data": { "text/html": [ "" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "metadata": {}, "output_type": "display_data" } ],
"source": [