{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e119e71a-f88a-4d5c-90fb-e60b84c4f42c", "metadata": {}, "outputs": [], "source": [ "from transformers import (\n", " AutoModelForCausalLM,\n", " AutoTokenizer,\n", " AutoTokenizer,\n", ")\n", "from peft import PeftModel, PeftConfig\n", "import torch\n", "import gradio as gr\n", "\n", "d_map = {\"\": torch.cuda.current_device()} if torch.cuda.is_available() else None\n", "local_model_path = \"outputs/checkpoint-100\" # Path to the combined weights" ] }, { "cell_type": "code", "execution_count": 2, "id": "ba591ab9-5029-46e8-b9a9-428de3896e62", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "913fc55a00aa4a2799729c6461060d28", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/571 [00:00 str:\n", " device = \"cuda:0\"\n", "\n", " prompt_template = \"\"\"\n", " Below is an instruction that describes a task. Write a response that appropriately completes the request.\n", " ### Question:\n", " {query}\n", "\n", " ### Answer:\n", " \"\"\"\n", " prompt = prompt_template.format(query=query)\n", "\n", " encodeds = tokenizer(prompt, return_tensors=\"pt\", add_special_tokens=True)\n", "\n", " model_inputs = encodeds.to(device)\n", "\n", " generated_ids = model.generate(**model_inputs, max_new_tokens=int(limit), temperature=temp, do_sample=True, pad_token_id=tokenizer.eos_token_id)\n", " decoded = tokenizer.batch_decode(generated_ids)\n", " return (decoded[0])" ] }, { "cell_type": "code", "execution_count": 9, "id": "ba47700b-0787-4677-a5a1-c1a1b4063fe2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7864\n", "Running on public URL: https://b9a01680128206a745.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "\n", "def predict(temp, limit, text):\n", " prompt = text\n", " out = inferance(prompt, mergedModel, tokenizer, temp = 1.0, limit = 200)\n", " return out\n", "\n", "pred = gr.Interface(\n", " predict,\n", " inputs=[\n", " gr.Slider(0.001, 10, value=0.1, label=\"Temperature\"),\n", " gr.Slider(1, 1024, value=128, label=\"Token Limit\"),\n", " gr.Textbox(\n", " label=\"Input\",\n", " lines=1,\n", " value=\"#### Human: What's the capital of Australia?#### Assistant: \",\n", " ),\n", " ],\n", " outputs='text',\n", ")\n", "\n", "pred.launch(share=True)" ] }, { "cell_type": "code", "execution_count": 15, "id": "8e0529f7-5e0d-4822-b826-ca49ca10c172", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Need \u001b[32m'write'\u001b[0m access token to create a Spaces repo.\n", "\n", " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n", " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n", " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n", "\n", " To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n", "Token: \n", "\u001b[31mAborted.\u001b[0m\n" ] } ], "source": [ "!gradio deploy" ] }, { "cell_type": "code", "execution_count": 10, "id": "1eecb055-93d7-485c-bb85-c025c06cd94c", "metadata": {}, "outputs": [], "source": [ "import datasets" ] }, { "cell_type": "code", "execution_count": 11, "id": "9e25a85a-5276-47ff-996d-ddb426eafac4", "metadata": {}, "outputs": [], "source": [ "from datasets import load_dataset" ] }, { "cell_type": "code", "execution_count": 12, "id": "b7bece25-22f4-4489-a5e8-10a1c28aeae8", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "441a29ce1fd14a259d8538497f16798a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading readme: 0%| | 0.00/709 [00:00" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import gradio as gr\n", "\n", "def predict(temp, limit, text):\n", " prompt = text\n", " out = inferance(prompt, model, tokenizer, temp = 1.0, limit = 200)\n", " return out\n", "\n", "pred = gr.Interface(\n", " predict,\n", " inputs=[\n", " gr.Slider(0.001, 10, value=0.1, label=\"Temperature\"),\n", " gr.Slider(1, 1024, value=128, label=\"Token Limit\"),\n", " gr.Textbox(\n", " label=\"Input\",\n", " lines=1,\n", " value=\"#### Human: What's the capital of Australia?#### Assistant: \",\n", " ),\n", " ],\n", " outputs='text',\n", ")\n", "\n", "pred.launch(share=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "6cac6e2f-d6eb-4311-95da-082bd92145c5", "metadata": {}, "outputs": [], "source": 
 { "cell_type": "code", "execution_count": null, "id": "6cac6e2f-d6eb-4311-95da-082bd92145c5", "metadata": {}, "outputs": [], "source": [] }
 ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }