Upload lora_finetune.ipynb
lora_finetune.ipynb  +15 -314  CHANGED
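
This commit strips the notebook's saved state: every code cell's `outputs` array is emptied and each `execution_count` is reset to `null`, which is why the diff below removes 314 lines of logged output while adding only 15. As a minimal sketch of how such a cleanup can be done programmatically (assuming the `nbformat` package is installed; this is an illustration, not necessarily how this commit was produced):

```python
# Sketch: clear saved outputs and execution counts from a notebook.
# Assumes `pip install nbformat`; reproduces the effect of the diff below.
import nbformat

nb = nbformat.read("lora_finetune.ipynb", as_version=4)
for cell in nb.cells:
    if cell.cell_type == "code":
        cell.outputs = []            # drop stored stdout/stderr/widget state
        cell.execution_count = None  # reset the run counter
nbformat.write(nb, "lora_finetune.ipynb")
```

The same result can be had from the command line with `jupyter nbconvert --clear-output --inplace lora_finetune.ipynb`, or enforced automatically with a pre-commit hook such as `nbstripout`.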
@@ -30,24 +30,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "3c1b6b6a-3d7a-4ff3-a7aa-6340fb09fbca",
    "metadata": {
     "id": "3c1b6b6a-3d7a-4ff3-a7aa-6340fb09fbca",
     "outputId": "bc85d52f-7294-4404-a83f-d9a4af717dc8"
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Note: you may need to restart the kernel to use updated packages.\n",
-      "Note: you may need to restart the kernel to use updated packages.\n",
-      "Note: you may need to restart the kernel to use updated packages.\n",
-      "Note: you may need to restart the kernel to use updated packages.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Install dependencies.\n",
     "%pip install tiktoken xformers bitsandbytes transformers accelerate wandb dadaptation prodigyopt -q\n",
@@ -69,43 +58,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "e4838d36",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "--2024-11-25 10:59:07-- https://gist.github.com/bar-fingerman-dev/c9860c5ff8b155ae132243d12c3ca416/archive/04617f93ddbea80e2681f1422dcd7cf01335d500.zip\n",
-      "Resolving gist.github.com (gist.github.com)... 140.82.114.3\n",
-      "Connecting to gist.github.com (gist.github.com)|140.82.114.3|:443... connected.\n",
-      "HTTP request sent, awaiting response... 302 Found\n",
-      "Location: https://codeload.github.com/gist/c9860c5ff8b155ae132243d12c3ca416/zip/04617f93ddbea80e2681f1422dcd7cf01335d500 [following]\n",
-      "--2024-11-25 10:59:07-- https://codeload.github.com/gist/c9860c5ff8b155ae132243d12c3ca416/zip/04617f93ddbea80e2681f1422dcd7cf01335d500\n",
-      "Resolving codeload.github.com (codeload.github.com)... 140.82.114.10\n",
-      "Connecting to codeload.github.com (codeload.github.com)|140.82.114.10|:443... connected.\n",
-      "HTTP request sent, awaiting response... 200 OK\n",
-      "Length: 20362 (20K) [application/zip]\n",
-      "Saving to: ‘04617f93ddbea80e2681f1422dcd7cf01335d500.zip’\n",
-      "\n",
-      "04617f93ddbea80e268 100%[===================>] 19.88K --.-KB/s in 0.001s \n",
-      "\n",
-      "2024-11-25 10:59:07 (20.6 MB/s) - ‘04617f93ddbea80e2681f1422dcd7cf01335d500.zip’ saved [20362/20362]\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Archive: 04617f93ddbea80e2681f1422dcd7cf01335d500.zip\n",
-      "04617f93ddbea80e2681f1422dcd7cf01335d500\n",
-      "  inflating: ./c9860c5ff8b155ae132243d12c3ca416-04617f93ddbea80e2681f1422dcd7cf01335d500/bria_lora_train.py \n",
-      "rm: refusing to remove '.' or '..' directory: skipping './'\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "ZIP_NAME=\"04617f93ddbea80e2681f1422dcd7cf01335d500.zip\"\n",
     "EXTRACT_DIR=\"./\"\n",
@@ -302,7 +258,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "c1dd96ce-328a-43e1-a707-64fef13b4512",
    "metadata": {
     "colab": {
@@ -314,33 +270,7 @@
    "outputId": "ac3ffb05-bb0e-476f-cdd9-54f817d96bb0",
    "tags": []
   },
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "a59f3fbf50d640d7b133712a611d29ee",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Some kwargs in processor config are unused and will not have any effect: num_additional_image_tokens. \n",
-      "0it [00:00, ?it/s]Expanding inputs for image tokens in LLaVa should be done in processing. Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. Using processors without these attributes in the config is deprecated and will throw an error in v4.47.\n",
-      "Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)\n",
-      "10it [00:17, 1.84s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
-      "30it [00:47, 1.59s/it]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
    "from transformers import pipeline, AutoProcessor\n",
    "from PIL import Image \n",
@@ -419,72 +349,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "e9e2f15d",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1adf99a71ed04f6aaaa1cd89c4954625",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Saving the dataset (0/1 shards): 0%| | 0/30 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<table border=\"1\" class=\"dataframe\">\n",
-       " <thead>\n",
-       " <tr style=\"text-align: right;\">\n",
-       " <th>Image</th>\n",
-       " <th>text</th>\n",
-       " <th>generated_caption</th>\n",
-       " </tr>\n",
-       " </thead>\n",
-       " <tbody>\n",
-       " <tr>\n",
-       " <td><img src=\"ds_image_0.png\" width=\"100\"></td>\n",
-       " <td>N/A</td>\n",
-       " <td>A br!@ image of A pink and white cartoon character with a long tongue sticking out. in the style of 5fc3b10f</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td><img src=\"ds_image_1.png\" width=\"100\"></td>\n",
-       " <td>N/A</td>\n",
-       " <td>A br!@ image of A cartoon character with a smiling face and a pirate's hat is shown in the image. The character is surrounded by smoke, giving it a playful and whimsical appearance. in the style of 5fc3b10f</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td><img src=\"ds_image_2.png\" width=\"100\"></td>\n",
-       " <td>N/A</td>\n",
-       " <td>A br!@ image of A green cartoon fish with a face and a mouth is shown. in the style of 5fc3b10f</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td><img src=\"ds_image_3.png\" width=\"100\"></td>\n",
-       " <td>N/A</td>\n",
-       " <td>A br!@ image of A cartoon dinosaur with a red eye and a gray body is sitting on the ground. in the style of 5fc3b10f</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td><img src=\"ds_image_4.png\" width=\"100\"></td>\n",
-       " <td>N/A</td>\n",
-       " <td>A br!@ image of A cartoon character with a large body and a long neck is standing on the ground. The character has a mouth open, and its teeth are visible. The character is wearing a brown and white shirt. in the style of 5fc3b10f</td>\n",
-       " </tr>\n",
-       " </tbody>\n",
-       "</table>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from datasets import Dataset, Features, Image as HFImage, Value, DatasetDict, Split\n",
     "\n",
@@ -557,31 +425,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "15c833e9-14d6-4e1d-8a79-24e5ef667290",
    "metadata": {
     "id": "15c833e9-14d6-4e1d-8a79-24e5ef667290",
     "outputId": "ad92425f-9539-46f3-ddec-4b4a7e68c42b"
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
-      "To disable this warning, you can either:\n",
-      "\t- Avoid using `tokenizers` before the fork if possible\n",
-      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Configuration already exists at /home/ubuntu/.cache/huggingface/accelerate/default_config.yaml, will not override. Run `accelerate config` manually or pass a different `save_location`.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "!accelerate config default"
    ]
@@ -612,18 +462,7 @@
    "outputId": "b9440fd0-b0a1-4ce3-ba0d-9270484f72ba",
    "tags": []
   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
-      "Token is valid (permission: read).\n",
-      "Your token has been saved to /home/ubuntu/.cache/huggingface/token\n",
-      "Login successful\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
    "from huggingface_hub import login\n",
    "login(token=\"...\") "
@@ -723,101 +562,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "1347b60c",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
-      "To disable this warning, you can either:\n",
-      "\t- Avoid using `tokenizers` before the fork if possible\n",
-      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "11/25/2024 11:04:22 - INFO - __main__ - Distributed environment: NO\n",
-      "Num processes: 1\n",
-      "Process index: 0\n",
-      "Local process index: 0\n",
-      "Device: cuda\n",
-      "\n",
-      "Mixed precision type: bf16\n",
-      "\n",
-      "You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.\n",
-      "You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.\n",
-      "{'dynamic_thresholding_ratio', 'sample_max_value', 'clip_sample', 'rescale_betas_zero_snr', 'thresholding', 'variance_type', 'clip_sample_range'} was not found in config. Values will be initialized to default values.\n",
-      "{'use_quant_conv', 'latents_mean', 'use_post_quant_conv', 'latents_std', 'mid_block_add_attention', 'shift_factor'} was not found in config. Values will be initialized to default values.\n",
-      "11/25/2024 11:04:48 - INFO - datasets - PyTorch version 2.4.1 available.\n",
-      "11/25/2024 11:04:50 - INFO - __main__ - ***** Running training *****\n",
-      "11/25/2024 11:04:50 - INFO - __main__ - Num examples = 30\n",
-      "11/25/2024 11:04:50 - INFO - __main__ - Num batches each epoch = 30\n",
-      "11/25/2024 11:04:50 - INFO - __main__ - Num Epochs = 125\n",
-      "11/25/2024 11:04:50 - INFO - __main__ - Instantaneous batch size per device = 1\n",
-      "11/25/2024 11:04:50 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 4\n",
-      "11/25/2024 11:04:50 - INFO - __main__ - Gradient Accumulation steps = 4\n",
-      "11/25/2024 11:04:50 - INFO - __main__ - Total optimization steps = 1000\n",
-      "Resuming from checkpoint checkpoint-400\n",
-      "11/25/2024 11:04:50 - INFO - accelerate.accelerator - Loading states from /mnt/re-2024-model-demo-v3/checkpoint-400\n",
-      "11/25/2024 11:04:51 - INFO - accelerate.checkpointing - All model weights loaded successfully\n",
-      "/opt/conda/envs/pytorch/lib/python3.10/site-packages/accelerate/checkpointing.py:228: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
-      " optimizer_state = torch.load(input_optimizer_file, map_location=map_location)\n",
-      "11/25/2024 11:04:52 - INFO - accelerate.checkpointing - All optimizer states loaded successfully\n",
-      "/opt/conda/envs/pytorch/lib/python3.10/site-packages/accelerate/checkpointing.py:236: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
-      " scheduler.load_state_dict(torch.load(input_scheduler_file))\n",
-      "11/25/2024 11:04:52 - INFO - accelerate.checkpointing - All scheduler states loaded successfully\n",
-      "11/25/2024 11:04:52 - INFO - accelerate.checkpointing - All dataloader sampler states loaded successfully\n",
-      "/opt/conda/envs/pytorch/lib/python3.10/site-packages/accelerate/checkpointing.py:265: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
-      " states = torch.load(input_dir.joinpath(f\"{RNG_STATE_NAME}_{process_index}.pkl\"))\n",
-      "11/25/2024 11:04:52 - INFO - accelerate.checkpointing - All random states loaded successfully\n",
-      "11/25/2024 11:04:52 - INFO - accelerate.accelerator - Loading in 0 custom states\n",
-      "Steps: 40%|████████████████▍ | 400/1000 [00:00<?, ?it/s]/opt/conda/envs/pytorch/lib/python3.10/site-packages/torch/utils/checkpoint.py:1399: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
-      " with device_autocast_ctx, torch.cpu.amp.autocast(**cpu_autocast_kwargs), recompute_context: # type: ignore[attr-defined]\n",
-      "Steps: 60%|██████ | 600/1000 [15:02<26:47, 4.02s/it, loss=0.061, lr=0.0001]11/25/2024 11:19:54 - INFO - accelerate.accelerator - Saving current state to /mnt/re-2024-model-demo-v3/checkpoint-600\n",
-      "Model weights saved in /mnt/re-2024-model-demo-v3/checkpoint-600/pytorch_lora_weights.safetensors\n",
-      "11/25/2024 11:19:55 - INFO - accelerate.checkpointing - Optimizer state saved in /mnt/re-2024-model-demo-v3/checkpoint-600/optimizer.bin\n",
-      "11/25/2024 11:19:55 - INFO - accelerate.checkpointing - Scheduler state saved in /mnt/re-2024-model-demo-v3/checkpoint-600/scheduler.bin\n",
-      "11/25/2024 11:19:55 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in /mnt/re-2024-model-demo-v3/checkpoint-600/sampler.bin\n",
-      "11/25/2024 11:19:55 - INFO - accelerate.checkpointing - Random states saved in /mnt/re-2024-model-demo-v3/checkpoint-600/random_states_0.pkl\n",
-      "11/25/2024 11:19:55 - INFO - __main__ - Saved state to /mnt/re-2024-model-demo-v3/checkpoint-600\n",
-      "Steps: 80%|████████ | 800/1000 [29:50<13:17, 3.99s/it, loss=0.104, lr=0.0001]11/25/2024 11:34:43 - INFO - accelerate.accelerator - Saving current state to /mnt/re-2024-model-demo-v3/checkpoint-800\n",
-      "Model weights saved in /mnt/re-2024-model-demo-v3/checkpoint-800/pytorch_lora_weights.safetensors\n",
-      "11/25/2024 11:34:43 - INFO - accelerate.checkpointing - Optimizer state saved in /mnt/re-2024-model-demo-v3/checkpoint-800/optimizer.bin\n",
-      "11/25/2024 11:34:43 - INFO - accelerate.checkpointing - Scheduler state saved in /mnt/re-2024-model-demo-v3/checkpoint-800/scheduler.bin\n",
-      "11/25/2024 11:34:43 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in /mnt/re-2024-model-demo-v3/checkpoint-800/sampler.bin\n",
-      "11/25/2024 11:34:43 - INFO - accelerate.checkpointing - Random states saved in /mnt/re-2024-model-demo-v3/checkpoint-800/random_states_0.pkl\n",
-      "11/25/2024 11:34:43 - INFO - __main__ - Saved state to /mnt/re-2024-model-demo-v3/checkpoint-800\n",
-      "Steps: 100%|███████| 1000/1000 [44:38<00:00, 4.03s/it, loss=0.00663, lr=0.0001]11/25/2024 11:49:30 - INFO - accelerate.accelerator - Saving current state to /mnt/re-2024-model-demo-v3/checkpoint-1000\n",
-      "Model weights saved in /mnt/re-2024-model-demo-v3/checkpoint-1000/pytorch_lora_weights.safetensors\n",
-      "11/25/2024 11:49:31 - INFO - accelerate.checkpointing - Optimizer state saved in /mnt/re-2024-model-demo-v3/checkpoint-1000/optimizer.bin\n",
-      "11/25/2024 11:49:31 - INFO - accelerate.checkpointing - Scheduler state saved in /mnt/re-2024-model-demo-v3/checkpoint-1000/scheduler.bin\n",
-      "11/25/2024 11:49:31 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in /mnt/re-2024-model-demo-v3/checkpoint-1000/sampler.bin\n",
-      "11/25/2024 11:49:31 - INFO - accelerate.checkpointing - Random states saved in /mnt/re-2024-model-demo-v3/checkpoint-1000/random_states_0.pkl\n",
-      "11/25/2024 11:49:31 - INFO - __main__ - Saved state to /mnt/re-2024-model-demo-v3/checkpoint-1000\n",
-      "Steps: 100%|█████████| 1000/1000 [44:38<00:00, 4.03s/it, loss=0.013, lr=0.0001]Model weights saved in /mnt/re-2024-model-demo-v3/pytorch_lora_weights.safetensors\n",
-      "{'use_quant_conv', 'latents_mean', 'use_post_quant_conv', 'latents_std', 'mid_block_add_attention', 'shift_factor'} was not found in config. Values will be initialized to default values.\n",
-      "{'feature_extractor', 'add_watermarker', 'image_encoder'} was not found in config. Values will be initialized to default values.\n",
-      "\n",
-      "Loading pipeline components...: 0%| | 0/7 [00:00<?, ?it/s]\u001b[ALoaded tokenizer as CLIPTokenizer from `tokenizer` subfolder of briaai/BRIA-2.3.\n",
-      "Loaded tokenizer_2 as CLIPTokenizer from `tokenizer_2` subfolder of briaai/BRIA-2.3.\n",
-      "\n",
-      "Loading pipeline components...: 43%|█████▌ | 3/7 [00:00<00:00, 27.15it/s]\u001b[ALoaded text_encoder as CLIPTextModel from `text_encoder` subfolder of briaai/BRIA-2.3.\n",
-      "{'rescale_betas_zero_snr'} was not found in config. Values will be initialized to default values.\n",
-      "Loaded scheduler as EulerAncestralDiscreteScheduler from `scheduler` subfolder of briaai/BRIA-2.3.\n",
-      "Loaded text_encoder_2 as CLIPTextModelWithProjection from `text_encoder_2` subfolder of briaai/BRIA-2.3.\n",
-      "\n",
-      "Loading pipeline components...: 86%|███████████▏ | 6/7 [00:01<00:00, 2.60it/s]\u001b[ALoaded unet as UNet2DConditionModel from `unet` subfolder of briaai/BRIA-2.3.\n",
-      "Loading pipeline components...: 100%|█████████████| 7/7 [00:07<00:00, 1.06s/it]\n",
-      "Loading unet.\n",
-      "Steps: 100%|█████████| 1000/1000 [44:49<00:00, 4.48s/it, loss=0.013, lr=0.0001]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
    "# --resume_from_checkpoint=\"latest\" \\ \n",
    "\n",
@@ -856,7 +604,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "844c968c-7f9d-4b83-8544-343979c982d0",
    "metadata": {
     "colab": {
@@ -869,22 +617,7 @@
    "outputId": "c0e354cd-ebe1-4679-a042-aae1990c1deb",
    "tags": []
   },
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "f7e19a8ae90b4774a92d8d9e6cd104de",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Loading pipeline components...: 0%| | 0/7 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
    "import torch\n",
    "from huggingface_hub import hf_hub_download, upload_file\n",
@@ -1456,38 +1189,6 @@
    "source": [
     "del pipe"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "a8df092c",
-   "metadata": {},
-   "source": [
-    "## Deploy for inference 🚀"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7a2db8e6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# bin bash\n",
-    "\n",
-    "docker build --target infer_engin -t triton_server .\n",
-    "\n",
-    "docker run -d \\\n",
-    "\t--restart always \\\n",
-    "\t--gpus=1 \\\n",
-    "    --shm-size=4g --ulimit memlock=-1 --ulimit stack=67108864 \\\n",
-    "    -p 8000:8000 \\\n",
-    "    -p 8001:8001 \\\n",
-    "    -v /mnt/models:/mnt/models \\\n",
-    "    triton_server \\\n",
-    "    tritonserver \\\n",
-    "\t--strict-model-config=false \\\n",
-    "    --model-repository=/mnt/models"
-   ]
   }
  ],
 "metadata": {