Kuberwastaken committed
Commit c42af7b · Parent(s): 70a7738

Annoying rope scaling llama error

Files changed (1): app.py (+17 -4)
app.py CHANGED
@@ -5,7 +5,7 @@ import cv2
 from PIL import Image
 import numpy as np
 from transformers import pipeline, AutoProcessor, AutoModelForVision2Seq
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 import time

 from transformers import BlipProcessor, BlipForConditionalGeneration
@@ -41,13 +41,26 @@ def analyze_image(image, vision_components):

 def initialize_llm():
     model_id = "meta-llama/Llama-3.2-1B-Instruct"
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    hf_token = os.environ.get("HF_TOKEN")
+
+    # Load and patch config
+    config = AutoConfig.from_pretrained(model_id, token=hf_token)
+    if hasattr(config, "rope_scaling"):
+        rope_scaling = config.rope_scaling
+        if isinstance(rope_scaling, dict):
+            config.rope_scaling = {
+                "type": rope_scaling.get("type", "linear"),
+                "factor": rope_scaling.get("factor", 1.0)
+            }
+
+    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
+        config=config,
         torch_dtype=torch.bfloat16,
-        device_map="auto"
+        device_map="auto",
+        token=hf_token
     )
-
     return {
         "model": model,
         "tokenizer": tokenizer