Coool2 committed
Commit 8daf366 · verified · 1 Parent(s): acca590

Update agent.py

Files changed (1)
  1. agent.py  +4 -10
agent.py CHANGED
@@ -69,7 +69,6 @@ proj_llm = HuggingFaceLLM(
     tokenizer_name=model_id,
     device_map="auto",
     model_kwargs={"torch_dtype": "auto"},
-    max_new_tokens = 8000,
     generate_kwargs={
         "temperature": 0.1,
         "top_p": 1.0
@@ -79,23 +78,18 @@ proj_llm = HuggingFaceLLM(
 code_llm = HuggingFaceLLM(
     model_name="Qwen/Qwen2.5-Coder-3B-Instruct",
     tokenizer_name="Qwen/Qwen2.5-Coder-3B-Instruct",
-    device_map= "cpu", # Specify device here instead
+    device_map= "auto", # Specify device here instead
     model_kwargs={
-        "torch_dtype": "auto", # Use float32 for CPU
-        "low_cpu_mem_usage": True, # Memory optimization
-    },
+        "torch_dtype": "auto"},
     # Set generation parameters for precise, non-creative code output
     generate_kwargs={"do_sample": False}
 )
 
 embed_model = HuggingFaceEmbedding(
     model_name="llamaindex/vdr-2b-multi-v1",
-    device="cpu",
+    device="auto",
     trust_remote_code=True,
-    model_kwargs={
-        "torch_dtype": "auto", # Use float32 for CPU
-        "low_cpu_mem_usage": True, # Still get memory optimization
-    }
+    model_kwargs={"torch_dtype": "auto"}
 )
 
 logging.basicConfig(level=logging.INFO)
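
For context, a minimal sketch of how the two changed blocks read after this commit. Only the hunks above appear in the diff, so the llama-index import paths shown here are an assumption about how agent.py brings these classes in:

# Sketch of the post-commit configuration; imports are assumed, since they
# are outside the hunks shown in this commit.
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Coder LLM: device_map="auto" replaces the hard-coded "cpu" placement,
# and the CPU-specific model_kwargs are reduced to torch_dtype only.
code_llm = HuggingFaceLLM(
    model_name="Qwen/Qwen2.5-Coder-3B-Instruct",
    tokenizer_name="Qwen/Qwen2.5-Coder-3B-Instruct",
    device_map="auto",
    model_kwargs={"torch_dtype": "auto"},
    # Greedy decoding for precise, non-creative code output
    generate_kwargs={"do_sample": False},
)

# Embedding model: likewise switched from device="cpu" to device="auto".
embed_model = HuggingFaceEmbedding(
    model_name="llamaindex/vdr-2b-multi-v1",
    device="auto",
    trust_remote_code=True,
    model_kwargs={"torch_dtype": "auto"},
)

The net effect of the commit is to drop the forced CPU placement and the CPU-only memory kwargs (low_cpu_mem_usage), along with the max_new_tokens override on proj_llm, leaving device selection to the "auto" settings at load time.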