boryasbora committed on
Commit ce4c586 · verified · 1 Parent(s): 0e18b88

Delete huggingface_llm.py

Files changed (1)
  1. huggingface_llm.py +0 -60
huggingface_llm.py DELETED
@@ -1,60 +0,0 @@
-from langchain.llms.base import LLM
-from langchain.callbacks.manager import CallbackManagerForLLMRun
-from typing import Any, List, Optional, Dict
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-from pydantic import Field, PrivateAttr
-
-class HuggingFaceLLM(LLM):
-    model_id: str = Field(..., description="Hugging Face model ID")
-    temperature: float = Field(default=0.7, description="Sampling temperature")
-    max_tokens: int = Field(default=256, description="Maximum number of tokens to generate")
-    device: str = Field(default="cpu", description="Device to run the model on")
-
-    _model: Optional[Any] = PrivateAttr(default=None)
-    _tokenizer: Optional[Any] = PrivateAttr(default=None)
-
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        self.device = "cuda" if torch.cuda.is_available() and self.device != "cpu" else "cpu"
-        self._load_model()
-
-    def _load_model(self):
-        self._tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        self._model = AutoModelForCausalLM.from_pretrained(self.model_id)
-        self._model = self._model.to(torch.device(self.device))
-
-    @property
-    def _llm_type(self) -> str:
-        return "custom_huggingface"
-
-    def _call(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> str:
-        input_ids = self._tokenizer.encode(prompt, return_tensors="pt").to(self.device)
-
-        with torch.no_grad():
-            output = self._model.generate(
-                input_ids,
-                max_new_tokens=self.max_tokens,
-                temperature=self.temperature,
-                do_sample=True,
-                pad_token_id=self._tokenizer.eos_token_id
-            )
-
-        response = self._tokenizer.decode(output[0], skip_special_tokens=True)
-        return response[len(prompt):].strip()
-
-    @property
-    def _identifying_params(self) -> Dict[str, Any]:
-        return {"model_id": self.model_id, "temperature": self.temperature, "max_tokens": self.max_tokens, "device": self.device}
-
-    def __setattr__(self, name, value):
-        if name in ["_model", "_tokenizer"]:
-            object.__setattr__(self, name, value)
-        else:
-            super().__setattr__(name, value)
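
For reference, a minimal sketch of how the deleted class would typically have been used; the model ID and prompt here are assumptions for illustration, not taken from this repository:

```python
# Hypothetical usage of the removed HuggingFaceLLM wrapper (illustration only;
# assumes the module is still importable, which this commit removes).
from huggingface_llm import HuggingFaceLLM

# "distilgpt2" is an assumed placeholder; any causal LM on the Hub would work.
llm = HuggingFaceLLM(model_id="distilgpt2", temperature=0.7, max_tokens=64)

# With the old langchain.llms.base.LLM, calling the instance routes to _call();
# newer LangChain versions expose the same behavior via llm.invoke(...).
print(llm("Explain what a tokenizer does:"))
```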