Spaces:

boryasbora
/

chatbot_ohw_projects

Sleeping

App Files Files Community

boryasbora commited on Aug 30, 2024

Commit

ce4c586

verified ·

1 Parent(s): 0e18b88

Delete huggingface_llm.py

Browse files

Files changed (1) hide show

huggingface_llm.py +0 -60

huggingface_llm.py DELETED Viewed

@@ -1,60 +0,0 @@
-from langchain.llms.base import LLM
-from langchain.callbacks.manager import CallbackManagerForLLMRun
-from typing import Any, List, Optional, Dict
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-from pydantic import Field, PrivateAttr
-class HuggingFaceLLM(LLM):
-    model_id: str = Field(..., description="Hugging Face model ID")
-    temperature: float = Field(default=0.7, description="Sampling temperature")
-    max_tokens: int = Field(default=256, description="Maximum number of tokens to generate")
-    device: str = Field(default="cpu", description="Device to run the model on")
-    _model: Optional[Any] = PrivateAttr(default=None)
-    _tokenizer: Optional[Any] = PrivateAttr(default=None)
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        self.device = "cuda" if torch.cuda.is_available() and self.device != "cpu" else "cpu"
-        self._load_model()
-    def _load_model(self):
-        self._tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        self._model = AutoModelForCausalLM.from_pretrained(self.model_id)
-        self._model = self._model.to(torch.device(self.device))
-    @property
-    def _llm_type(self) -> str:
-        return "custom_huggingface"
-    def _call(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> str:
-        input_ids = self._tokenizer.encode(prompt, return_tensors="pt").to(self.device)
-        with torch.no_grad():
-            output = self._model.generate(
-                input_ids,
-                max_new_tokens=self.max_tokens,
-                temperature=self.temperature,
-                do_sample=True,
-                pad_token_id=self._tokenizer.eos_token_id
-            )
-        response = self._tokenizer.decode(output[0], skip_special_tokens=True)
-        return response[len(prompt):].strip()
-    @property
-    def _identifying_params(self) -> Dict[str, Any]:
-        return {"model_id": self.model_id, "temperature": self.temperature, "max_tokens": self.max_tokens, "device": self.device}
-    def __setattr__(self, name, value):
-        if name in ["_model", "_tokenizer"]:
-            object.__setattr__(self, name, value)
-        else:
-            super().__setattr__(name, value)