"""Gradio demo that labels the sentiment of a text with a prompt-tuned Llama-2 model.

The script loads the ``meta-llama/Llama-2-7b-chat-hf`` causal LM, wraps it with
the ``furquan/llama2-sentiment-prompt-tuned`` PEFT adapter, and serves a
single-textbox Gradio interface whose callback is :func:`sentiment`.
"""

import os

import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModel, LlamaForCausalLM
from peft import PeftModel

# Disabled alternatives that load OPT-based checkpoints instead of Llama-2:
#pipe = pipeline("text-generation", model="furquan/opt_2_7_b_prompt_tuned_sentiment_analysis", trust_remote_code=True, cache_dir="/local/home/furquanh/myProjects/week12/").to('cuda')
# tokenizer = AutoTokenizer.from_pretrained("furquan/opt-1-3b-prompt-tuned-sentiment-analysis", trust_remote_code=True)
# model = AutoModel.from_pretrained("furquan/opt-1-3b-prompt-tuned-sentiment-analysis", trust_remote_code=True)

BASE_MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"
ADAPTER_ID = "furquan/llama2-sentiment-prompt-tuned"

# SECURITY: the access token used to be hard-coded here. Secrets must never be
# committed; the previously published token should be revoked and a fresh one
# supplied through the HF_TOKEN environment variable. When the variable is
# unset, None is passed and the library falls back to its default credentials.
HF_TOKEN = os.environ.get("HF_TOKEN")

model = LlamaForCausalLM.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN)
# The tokenizer comes from the same repository, so it gets the same credentials.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN)
model = PeftModel.from_pretrained(model, ADAPTER_ID)

# The title previously read "OPT-1.3B", which did not match the model loaded above.
title = "Llama-2-7B"
description = "This demo uses meta's LLama-2-7b Causal LM as base model that was prompt tuned on the mteb/tweet_sentiment_extraction dataset to only output the sentiment of a given text."
article = "\n"


def sentiment(text: str) -> str:
    """Return the input text together with the model's one-token sentiment label.

    Args:
        text: Free-form text entered in the Gradio textbox.

    Returns:
        A string of the form ``"text: <text> Sentiment: <label>"`` where
        ``<label>`` is the last space-separated piece of the decoded model
        output, or a short prompt asking for input when ``text`` is empty.
    """
    # An empty textbox used to crash on ``text[-1]``; answer gracefully instead.
    if not text:
        return "Please enter some text."

    # The prompt is always fed to the model with a trailing space.
    if not text.endswith(" "):
        text = f"{text} "

    tokenized = tokenizer(text, return_tensors='pt')

    with torch.no_grad():
        # Exactly one new token is generated; it is read back as the label.
        outputs = model.generate(
            input_ids=tokenized["input_ids"],
            attention_mask=tokenized["attention_mask"],
            max_new_tokens=1,
        )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    label = decoded.split(' ')[-1]
    return f"text: {text} Sentiment: {label}"


iface = gr.Interface(
    fn=sentiment,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    article=article,
)

iface.launch()