import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer


def get_tinyllama():
    """Load TinyLlama-1.1B-Chat as a text-generation pipeline."""
    tinyllama = pipeline(
        "text-generation",
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return tinyllama


def get_qwen2ins1b():
    """Load Qwen2-1.5B-Instruct and its tokenizer."""
    model = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen2-1.5B-Instruct",
        torch_dtype="auto",
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
    return {'model': model, 'tokenizer': tokenizer}


def response_tinyllama(model=None, messages=None):
    """Generate a TinyLlama reply for a chat history of (user, assistant) steps."""
    messages_dict = [
        {
            "role": "system",
            "content": "You are a friendly and helpful chatbot",
        }
    ]
    # Rebuild the conversation: each step is (user_message, optional assistant_reply).
    for step in messages:
        messages_dict.append({'role': 'user', 'content': step[0]})
        if len(step) >= 2:
            messages_dict.append({'role': 'assistant', 'content': step[1]})
    prompt = model.tokenizer.apply_chat_template(
        messages_dict, tokenize=False, add_generation_prompt=True
    )
    outputs = model(
        prompt,
        max_new_tokens=64,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )
    # The generated text contains the full prompt; keep only the text after the
    # last <|assistant|> tag, i.e. the newly generated turn.
    return outputs[0]['generated_text'].split('<|assistant|>')[-1].strip()


def response_qwen2ins1b(model=None, messages=None):
    """Generate a Qwen2-1.5B-Instruct reply for a chat history of (user, assistant) steps."""
    messages_dict = [
        {
            "role": "system",
            "content": "You are a friendly and helpful chatbot",
        }
    ]
    for step in messages:
        messages_dict.append({'role': 'user', 'content': step[0]})
        if len(step) >= 2:
            messages_dict.append({'role': 'assistant', 'content': step[1]})
    text = model['tokenizer'].apply_chat_template(
        messages_dict,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Move inputs to the model's device (device_map="auto" may have placed it on GPU).
    model_inputs = model['tokenizer']([text], return_tensors="pt").to(model['model'].device)
    generated_ids = model['model'].generate(
        model_inputs.input_ids,
        max_new_tokens=512,
    )
    # Strip the prompt tokens so only the newly generated tokens are decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = model['tokenizer'].batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
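

# A minimal usage sketch, not part of the original module: it assumes the chat
# history is a list of (user, assistant) steps as consumed by the response
# functions above; the `history` variable and its contents are illustrative only.
if __name__ == "__main__":
    # Load both models; each loader downloads weights on first use.
    tinyllama = get_tinyllama()
    qwen = get_qwen2ins1b()

    # One completed turn plus a new user message awaiting a reply.
    history = [
        ("Hello, who are you?", "I am a friendly chatbot."),
        ("Can you recommend a book about space?",),
    ]

    print("TinyLlama:", response_tinyllama(model=tinyllama, messages=history))
    print("Qwen2:", response_qwen2ins1b(model=qwen, messages=history))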