import gradio as gr
import spaces
import torch
from threading import Thread
from transformers import (
    AutoModel,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    TextIteratorStreamer,
)

model_id = "textcleanlm/textclean-4B"
model = None
tokenizer = None


def load_model():
    global model, tokenizer
    if model is None:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Add a padding token if the tokenizer doesn't define one
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # Try model classes from most to least specific; the first one that
        # matches the checkpoint's architecture wins. A bare AutoModel has
        # no LM head, so it is only a last resort and cannot generate.
        for model_class in [AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel]:
            try:
                model = model_class.from_pretrained(
                    model_id,
                    torch_dtype=torch.bfloat16,
                    device_map="auto",
                )
                break
            except Exception:
                continue
        if model is None:
            raise ValueError(f"Could not load model {model_id}")
    return model, tokenizer


@spaces.GPU(duration=60)
def clean_text(text):
    model, tokenizer = load_model()
    inputs = tokenizer(text, return_tensors="pt", max_length=4096, truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    # Stream tokens as they are generated; skip_prompt keeps the input text
    # out of the output when the checkpoint is a decoder-only (causal) model
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=4096,
        num_beams=1,  # streaming requires greedy/sampled decoding, not beam search
        do_sample=True,
        temperature=1.0,
        streamer=streamer,
    )

    # Run generation in a background thread so we can consume the streamer here
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Yield the accumulated text as it is generated
    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text
    thread.join()


iface = gr.Interface(
    fn=clean_text,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter text to clean...",
        label="Input Text",
    ),
    outputs=gr.Textbox(
        lines=5,
        label="Cleaned Text",
    ),
    title="TextClean-4B Demo",
    description="Simple demo for text cleaning using the textcleanlm/textclean-4B model",
)

if __name__ == "__main__":
    iface.launch()
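

# A minimal sanity-check sketch, not part of the Space UI. It assumes the
# checkpoint loads via one of the classes above and fits on a single device;
# the _smoke_test name and the sample string are hypothetical. Useful for
# verifying generation locally without Gradio or streaming, e.g. by calling
# _smoke_test() from a Python shell after importing this module.
def _smoke_test(sample="Th1s   is s0me n0isy text!!"):
    model, tokenizer = load_model()
    inputs = tokenizer(sample, return_tensors="pt").to(model.device)
    # Greedy decoding with a small budget is enough to confirm the model runs
    output_ids = model.generate(**inputs, max_new_tokens=128, do_sample=False)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))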