# Import Hugging Face libraries and datasets
from transformers import Trainer, TrainingArguments, BertForSequenceClassification, BertTokenizer
from datasets import load_dataset

# Load dataset
dataset = load_dataset("imdb")

# Preprocess data: tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

# Apply tokenization to every split
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Load pre-trained model with a binary classification head
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',           # Save path in the Space environment ('./results' avoids root-level permission errors)
    evaluation_strategy="epoch",      # Eval after each epoch (newer transformers versions name this eval_strategy)
    learning_rate=2e-5,               # Learning rate
    per_device_train_batch_size=8,    # Training batch size
    per_device_eval_batch_size=8,     # Eval batch size
    num_train_epochs=3,               # Number of epochs
    weight_decay=0.01                 # Weight decay for regularization
)

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"]
)

# Train the model (this will run in the cloud, no need for local machine)
trainer.train()
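
# Once training finishes, you would typically evaluate and persist the model so the
# fine-tuned weights survive the session. A minimal sketch; the "./fine_tuned_bert"
# path is a hypothetical example location, not part of the original script:
metrics = trainer.evaluate()                     # Computes loss/metrics on eval_dataset
print(metrics)                                   # e.g. eval_loss, epoch, runtime stats
trainer.save_model("./fine_tuned_bert")          # Saves model weights and config
tokenizer.save_pretrained("./fine_tuned_bert")   # Saves tokenizer files alongside the model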