In [3]:
import torch

from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer

from model.distilbert import DistilBertClassificationModel
from model.llama import LlamaClassificationModel

In [2]:
# Hugging Face Hub repository that supplies both the tokenizer and the
# `classification_head.pt` weights fetched in the next cell.
repo_id = "ppak10/defect-classification-llama-baseline-25-epochs"

In [3]:
# Initialize the model
# model = DistilBertClassificationModel(repo_id)
model = LlamaClassificationModel()

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(repo_id)

# Fetch the trained classification head weights from the Hub repository
classification_head_path = hf_hub_download(
    repo_id=repo_id,
    repo_type="model",
    filename="classification_head.pt",
)

# map_location="cpu" lets a head that was saved from a GPU load on a CPU-only
# machine; weights_only=True restricts unpickling to tensors/containers, which
# is all a state dict needs and avoids executing arbitrary pickled code.
classifier_state = torch.load(
    classification_head_path,
    map_location="cpu",
    weights_only=True,
)
model.classifier.load_state_dict(classifier_state)
model.eval()  # Set the model to evaluation mode

LlamaConfig {
 "_attn_implementation_autoset": true,
 "_name_or_path": "meta-llama/Llama-3.2-1B",
 "architectures": [
 "LlamaForCausalLM"
 ],
 "attention_bias": false,
 "attention_dropout": 0.0,
 "bos_token_id": 128000,
 "eos_token_id": 128001,
 "head_dim": 64,
 "hidden_act": "silu",
 "hidden_size": 2048,
 "initializer_range": 0.02,
 "intermediate_size": 8192,
 "max_position_embeddings": 131072,
 "mlp_bias": false,
 "model_type": "llama",
 "num_attention_heads": 32,
 "num_hidden_layers": 16,
 "num_key_value_heads": 8,
 "pretraining_tp": 1,
 "rms_norm_eps": 1e-05,
 "rope_scaling": {
 "factor": 32.0,
 "high_freq_factor": 4.0,
 "low_freq_factor": 1.0,
 "original_max_position_embeddings": 8192,
 "rope_type": "llama3"
 },
 "rope_theta": 500000.0,
 "tie_word_embeddings": true,
 "torch_dtype": "bfloat16",
 "transformers_version": "4.47.0",
 "use_cache": true,
 "vocab_size": 128256
}



 model.classifier.load_state_dict(torch.load(classification_head_path))


LlamaClassificationModel(
 (base_model): LlamaModel(
 (embed_tokens): Embedding(128256, 2048)
 (layers): ModuleList(
 (0-15): 16 x LlamaDecoderLayer(
 (self_attn): LlamaSdpaAttention(
 (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
 (k_proj): Linear(in_features=2048, out_features=512, bias=False)
 (v_proj): Linear(in_features=2048, out_features=512, bias=False)
 (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
 (rotary_emb): LlamaRotaryEmbedding()
 )
 (mlp): LlamaMLP(
 (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
 (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
 (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
 (act_fn): SiLU()
 )
 (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
 (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
 )
 )
 (norm): LlamaRMSNorm((2048,), eps=1e-05)
 (rotary_emb): LlamaRotaryEmbedding()
 )
 (classifier): Linear(in_features=2048, out_features=4, bi

In [56]:
# Alternative inputs kept for reference:
# text = "What defects would occur with a beam size of 100 microns, a power of 500 W, a velocity of 100 mm/s and layer height of 10 microns and a hatch spacing of 10 microns for Ti-6Al-4V"
# text = "SS316L[SEP]500 W[SEP]10.0 mm/s[SEP]500.0 microns[SEP]500.0 microns[SEP]100.0 microns"
text = "SS316L[SEP]250.0 W[SEP]280.0 mm/s[SEP][SEP]950.0 microns[SEP]600.0 microns"

# Inference currently runs on the CPU; swap in the commented line to use a GPU when available
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = "cpu"
model = model.to(device)

# Tokenize the input (padded/truncated to 256 tokens) and move the tensors to `device`
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=256)
inputs = {key: value.to(device) for key, value in inputs.items()}

# Perform inference; no_grad skips autograd bookkeeping, which is not needed
# for prediction and only costs memory and time
with torch.no_grad():
    outputs = model(**inputs)

# Extract logits and apply sigmoid activation for multi-label classification
logits = outputs["logits"]
probs = torch.sigmoid(logits)

# Threshold each label independently at 0.5 (multi-hot vector: several labels may be set at once)
preds = (probs > 0.5).int()

# Label order (per the original note): None, keyhole, lack of fusion, balling
print(preds)

tensor([[1, 0, 0, 0]], dtype=torch.int32)


In [1]:
import torch.nn as nn
from transformers import PreTrainedModel

class PretrainedLlamaClassificationModel(PreTrainedModel):
    """Multi-label classifier: a pretrained backbone followed by a linear head.

    The backbone is loaded from ``config.model_path`` and its last hidden
    states are summed over the sequence dimension before being projected to
    ``config.num_labels`` logits.
    """

    def __init__(self, config):
        """Build the backbone and the classification head.

        Args:
            config: Configuration object; this class reads ``model_path``,
                ``hidden_size`` and ``num_labels`` from it.
        """
        # Imported locally: the imports of this cell only bring in `nn` and
        # `PreTrainedModel`, so `AutoModel` would otherwise be undefined here.
        from transformers import AutoModel

        super().__init__(config)
        self.base_model = AutoModel.from_pretrained(config.model_path, config=config)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.config = config

    def forward(self, input_ids, attention_mask, labels=None):
        """Compute logits and, when labels are given, the BCE-with-logits loss.

        Args:
            input_ids: Token ids forwarded to the backbone.
            attention_mask: Attention mask forwarded to the backbone.
            labels: Optional targets; cast to float before the loss is computed.

        Returns:
            dict with ``"loss"`` (``None`` when ``labels`` is ``None``) and
            ``"logits"``.
        """
        # NOTE(review): transformers' PreTrainedModel exposes a `base_model`
        # property resolved through `base_model_prefix`, so plain attribute
        # access may hand back this wrapper instead of the backbone registered
        # in __init__ (which would make forward call itself). Reading the
        # registered submodule directly is equivalent when no such property
        # interferes and keeps the `base_model.*` state-dict keys unchanged.
        backbone = self._modules["base_model"]
        outputs = backbone(input_ids=input_ids, attention_mask=attention_mask)
        # NOTE(review): the sum runs over every sequence position; the
        # attention mask is not applied to the pooling, so padded positions
        # contribute too — confirm this matches how the head was trained.
        summed_representation = outputs.last_hidden_state.sum(dim=1)
        logits = self.classifier(summed_representation)
        loss = None
        if labels is not None:
            loss_fn = nn.BCEWithLogitsLoss()
            loss = loss_fn(logits, labels.float())
        return {"loss": loss, "logits": logits}


 from .autonotebook import tqdm as notebook_tqdm


In [1]:
from transformers import AutoModel, pipeline

# Hub repository to load through the generic AutoModel entry point.
repo_id = "ppak10/defect-classification-llama-baseline-25-epochs"
# NOTE(review): the saved output of this cell reports that the LlamaModel
# weights were "newly initialized" instead of being read from the checkpoint —
# confirm the repository's weight names match what AutoModel expects before
# relying on `model`.
model = AutoModel.from_pretrained(repo_id)
# tokenizer = AutoTokenizer.from_pretrained(repo_id)

# Disabled text-classification pipeline experiment:
# classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
# result = classification_pipeline("Test input text")
# print(result)


 from .autonotebook import tqdm as notebook_tqdm
Some weights of LlamaModel were not initialized from the model checkpoint at ppak10/defect-classification-llama-baseline-25-epochs and are newly initialized: ['embed_tokens.weight', 'layers.0.input_layernorm.weight', 'layers.0.mlp.down_proj.weight', 'layers.0.mlp.gate_proj.weight', 'layers.0.mlp.up_proj.weight', 'layers.0.post_attention_layernorm.weight', 'layers.0.self_attn.k_proj.weight', 'layers.0.self_attn.o_proj.weight', 'layers.0.self_attn.q_proj.weight', 'layers.0.self_attn.v_proj.weight', 'layers.1.input_layernorm.weight', 'layers.1.mlp.down_proj.weight', 'layers.1.mlp.gate_proj.weight', 'layers.1.mlp.up_proj.weight', 'layers.1.post_attention_layernorm.weight', 'layers.1.self_attn.k_proj.weight', 'layers.1.self_attn.o_proj.weight', 'layers.1.self_attn.q_proj.weight', 'layers.1.self_attn.v_proj.weight', 'layers.10.input_layernorm.weight', 'layers.10.mlp.down_proj.weight', 'layers.10.mlp.gate_proj.weight', 'layers.10.mlp.up_proj.we

In [2]:
# Display the module tree of the currently bound `model` to inspect its architecture.
print(model)

LlamaModel(
 (embed_tokens): Embedding(32000, 2048)
 (layers): ModuleList(
 (0-31): 32 x LlamaDecoderLayer(
 (self_attn): LlamaSdpaAttention(
 (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
 (k_proj): Linear(in_features=2048, out_features=2048, bias=False)
 (v_proj): Linear(in_features=2048, out_features=2048, bias=False)
 (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
 (rotary_emb): LlamaRotaryEmbedding()
 )
 (mlp): LlamaMLP(
 (gate_proj): Linear(in_features=2048, out_features=11008, bias=False)
 (up_proj): Linear(in_features=2048, out_features=11008, bias=False)
 (down_proj): Linear(in_features=11008, out_features=2048, bias=False)
 (act_fn): SiLU()
 )
 (input_layernorm): LlamaRMSNorm((2048,), eps=1e-06)
 (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-06)
 )
 )
 (norm): LlamaRMSNorm((2048,), eps=1e-06)
 (rotary_emb): LlamaRotaryEmbedding()
)
