Zhengping committed on
Commit
122d947
·
verified ·
1 Parent(s): bd0677e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +78 -1
README.md CHANGED
@@ -41,7 +41,84 @@ This is the model card of a 🤗 transformers model that has been pushed on the
41
 
42
  <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
 
44
- [More Information Needed]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  ### Downstream Use [optional]
47
 
 
41
 
42
  <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
 
44
+ ```python
45
+ import transformers
46
+ import torch
47
+ from transformers.pipelines import PIPELINE_REGISTRY
48
+ from transformers import (
49
+ pipeline,
50
+ AutoModelForCausalLM,
51
+ PreTrainedTokenizer
52
+ )
53
+ from typing import (
54
+ Dict,
55
+ Callable,
56
+ Tuple,
57
+ List,
58
+ )
59
+ from src.pipelines.level_to_score_pipeline import LevelToScorePipeline
60
+ from src.rank_dicts import SingleLabelRankDict
61
+ from src.chat_templates import UNLITemplate
62
+
63
+
64
+ model = transformers.AutoModelForCausalLM.from_pretrained(
65
+ "ckpt/merged-dp3",
66
+ torch_dtype="auto",
67
+ attn_implementation="flash_attention_2",
68
+ )
69
+ tokenizer = transformers.AutoTokenizer.from_pretrained(
70
+ "ckpt/merged-dp3",
71
+ )
72
+
73
+ rank_dict = SingleLabelRankDict.from_tokenizer(tokenizer)
74
+
75
+ PIPELINE_REGISTRY.register_pipeline(
76
+ "level-to-score",
77
+ pipeline_class=LevelToScorePipeline,
78
+ pt_model=AutoModelForCausalLM
79
+ )
80
+
81
+ # This allows fine-grained labeling, whereas greedy decoding alone gives only a coarse score.
82
+ # One can also attach a custom level-to-score function to the pipeline, e.g. applying the UNLI
83
+ # label transformation to make the scores more binarized.
84
+ def _level_to_score_func(
85
+ logits: Tuple[torch.FloatTensor],
86
+ tokenizer: PreTrainedTokenizer
87
+ ) -> Tuple[List[float], List[float]]:
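88
+ """Return, per row of logits[0], the expected score (softmax over the label-level token logits weighted by equal-width bin midpoints in [0, 1]) together with the selected logits."""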
89
+ logits = logits[0]
90
+ num_labels = len(rank_dict)
91
+ considering_ids = tokenizer.convert_tokens_to_ids([f" <|label_level_{i}|>" for i in range(num_labels)])
92
+ selective_logits = torch.index_select(logits, 1, torch.tensor(considering_ids, device=logits.device))
93
+ step_size = 1 / num_labels
94
+ expectation = torch.tensor([[i * step_size + 1 / 2 * step_size for i in range(num_labels)]], device=selective_logits.device)
95
+ scores = torch.softmax(selective_logits, dim=-1) @ expectation.T
96
+ scores = scores.squeeze(-1).tolist()
97
+ return scores, selective_logits.tolist()
98
+
99
+ pipe = pipeline(
100
+ "level-to-score",
101
+ model=model,
102
+ max_new_tokens=2,
103
+ tokenizer=tokenizer,
104
+ device=0,
105
+ level_to_score_func=_level_to_score_func,
106
+ torch_dtype=torch.bfloat16,
107
+ )
108
+
109
+ template = UNLITemplate()
110
+
111
+ premise = "Sam is sleeping."
112
+ hypothesis = "Sam is awake."
113
+
114
+ inputs = template.get_prompt_template(premise=premise, hypothesis=hypothesis) +\
115
+ template.get_completion_template(is_completion=True)
116
+
117
+ result = pipe(inputs)
118
+ print(result)
119
+ ```
120
+
121
+ See our code repo for the definition of the scoring pipeline and templates.
122
 
123
  ### Downstream Use [optional]
124