Update finetune3.py
Browse files- finetune3.py +17 -9
finetune3.py
CHANGED
@@ -67,8 +67,8 @@ def main():
|
|
67 |
# Load data
|
68 |
df = load_data()
|
69 |
|
70 |
-
# Show
|
71 |
-
st.subheader("
|
72 |
st.write(df.head())
|
73 |
|
74 |
# Prepare data
|
@@ -76,7 +76,7 @@ def main():
|
|
76 |
tokenizer, model = load_tokenizer_and_model(model_name, num_labels=5)
|
77 |
dataset, num_labels = prepare_data(df, tokenizer)
|
78 |
|
79 |
-
# Update the model with the
|
80 |
if num_labels != 5:
|
81 |
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
|
82 |
|
@@ -93,8 +93,8 @@ def main():
|
|
93 |
train_dataset = create_dataset(train_data)
|
94 |
eval_dataset = create_dataset(eval_data)
|
95 |
|
96 |
-
# Show
|
97 |
-
st.subheader("
|
98 |
train_df = pd.DataFrame({
|
99 |
'input_ids': [ids[:10] for ids in train_dataset['input_ids'][:5]], # Show first 10 tokens for brevity
|
100 |
'attention_mask': [mask[:10] for mask in train_dataset['attention_mask'][:5]],
|
@@ -136,20 +136,28 @@ def main():
|
|
136 |
if os.path.exists(model_dir):
|
137 |
files = os.listdir(model_dir)
|
138 |
st.write("Contents of `./finetuned_model` directory:")
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
140 |
else:
|
141 |
st.write("Directory `./finetuned_model` does not exist.")
|
142 |
|
143 |
-
#
|
144 |
st.subheader("Model File Contents")
|
145 |
if st.button('Show Model File Contents'):
|
146 |
if os.path.exists(model_dir):
|
147 |
files = os.listdir(model_dir)
|
148 |
for file in files:
|
149 |
file_path = os.path.join(model_dir, file)
|
150 |
-
|
151 |
st.write(f"**{file}:**")
|
152 |
-
|
|
|
|
|
|
|
153 |
else:
|
154 |
st.write("Directory `./finetuned_model` does not exist.")
|
155 |
|
|
|
67 |
# Load data
|
68 |
df = load_data()
|
69 |
|
70 |
+
# Show data
|
71 |
+
st.subheader("Data from January 2016")
|
72 |
st.write(df.head())
|
73 |
|
74 |
# Prepare data
|
|
|
76 |
tokenizer, model = load_tokenizer_and_model(model_name, num_labels=5)
|
77 |
dataset, num_labels = prepare_data(df, tokenizer)
|
78 |
|
79 |
+
# Reload the model with the actual number of labels found in the data when it differs from the default of 5
|
80 |
if num_labels != 5:
|
81 |
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
|
82 |
|
|
|
93 |
train_dataset = create_dataset(train_data)
|
94 |
eval_dataset = create_dataset(eval_data)
|
95 |
|
96 |
+
# Show training data
|
97 |
+
st.subheader("Training Data")
|
98 |
train_df = pd.DataFrame({
|
99 |
'input_ids': [ids[:10] for ids in train_dataset['input_ids'][:5]], # Show first 10 tokens for brevity
|
100 |
'attention_mask': [mask[:10] for mask in train_dataset['attention_mask'][:5]],
|
|
|
136 |
if os.path.exists(model_dir):
|
137 |
files = os.listdir(model_dir)
|
138 |
st.write("Contents of `./finetuned_model` directory:")
|
139 |
+
for file in files:
|
140 |
+
file_path = os.path.join(model_dir, file)
|
141 |
+
if os.path.isfile(file_path):
|
142 |
+
st.write(f"- {file}")
|
143 |
+
else:
|
144 |
+
st.write(f"- [DIR] {file}")
|
145 |
else:
|
146 |
st.write("Directory `./finetuned_model` does not exist.")
|
147 |
|
148 |
+
# Show the contents of the model files (e.g., config.json); NOTE: files are opened in text mode, so binary files such as pytorch_model.bin may fail to decode
|
149 |
st.subheader("Model File Contents")
|
150 |
if st.button('Show Model File Contents'):
|
151 |
if os.path.exists(model_dir):
|
152 |
files = os.listdir(model_dir)
|
153 |
for file in files:
|
154 |
file_path = os.path.join(model_dir, file)
|
155 |
+
if os.path.isfile(file_path):
|
156 |
st.write(f"**{file}:**")
|
157 |
+
with open(file_path, 'r') as f:
|
158 |
+
st.write(f.read())
|
159 |
+
else:
|
160 |
+
st.write(f"**{file}:** [Directory - Contents not displayed]")
|
161 |
else:
|
162 |
st.write("Directory `./finetuned_model` does not exist.")
|
163 |
|