Spaces:

talktorhutika
/

patentdataset-finetune

Sleeping

App Files Files Community

talktorhutika committed on Jul 26, 2024

Commit

942a002

verified ·

1 Parent(s): e816d24

Update finetune3.py

Browse files

Files changed (1) hide show

finetune3.py +17 -9

finetune3.py CHANGED Viewed

@@ -67,8 +67,8 @@ def main():
     # Load data
     df = load_data()
-    # Show sample data
-    st.subheader("Sample Data from January 2016")
     st.write(df.head())
     # Prepare data
@@ -76,7 +76,7 @@ def main():
     tokenizer, model = load_tokenizer_and_model(model_name, num_labels=5)
     dataset, num_labels = prepare_data(df, tokenizer)
-    # Update the model with the correct number of labels based on the data
     if num_labels != 5:
         model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
@@ -93,8 +93,8 @@ def main():
     train_dataset = create_dataset(train_data)
     eval_dataset = create_dataset(eval_data)
-    # Show a sample of the training data
-    st.subheader("Sample of Training Data")
     train_df = pd.DataFrame({
         'input_ids': [ids[:10] for ids in train_dataset['input_ids'][:5]],  # Show first 10 tokens for brevity
         'attention_mask': [mask[:10] for mask in train_dataset['attention_mask'][:5]],
@@ -136,20 +136,28 @@ def main():
         if os.path.exists(model_dir):
             files = os.listdir(model_dir)
             st.write("Contents of `./finetuned_model` directory:")
-            st.write(files)
         else:
             st.write("Directory `./finetuned_model` does not exist.")
-    # Optionally, show the file content of model files (e.g., config.json, pytorch_model.bin)
     st.subheader("Model File Contents")
     if st.button('Show Model File Contents'):
         if os.path.exists(model_dir):
             files = os.listdir(model_dir)
             for file in files:
                 file_path = os.path.join(model_dir, file)
-                with open(file_path, 'r') as f:
                     st.write(f"**{file}:**")
-                    st.write(f.read())
         else:
             st.write("Directory `./finetuned_model` does not exist.")

     # Load data
     df = load_data()
+    # Show data
+    st.subheader("Data from January 2016")
     st.write(df.head())
     # Prepare data
     tokenizer, model = load_tokenizer_and_model(model_name, num_labels=5)
     dataset, num_labels = prepare_data(df, tokenizer)
+    # Update the model with the correct number of labels based on the data
     if num_labels != 5:
         model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
     train_dataset = create_dataset(train_data)
     eval_dataset = create_dataset(eval_data)
+    # Show training data
+    st.subheader("Training Data")
     train_df = pd.DataFrame({
         'input_ids': [ids[:10] for ids in train_dataset['input_ids'][:5]],  # Show first 10 tokens for brevity
         'attention_mask': [mask[:10] for mask in train_dataset['attention_mask'][:5]],
         if os.path.exists(model_dir):
             files = os.listdir(model_dir)
             st.write("Contents of `./finetuned_model` directory:")
+            for file in files:
+                file_path = os.path.join(model_dir, file)
+                if os.path.isfile(file_path):
+                    st.write(f"- {file}")
+                else:
+                    st.write(f"- [DIR] {file}")
         else:
             st.write("Directory `./finetuned_model` does not exist.")
+    # Show the contents of the model files (e.g., config.json, pytorch_model.bin)
     st.subheader("Model File Contents")
     if st.button('Show Model File Contents'):
         if os.path.exists(model_dir):
             files = os.listdir(model_dir)
             for file in files:
                 file_path = os.path.join(model_dir, file)
+                if os.path.isfile(file_path):
                     st.write(f"**{file}:**")
+                    with open(file_path, 'r') as f:
+                        st.write(f.read())
+                else:
+                    st.write(f"**{file}:** [Directory - Contents not displayed]")
         else:
             st.write("Directory `./finetuned_model` does not exist.")