Kevin Fink
committed on
Commit
·
f1ce0f3
1
Parent(s):
42338b1
deve
Browse files
app.py
CHANGED
|
@@ -166,7 +166,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
| 166 |
|
| 167 |
elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
|
| 168 |
dataset = load_dataset(dataset_name.strip())
|
| 169 |
-
dataset['train'] = dataset['train'].select(range(15000))
|
| 170 |
train_size = len(dataset['train'])
|
| 171 |
third_size = train_size // 3
|
| 172 |
del dataset['test']
|
|
@@ -183,9 +182,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
| 183 |
|
| 184 |
|
| 185 |
if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
|
| 186 |
-
dataset = load_dataset(dataset_name.strip())
|
| 187 |
-
dataset['train'] = dataset['train'].select(range(15000))
|
| 188 |
-
dataset['validation'] = dataset['validation'].select(range(2000))
|
| 189 |
train_size = len(dataset['train'])
|
| 190 |
third_size = train_size // 3
|
| 191 |
second_third = dataset['train'].select(range(third_size, third_size*2))
|
|
@@ -201,7 +198,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
| 201 |
except Exception as e:
|
| 202 |
print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
|
| 203 |
dataset = load_dataset(dataset_name.strip())
|
| 204 |
-
dataset['train'] = dataset['train'].select(range(15000))
|
| 205 |
train_size = len(dataset['train'])
|
| 206 |
third_size = train_size // 3
|
| 207 |
# Tokenize the dataset
|
|
|
|
| 166 |
|
| 167 |
elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
|
| 168 |
dataset = load_dataset(dataset_name.strip())
|
|
|
|
| 169 |
train_size = len(dataset['train'])
|
| 170 |
third_size = train_size // 3
|
| 171 |
del dataset['test']
|
|
|
|
| 182 |
|
| 183 |
|
| 184 |
if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
|
| 185 |
+
dataset = load_dataset(dataset_name.strip())
|
|
|
|
|
|
|
| 186 |
train_size = len(dataset['train'])
|
| 187 |
third_size = train_size // 3
|
| 188 |
second_third = dataset['train'].select(range(third_size, third_size*2))
|
|
|
|
| 198 |
except Exception as e:
|
| 199 |
print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
|
| 200 |
dataset = load_dataset(dataset_name.strip())
|
|
|
|
| 201 |
train_size = len(dataset['train'])
|
| 202 |
third_size = train_size // 3
|
| 203 |
# Tokenize the dataset
|