Commit
·
366383d
1
Parent(s):
61074ba
Ignore .zip and .pdf links in data collection
Browse files
project/DataCollectionPipeline.ipynb
CHANGED
@@ -3476,6 +3476,8 @@
|
|
3476 |
" subdirectory is not None and\n",
|
3477 |
" 'http' not in subdirectory and\n",
|
3478 |
" '#' not in subdirectory and\n",
|
|
|
|
|
3479 |
" mongoCollection.find_one({\"link\": newLink}) is None and\n",
|
3480 |
" newLink not in links\n",
|
3481 |
" ):\n",
|
|
|
3476 |
" subdirectory is not None and\n",
|
3477 |
" 'http' not in subdirectory and\n",
|
3478 |
" '#' not in subdirectory and\n",
|
3479 |
+
" '.zip' not in subdirectory and\n",
|
3480 |
+
" '.pdf' not in subdirectory and\n",
|
3481 |
" mongoCollection.find_one({\"link\": newLink}) is None and\n",
|
3482 |
" newLink not in links\n",
|
3483 |
" ):\n",
|