Update app.py
Browse files
app.py
CHANGED
@@ -4,11 +4,9 @@ from selenium.webdriver.common.by import By
|
|
4 |
from selenium.webdriver.chrome.options import Options
|
5 |
from selenium.webdriver.chrome.service import Service
|
6 |
import pandas as pd
|
7 |
-
from selenium.webdriver.common.keys import Keys
|
8 |
from selenium.webdriver.support.ui import WebDriverWait
|
9 |
from selenium.webdriver.support import expected_conditions as EC
|
10 |
import time
|
11 |
-
from datetime import datetime
|
12 |
from webdriver_manager.chrome import ChromeDriverManager
|
13 |
from webdriver_manager.chrome import ChromeType
|
14 |
import transformers
|
@@ -58,7 +56,7 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
58 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text")))
|
59 |
placeholder.text(f"Scrolled {item + 1} times")
|
60 |
progress_bar.progress((item + 1) / 30)
|
61 |
-
time.sleep(
|
62 |
except Exception as e:
|
63 |
st.error(f"Exception during scrolling: {e}")
|
64 |
break
|
@@ -70,13 +68,25 @@ if st.button("Sentiment Analysis", type="secondary"):
|
|
70 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
|
71 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
72 |
for comment in comments:
|
|
|
73 |
try:
|
74 |
-
#
|
75 |
-
timestamp_element = comment.find_element(By.XPATH, '
|
76 |
timestamp = timestamp_element.text
|
77 |
except Exception:
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
data.append({"Comment": comment.text, "comment_date": timestamp})
|
|
|
80 |
except Exception as e:
|
81 |
st.error(f"Exception during comment extraction: {e}")
|
82 |
driver.quit()
|
|
|
4 |
from selenium.webdriver.chrome.options import Options
|
5 |
from selenium.webdriver.chrome.service import Service
|
6 |
import pandas as pd
|
|
|
7 |
from selenium.webdriver.support.ui import WebDriverWait
|
8 |
from selenium.webdriver.support import expected_conditions as EC
|
9 |
import time
|
|
|
10 |
from webdriver_manager.chrome import ChromeDriverManager
|
11 |
from webdriver_manager.chrome import ChromeType
|
12 |
import transformers
|
|
|
56 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text")))
|
57 |
placeholder.text(f"Scrolled {item + 1} times")
|
58 |
progress_bar.progress((item + 1) / 30)
|
59 |
+
time.sleep(1) #Increased wait time for dynamic loading
|
60 |
except Exception as e:
|
61 |
st.error(f"Exception during scrolling: {e}")
|
62 |
break
|
|
|
68 |
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
|
69 |
comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
|
70 |
for comment in comments:
|
71 |
+
timestamp = "Timestamp not found"
|
72 |
try:
|
73 |
+
# Try a more direct XPath
|
74 |
+
timestamp_element = comment.find_element(By.XPATH, './/yt-formatted-string[@class="published-time-text style-scope ytd-comment-renderer"]')
|
75 |
timestamp = timestamp_element.text
|
76 |
except Exception:
|
77 |
+
try:
|
78 |
+
# Try a more general XPath
|
79 |
+
timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//yt-formatted-string[contains(@class, "time-text")]')
|
80 |
+
timestamp = timestamp_element.text
|
81 |
+
except Exception:
|
82 |
+
try:
|
83 |
+
# Last fallback: try grabbing the <a id="time"> anchor tag.
|
84 |
+
timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//a[@id="time"]')
|
85 |
+
timestamp = timestamp_element.text
|
86 |
+
except Exception as inner_e:
|
87 |
+
print(f"Timestamp not found for comment: {comment.text}. Error: {inner_e}") #debug
|
88 |
data.append({"Comment": comment.text, "comment_date": timestamp})
|
89 |
+
|
90 |
except Exception as e:
|
91 |
st.error(f"Exception during comment extraction: {e}")
|
92 |
driver.quit()
|