nlpblogs committed on
Commit
a77c47d
·
verified ·
1 Parent(s): 6fafb78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -4,11 +4,9 @@ from selenium.webdriver.common.by import By
4
  from selenium.webdriver.chrome.options import Options
5
  from selenium.webdriver.chrome.service import Service
6
  import pandas as pd
7
- from selenium.webdriver.common.keys import Keys
8
  from selenium.webdriver.support.ui import WebDriverWait
9
  from selenium.webdriver.support import expected_conditions as EC
10
  import time
11
- from datetime import datetime
12
  from webdriver_manager.chrome import ChromeDriverManager
13
  from webdriver_manager.chrome import ChromeType
14
  import transformers
@@ -58,7 +56,7 @@ if st.button("Sentiment Analysis", type="secondary"):
58
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text")))
59
  placeholder.text(f"Scrolled {item + 1} times")
60
  progress_bar.progress((item + 1) / 30)
61
- time.sleep(0.5)
62
  except Exception as e:
63
  st.error(f"Exception during scrolling: {e}")
64
  break
@@ -70,13 +68,25 @@ if st.button("Sentiment Analysis", type="secondary"):
70
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
71
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
72
  for comment in comments:
 
73
  try:
74
- # Robust XPath to find the timestamp
75
- timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//yt-formatted-string[@class="published-time-text style-scope ytd-comment-renderer"]')
76
  timestamp = timestamp_element.text
77
  except Exception:
78
- timestamp = "Timestamp not found"
 
 
 
 
 
 
 
 
 
 
79
  data.append({"Comment": comment.text, "comment_date": timestamp})
 
80
  except Exception as e:
81
  st.error(f"Exception during comment extraction: {e}")
82
  driver.quit()
 
4
  from selenium.webdriver.chrome.options import Options
5
  from selenium.webdriver.chrome.service import Service
6
  import pandas as pd
 
7
  from selenium.webdriver.support.ui import WebDriverWait
8
  from selenium.webdriver.support import expected_conditions as EC
9
  import time
 
10
  from webdriver_manager.chrome import ChromeDriverManager
11
  from webdriver_manager.chrome import ChromeType
12
  import transformers
 
56
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text")))
57
  placeholder.text(f"Scrolled {item + 1} times")
58
  progress_bar.progress((item + 1) / 30)
59
+ time.sleep(1) #Increased wait time for dynamic loading
60
  except Exception as e:
61
  st.error(f"Exception during scrolling: {e}")
62
  break
 
68
  wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
69
  comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
70
  for comment in comments:
71
+ timestamp = "Timestamp not found"
72
  try:
73
+ # Try a more direct XPath
74
+ timestamp_element = comment.find_element(By.XPATH, './/yt-formatted-string[@class="published-time-text style-scope ytd-comment-renderer"]')
75
  timestamp = timestamp_element.text
76
  except Exception:
77
+ try:
78
+ # Try a more general XPath
79
+ timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//yt-formatted-string[contains(@class, "time-text")]')
80
+ timestamp = timestamp_element.text
81
+ except Exception:
82
+ try:
83
+ #try grabbing the a tag.
84
+ timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//a[@id="time"]')
85
+ timestamp = timestamp_element.text
86
+ except Exception as inner_e:
87
+ print(f"Timestamp not found for comment: {comment.text}. Error: {inner_e}") #debug
88
  data.append({"Comment": comment.text, "comment_date": timestamp})
89
+
90
  except Exception as e:
91
  st.error(f"Exception during comment extraction: {e}")
92
  driver.quit()