Spaces:

nlpblogs
/

youtube-sentiment-analysis-app

Running

App Files Community

nlpblogs committed on Apr 9

Commit

a77c47d

verified ·

1 Parent(s): 6fafb78

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -6

app.py CHANGED Viewed

@@ -4,11 +4,9 @@ from selenium.webdriver.common.by import By
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service
 import pandas as pd
-from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 import time
-from datetime import datetime
 from webdriver_manager.chrome import ChromeDriverManager
 from webdriver_manager.chrome import ChromeType
 import transformers
@@ -58,7 +56,7 @@ if st.button("Sentiment Analysis", type="secondary"):
                         wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text")))
                         placeholder.text(f"Scrolled {item + 1} times")
                         progress_bar.progress((item + 1) / 30)
-                        time.sleep(0.5)
                     except Exception as e:
                         st.error(f"Exception during scrolling: {e}")
                         break
@@ -70,13 +68,25 @@ if st.button("Sentiment Analysis", type="secondary"):
                     wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
                     comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
                     for comment in comments:
                         try:
-                            # Robust XPath to find the timestamp
-                            timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//yt-formatted-string[@class="published-time-text style-scope ytd-comment-renderer"]')
                             timestamp = timestamp_element.text
                         except Exception:
-                            timestamp = "Timestamp not found"
                         data.append({"Comment": comment.text, "comment_date": timestamp})
                 except Exception as e:
                     st.error(f"Exception during comment extraction: {e}")
                 driver.quit()

 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service
 import pandas as pd
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 import time
 from webdriver_manager.chrome import ChromeDriverManager
 from webdriver_manager.chrome import ChromeType
 import transformers
                         wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#content #content-text")))
                         placeholder.text(f"Scrolled {item + 1} times")
                         progress_bar.progress((item + 1) / 30)
+                        time.sleep(1) #Increased wait time for dynamic loading
                     except Exception as e:
                         st.error(f"Exception during scrolling: {e}")
                         break
                     wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#contents #contents")))
                     comments = driver.find_elements(By.CSS_SELECTOR, "#content #content-text")
                     for comment in comments:
+                        timestamp = "Timestamp not found"
                         try:
+                            # Try a more direct XPath
+                            timestamp_element = comment.find_element(By.XPATH, './/yt-formatted-string[@class="published-time-text style-scope ytd-comment-renderer"]')
                             timestamp = timestamp_element.text
                         except Exception:
+                            try:
+                                # Try a more general XPath
+                                timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//yt-formatted-string[contains(@class, "time-text")]')
+                                timestamp = timestamp_element.text
+                            except Exception:
+                                try:
+                                    #try grabbing the a tag.
+                                    timestamp_element = comment.find_element(By.XPATH, './ancestor::ytd-comment-renderer//a[@id="time"]')
+                                    timestamp = timestamp_element.text
+                                except Exception as inner_e:
+                                    print(f"Timestamp not found for comment: {comment.text}. Error: {inner_e}") #debug
                         data.append({"Comment": comment.text, "comment_date": timestamp})
                 except Exception as e:
                     st.error(f"Exception during comment extraction: {e}")
                 driver.quit()