duwing committed on
Commit fec291e · verified · 1 Parent(s): 44f78be

Update app.py

Files changed (1)
  1. app.py +27 -47
app.py CHANGED
@@ -5,13 +5,7 @@ import pandas as pd
 from transformers import *
 from tqdm import tqdm
 from tensorflow.python.client import device_lib
-
-from selenium import webdriver
-from selenium.webdriver.chrome.service import Service
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
+import requests
 from bs4 import BeautifulSoup
 import time
 
@@ -68,55 +62,41 @@ def movie_evaluation_predict(sentence):
     elif predict_answer == 1:
         st.write("(Positive probability: %.2f) This is a positive movie review." % predict_value)
 
-def setup_driver():
-    chrome_options = Options()
-    chrome_options.add_argument("--headless")  # run in the background
-    chrome_options.add_argument("--no-sandbox")
-
-    driver = webdriver.Chrome(options=chrome_options)
-    return driver
-
 def scrape_content(url):
-    driver = setup_driver()
-    try:
-        driver.get(url)
-        # wait for the page to load
-        time.sleep(3)
-
-        # extract the article body
-        soup = BeautifulSoup(driver.page_source, 'html.parser')
-        content = soup.find('article')  # adjust to match the article's body tag
-
-        # extract the comments
-        comments = soup.find_all('span', class_='u_cbox_contents')  # adjust to match the comment tag
-
-        return {
-            'content': content.text if content else "Could not find the article body.",
-            'comments': [comment.text for comment in comments]
-        }
-    finally:
-        driver.quit()
+    # request the web page
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, 'html.parser')
+
+    # extract the article body
+    article_body = soup.find_all('div', class_='newsct_article')
+    content = ' '.join([p.get_text() for p in article_body])
+
+    # extract the comments (example)
+    comments = soup.find_all('span', class_='u_cbox_contents')
+    comment_list = [comment.get_text() for comment in comments]
+
+    return content, comment_list
 
 
 def main():
     sentiment_model = create_sentiment_bert()
 
+    st.title("Web Content Scraper")
+
+    # read the URL input
     url = st.text_input("Enter a URL")
 
-    if st.button("Start crawling"):
+    if st.button("Start scraping"):
         if url:
-            with st.spinner("Crawling..."):
-                result = scrape_content(url)
-
-            st.subheader("Article body")
-            st.write(result['content'])
-
-            st.subheader("Comments")
-            for idx, comment in enumerate(result['comments'], 1):
-                st.write(f"{idx}. {comment}")
-        else:
-            st.error("Please enter a URL")
-
 
+            content, comments = scrape_content(url)
+
+            # display the results
+            st.subheader("Article content")
+            st.write(content)
+
+            st.subheader("Comments")
+            for comment in comments:
+                st.write(comment)
 
 '''
 test = st.form('test')
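
For reference, a minimal standalone sketch of the new requests-based flow, not part of the commit. It reuses the same selectors as the new scrape_content() (the 'newsct_article' body container and 'u_cbox_contents' comment spans, which suggest a news-portal page layout); the fetch_article name, the User-Agent header, the timeout, and the example URL are illustrative assumptions. Unlike the removed Selenium version, requests only sees the server-rendered HTML, so comments injected by JavaScript may come back as an empty list.

# Illustrative sketch only -- not part of this commit.
import requests
from bs4 import BeautifulSoup

def fetch_article(url):
    # A browser-like User-Agent and a timeout are assumptions, added for robustness.
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")
    # Same selectors as scrape_content(): article body divs and comment spans.
    body_divs = soup.find_all("div", class_="newsct_article")
    content = " ".join(div.get_text(strip=True) for div in body_divs)
    comments = [span.get_text(strip=True)
                for span in soup.find_all("span", class_="u_cbox_contents")]
    return content, comments

if __name__ == "__main__":
    text, comments = fetch_article("https://example.com/some-article")  # placeholder URL
    print(text[:200])
    print(f"{len(comments)} comments found")  # may be 0 if comments are rendered client-side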