JaishnaCodz committed on
Commit
2c83941
·
verified ·
1 Parent(s): 16e02f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from newspaper import Article
3
  from transformers import pipeline
4
  import pytesseract
5
  from PIL import Image
@@ -20,14 +20,13 @@ def extract_text_from_image_url(img_url):
20
  return f"❌ OCR Error: {e}"
21
 
22
  # Extract blog
23
- def extract_text_from_url(url):
24
- try:
25
- article = Article(url)
26
- article.download()
27
- article.parse()
28
- return article.text
29
- except Exception as e:
30
- return f"❌ Blog Error: {e}"
31
 
32
  # Review line-by-line
33
  def review_lines(text):
 
1
  import gradio as gr
2
+ import trafilatura
3
  from transformers import pipeline
4
  import pytesseract
5
  from PIL import Image
 
20
  return f"❌ OCR Error: {e}"
21
 
22
  # Extract blog
23
+ def extract_text_from_url(url):
24
+ downloaded = trafilatura.fetch_url(url)
25
+ if downloaded:
26
+ return trafilatura.extract(downloaded)
27
+ else:
28
+ return "❌ Blog Error: Could not fetch content from the URL."
29
+
 
30
 
31
  # Review line-by-line
32
  def review_lines(text):