import streamlit as st
import requests
import nltk
from transformers import pipeline
from rake_nltk import Rake
from nltk.corpus import stopwords
from fuzzywuzzy import fuzz
import openai
import os
from dotenv import load_dotenv
# Load environment variables for Llama 3
load_dotenv()
# Title of the app
st.title("Sentiment Analysis Comparison: Transformers vs Llama 3")
# Options for the dropdown menu; each preset (except 'None') maps to a remote .txt file to analyze
options = ['None', 'Appreciation Letter', 'Regret Letter', 'Kindness Tale', 'Lost Melody Tale', 'Twitter Example 1', 'Twitter Example 2']
# Create a dropdown menu to select options
selected_option = st.selectbox("Select a preset option", options)
# Define URLs for different options
urls = {
    'Appreciation Letter': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Appreciation_Letter.txt",
    'Regret Letter': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Regret_Letter.txt",
    'Kindness Tale': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Kindness_Tale.txt",
    'Lost Melody Tale': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Lost_Melody_Tale.txt",
    'Twitter Example 1': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Twitter_Example_1.txt",
    'Twitter Example 2': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Twitter_Example_2.txt"
}
# Function to fetch text content based on selected option
def fetch_text_content(selected_option):
    return requests.get(urls[selected_option]).text if selected_option in urls else ""
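# Example (illustrative): fetch_text_content('Kindness Tale') downloads the raw text
# from the GitHub URL above; any option without a URL (e.g. 'None') yields "".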
# Fetch text content based on selected option
text = fetch_text_content(selected_option)
# Display text content in a text area
text = st.text_area('Enter the text to analyze', text)
# Download NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
# Initialize Transformers pipelines for sentiment analysis and summarization (keyword extraction uses RAKE, defined below)
pipe_sent = pipeline('sentiment-analysis')
pipe_summ = pipeline("summarization", model="facebook/bart-large-cnn")
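# Note: no model is pinned for the sentiment pipeline, so transformers falls back to its
# default English sentiment model (historically distilbert-base-uncased-finetuned-sst-2-english);
# passing model=... explicitly would make the choice reproducible.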
# Llama 3 initialization
llama_api_key = os.getenv('HFSecret')
llama_base_url = "https://api-inference.huggingface.co"  # hosted Inference API base; the /models/<repo_id> path is appended in the request below
llama_repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# Function to use Llama 3 for sentiment analysis, summarization, and keyword extraction
def analyze_with_llama(text):
    headers = {
        "Authorization": f"Bearer {llama_api_key}"
    }
    data = {
        "inputs": text,
        "parameters": {
            "max_new_tokens": 200
        }
    }
    # Perform the request
    response = requests.post(f"{llama_base_url}/models/{llama_repo_id}", headers=headers, json=data)
    return response.json()
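# For reference (hedged): a successful call to the hosted text-generation endpoint typically
# returns a list such as [{"generated_text": "..."}], e.g.:
#   analyze_with_llama("I loved this movie!")
#   -> [{'generated_text': 'I loved this movie! ...'}]
# The parsing in the Llama 3 column below assumes a dict with custom keys, so it may need
# to be adapted to the actual payload.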
# Function to extract keywords using RAKE and remove duplicates
def extract_keywords(text):
    r = Rake()
    r.extract_keywords_from_text(text)
    phrases_with_scores = r.get_ranked_phrases_with_scores()
    stop_words = set(stopwords.words('english'))
    keywords = [(score, phrase) for score, phrase in phrases_with_scores if phrase.lower() not in stop_words]
    keywords.sort(key=lambda x: x[0], reverse=True)
    unique_keywords = []
    seen_phrases = set()
    for score, phrase in keywords:
        if phrase not in seen_phrases:
            similar_phrases = [seen_phrase for seen_phrase in seen_phrases if fuzz.ratio(phrase, seen_phrase) > 70]
            if similar_phrases:
                merged_phrase = max([phrase] + similar_phrases, key=len)
                unique_keywords.append((score, merged_phrase))
            else:
                unique_keywords.append((score, phrase))
            seen_phrases.add(phrase)
    return unique_keywords[:10]
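# Example usage (illustrative; scores depend on the input text):
#   extract_keywords("Streamlit makes building small data apps remarkably easy.")
#   -> up to 10 (score, phrase) tuples, highest-scoring RAKE phrases first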
# Create two columns
col1, col2 = st.columns(2)
# Transformer-based analysis in the first column
with col1:
st.header("Transformer-based Analysis")
if st.button("Analyze with Transformers"):
with st.spinner("Analyzing with Transformers..."):
# Sentiment analysis
out_sentiment = pipe_sent(text)
sentiment_score = out_sentiment[0]['score']
sentiment_label = out_sentiment[0]['label']
            sentiment_emoji = '😊' if sentiment_label == 'POSITIVE' else '😞'
            sentiment_text = f"Sentiment Score: {sentiment_score}, Sentiment Label: {sentiment_label.capitalize()} {sentiment_emoji}"
            with st.expander("Sentiment Analysis (Transformers)"):
                st.write(sentiment_text)
            # Summarization
            out_summ = pipe_summ(text)
            summarized_text = out_summ[0]['summary_text']
            with st.expander("Summarization (Transformers)"):
                st.write(summarized_text)
            # Keyword extraction
            keywords = extract_keywords(text)
            keyword_list = [keyword[1] for keyword in keywords]
            with st.expander("Keywords (Transformers)"):
                st.write(keyword_list)
# Llama 3-based analysis in the second column
with col2:
st.header("Llama 3-based Analysis")
if st.button("Analyze with Llama 3"):
with st.spinner("Analyzing with Llama 3..."):
llama_response = analyze_with_llama(text)
if llama_response:
# Assuming the response returns in the same format, adjust if needed
sentiment_text = llama_response.get('sentiment_analysis', 'No sentiment detected')
summarized_text = llama_response.get('summarization', 'No summary available')
keywords = llama_response.get('keywords', 'No keywords available')
with st.expander("Sentiment Analysis (Llama 3)"):
st.write(sentiment_text)
with st.expander("Summarization (Llama 3)"):
st.write(summarized_text)
with st.expander("Keywords (Llama 3)"):
st.write(keywords)
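
# To run locally (assumption: this file is saved as app.py and HFSecret is set in .env):
#   streamlit run app.py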