File size: 6,091 Bytes
5b744f4
 
 
 
 
 
 
b876064
 
 
5b744f4
b876064
 
5b744f4
b876064
 
5b744f4
b876064
 
5b744f4
 
 
 
 
b876064
 
 
 
 
 
 
 
5b744f4
 
 
b876064
8f0ec33
 
b876064
5b744f4
b876064
 
 
 
 
 
 
 
5b744f4
8f0ec33
5b744f4
b876064
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b744f4
 
 
 
 
b876064
5b744f4
 
 
 
 
 
 
 
 
 
 
 
 
 
b876064
 
5b744f4
b876064
 
 
 
 
 
5b744f4
 
 
 
8f0ec33
b876064
 
 
 
 
8f0ec33
 
b876064
 
 
 
 
8f0ec33
 
b876064
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import streamlit as st
import requests
import nltk
from transformers import pipeline
from rake_nltk import Rake
from nltk.corpus import stopwords
from fuzzywuzzy import fuzz
import openai
import os
from dotenv import load_dotenv

# Load environment variables (expects an 'HFSecret' entry, read further below
# for the Hugging Face API token).
load_dotenv()

# Title of the app
st.title("Sentiment Analysis Comparison: Transformers vs Llama 3")

# Preset sample texts the user can pick instead of typing their own.
# 'None' is a sentinel meaning "no preset" — it has no entry in `urls`,
# so fetch_text_content() returns an empty string for it.
options = ['None', 'Appreciation Letter', 'Regret Letter', 'Kindness Tale', 'Lost Melody Tale', 'Twitter Example 1', 'Twitter Example 2']

# Create a dropdown menu to select options
selected_option = st.selectbox("Select a preset option", options)

# Raw-content URLs for each preset; keys must match the entries in `options`.
urls = {
    'Appreciation Letter': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Appreciation_Letter.txt",
    'Regret Letter': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Regret_Letter.txt",
    'Kindness Tale': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Kindness_Tale.txt",
    'Lost Melody Tale': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Lost_Melody_Tale.txt",
    'Twitter Example 1': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Twitter_Example_1.txt",
    'Twitter Example 2': "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers/Twitter_Example_2.txt"
}

# Function to fetch text content based on selected option
def fetch_text_content(selected_option):
    """Return the remote text for *selected_option*, or "" if unavailable.

    Returns "" when the option has no URL (e.g. 'None'), when the request
    times out or fails, or when the server answers with a non-2xx status —
    previously an HTTP error page's body would have been returned as if it
    were the sample text, and a stalled server could hang the app (no timeout).
    """
    url = urls.get(selected_option)
    if url is None:
        return ""
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # don't treat an error page as content
        return response.text
    except requests.RequestException:
        # Best-effort fetch: fall back to an empty text area on any failure.
        return ""

# Fetch text content based on selected option
text = fetch_text_content(selected_option)

# Editable text area, pre-filled with the fetched preset (empty for 'None').
# The user's edits replace `text` for the analyses below.
text = st.text_area('Enter the text to analyze', text)

# Download NLTK resources needed by RAKE / stopword filtering.
# NOTE(review): this runs on every Streamlit rerun; nltk.download is a no-op
# when already present but still adds startup cost — consider caching.
nltk.download('punkt')
nltk.download('stopwords')

# Initialize sentiment and summarization pipelines for Transformers.
# NOTE(review): pipelines are rebuilt on every rerun; @st.cache_resource
# would avoid reloading model weights each time — confirm and apply.
pipe_sent = pipeline('sentiment-analysis')
pipe_summ = pipeline("summarization", model="facebook/bart-large-cnn")

# Llama 3 configuration: HF token from the environment (loaded via dotenv),
# plus the inference endpoint base and model repository id.
llama_api_key = os.getenv('HFSecret')
llama_base_url = "https://api-inference.huggingface.co/v1"
llama_repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Function to use Llama 3 for sentiment analysis, summarization, and keyword extraction
def analyze_with_llama(text):
    """POST *text* to the Hugging Face inference endpoint and return the JSON reply.

    Raises requests.RequestException (including Timeout) on network failure;
    the previous version had no timeout and could hang the Streamlit app
    indefinitely on a stalled connection.
    """
    headers = {
        "Authorization": f"Bearer {llama_api_key}"
    }
    data = {
        "inputs": text,
        "parameters": {
            # Cap the generated continuation length.
            "max_new_tokens": 200
        }
    }
    # NOTE(review): llama_base_url ends in '/v1', so this posts to
    # .../v1/models/<repo>. The serverless Inference API endpoint is usually
    # https://api-inference.huggingface.co/models/<repo> — confirm the '/v1'
    # path is intended for this deployment.
    response = requests.post(
        f"{llama_base_url}/models/{llama_repo_id}",
        headers=headers,
        json=data,
        timeout=60,  # generation can be slow, but never wait forever
    )
    return response.json()

# Function to extract keywords using RAKE and remove duplicates
def extract_keywords(text):
    """Return up to 10 (score, phrase) keyword pairs from *text*, deduplicated.

    Uses RAKE for ranked phrase extraction, drops phrases that are bare
    stopwords, sorts by score (descending), then removes exact and fuzzy
    near-duplicates (fuzzywuzzy ratio > 70), keeping only the first —
    i.e. highest-scoring — occurrence of each phrase family.

    Bug fixed: the original appended a "merged" phrase whenever a fuzzy
    match was found, even though the matching phrase's entry was already in
    the result list, so near-duplicate phrases produced duplicate entries
    in the returned top-10.
    """
    r = Rake()
    r.extract_keywords_from_text(text)
    phrases_with_scores = r.get_ranked_phrases_with_scores()

    # Filter out phrases that are nothing but a stopword.
    stop_words = set(stopwords.words('english'))
    keywords = [(score, phrase) for score, phrase in phrases_with_scores if phrase.lower() not in stop_words]
    keywords.sort(key=lambda x: x[0], reverse=True)

    unique_keywords = []
    seen_phrases = set()
    for score, phrase in keywords:
        if phrase in seen_phrases:
            continue
        # Keep the phrase only if no fuzzy near-duplicate was already kept;
        # since the list is sorted by score, the kept one is the best-scored.
        if not any(fuzz.ratio(phrase, seen_phrase) > 70 for seen_phrase in seen_phrases):
            unique_keywords.append((score, phrase))
        seen_phrases.add(phrase)
    return unique_keywords[:10]

# Create two columns: Transformers results on the left, Llama 3 on the right.
col1, col2 = st.columns(2)

# Transformer-based analysis in the first column
with col1:
    st.header("Transformer-based Analysis")
    if st.button("Analyze with Transformers"):
        with st.spinner("Analyzing with Transformers..."):
            # Sentiment analysis: pipeline returns a list of
            # {'label', 'score'} dicts; only the first result is used.
            out_sentiment = pipe_sent(text)
            sentiment_score = out_sentiment[0]['score']
            sentiment_label = out_sentiment[0]['label']
            # Emoji keyed off the default pipeline's POSITIVE/NEGATIVE labels.
            sentiment_emoji = '😊' if sentiment_label == 'POSITIVE' else '😞'
            sentiment_text = f"Sentiment Score: {sentiment_score}, Sentiment Label: {sentiment_label.capitalize()} {sentiment_emoji}"
            
            with st.expander("Sentiment Analysis (Transformers)"):
                st.write(sentiment_text)
            
            # Summarization via facebook/bart-large-cnn.
            # NOTE(review): BART has an input-length limit; very long texts
            # may error or be truncated — confirm handling for long presets.
            out_summ = pipe_summ(text)
            summarized_text = out_summ[0]['summary_text']
            
            with st.expander("Summarization (Transformers)"):
                st.write(summarized_text)
            
            # Keyword extraction: keep only the phrase part of each
            # (score, phrase) pair for display.
            keywords = extract_keywords(text)
            keyword_list = [keyword[1] for keyword in keywords]
            
            with st.expander("Keywords (Transformers)"):
                st.write(keyword_list)

# Llama 3-based analysis in the second column
with col2:
    st.header("Llama 3-based Analysis")
    if st.button("Analyze with Llama 3"):
        with st.spinner("Analyzing with Llama 3..."):
            llama_response = analyze_with_llama(text)
            
            if llama_response:
                # Assuming the response returns in the same format, adjust if needed
                # NOTE(review): the HF Inference API usually returns a LIST of
                # generated-text dicts, not a dict with these keys — if so,
                # .get() would raise AttributeError (list has no .get) or the
                # defaults below would always be shown. Verify the actual
                # response schema against the endpoint used in
                # analyze_with_llama().
                sentiment_text = llama_response.get('sentiment_analysis', 'No sentiment detected')
                summarized_text = llama_response.get('summarization', 'No summary available')
                keywords = llama_response.get('keywords', 'No keywords available')
                
                with st.expander("Sentiment Analysis (Llama 3)"):
                    st.write(sentiment_text)
                
                with st.expander("Summarization (Llama 3)"):
                    st.write(summarized_text)
                
                with st.expander("Keywords (Llama 3)"):
                    st.write(keywords)