Spaces:
Runtime error
Runtime error
File size: 2,517 Bytes
81446ef bce5fb4 81446ef bfa64b6 81446ef bfa64b6 b59b230 cfbeebd 81446ef 7ea96d2 ae44a4c 7057a87 4570d11 b9c3b16 81446ef eace371 81446ef b9c3b16 81446ef b9c3b16 81446ef b9c3b16 eda711d f26be33 b59b230 2a14cb0 b59b230 ce0fb45 2a14cb0 eda711d b9b0b35 eda711d 324fe53 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import pandas as pd
import streamlit as st
from keybert import KeyBERT
import yake
from keyphrase_vectorizers import KeyphraseCountVectorizer
def _model_cache():
    """Return the Streamlit caching decorator appropriate for a model object.

    Newer Streamlit releases provide ``st.cache_resource`` for global,
    unhashed resources such as ML models; the legacy ``st.cache`` API is
    deprecated there. Older releases only have ``st.cache``, so fall back to
    the original call when the newer decorator is missing.
    """
    if hasattr(st, "cache_resource"):
        return st.cache_resource(show_spinner=True)
    return st.cache(
        allow_output_mutation=True,
        suppress_st_warning=True,
        show_spinner=True,
    )


@_model_cache()
def load_model():
    """Build the KeyBERT extractor backed by the PatentSBERTa embedding model.

    The result is cached by Streamlit so the model is constructed once
    instead of on every script rerun.

    Returns:
        A ``KeyBERT`` instance created with ``"AI-Growth-Lab/PatentSBERTa"``.
    """
    return KeyBERT("AI-Growth-Lab/PatentSBERTa")
# Obtain the KeyBERT model through the cached loader defined above.
model = load_model()

# Page header.
st.title("Patent Text Extractor")

# Reserve a placeholder slot and render the free-text input inside it; the
# widget's current contents are kept in `text`.
placeholder = st.empty()
text = placeholder.text_area("Paste or write text", height=300)

# Render the action button and keep its return value in `button`.
button = st.button("Extract Keywords")
#top_n = st.sidebar.slider("Select a number of keywords", 1, 10, 50,20)
#min_ngram = st.sidebar.number_input("Minimum number of words in each keyword", 1)
#max_ngram = st.sidebar.number_input("Maximum number of words in each keyword", 3)
#st.sidebar.code(f"ngram_range=({min_ngram}, {max_ngram})")
#params = {"docs": text_input, "top_n": top_n, "stop_words": 'english',"vectorizer":KeyphraseCountVectorizer()}
#add_diversity = st.sidebar.checkbox("Adjust diversity of keywords")
#if add_diversity:
#method = st.sidebar.selectbox("Select a method", ("Max Sum Similarity", "Maximal Marginal Relevance"))
#if method == "Max Sum Similarity":
#nr_candidates = st.sidebar.slider("nr_candidates", 20, 50, 20, 2)
#params["use_maxsum"] = True
#params["nr_candidates"] = nr_candidates
#elif method == "Maximal Marginal Relevance":
#diversity = st.sidebar.slider("diversity", 0.1, 1.0, 0.6, 0.01)
#params["use_mmr"] = True
#params["diversity"] = diversity
#kw_extractor = yake.KeywordExtractor(top=50)
#candidates = kw_extractor.extract_keywords(text_input)
#keyphrases = [candidate[0] for candidate in candidates]
#kw_model = KeyBERT(model="google/bigbird-pegasus-large-bigpatent")
# Reuse the cached KeyBERT instance created via load_model() instead of
# constructing a second PatentSBERTa-backed model on every script rerun.
kw_model = model

# Streamlit re-executes this script on each interaction, so only run the
# extraction when the button was pressed and there is actual text to process.
keywords = []
if button:
    if text.strip():
        # YAKE proposes up to 50 candidate phrases; the first element of each
        # result is passed on so KeyBERT ranks those candidates against the text.
        kw_extractor = yake.KeywordExtractor(top=50)
        candidates = [
            candidate[0] for candidate in kw_extractor.extract_keywords(text)
        ]
        # Skip the KeyBERT call when YAKE found nothing to rank.
        if candidates:
            keywords = kw_model.extract_keywords(
                text,
                candidates=candidates,
                keyphrase_ngram_range=(1, 3),
                top_n=50,
                stop_words="english",
            )
    else:
        st.warning("Please paste or write some text first.")

# Show the (keyword, score) pairs as a table when anything was extracted.
if keywords:
    st.info("Extracted keywords")
    keywords = pd.DataFrame(keywords, columns=["Keyword", "Score"])
    st.table(keywords)