import streamlit as st
import pandas as pd
import json
from tools import sourceformat as sf
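# sourceformat bundles the format-specific parsers used below (sf.dim, sf.medline, sf.htrc, sf.readPub, sf.readxml)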

#===config===
st.set_page_config(
    page_title="Coconut",
    page_icon="πŸ₯₯",
    layout="wide",
    initial_sidebar_state="collapsed"
)

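# hide Streamlit's default menu, footer, and the collapsed-sidebar toggle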
hide_streamlit_style = """
            <style>
            #MainMenu 
            {visibility: hidden;}
            footer {visibility: hidden;}
            [data-testid="collapsedControl"] {display: none}
            </style>
            """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

st.page_link("https://www.coconut-libtool.com/the-app", label="Go to app", icon="πŸ₯₯")

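# clear every cached conversion whenever a new file is selected (wired to the uploader's on_change below)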
def reset_data():
    st.cache_data.clear()

#===check filetype===
@st.cache_data(ttl=3600)
def get_ext(uploaded):
    # the uploaded file's name carries the extension used to pick a converter
    return uploaded.name
     
#===upload===
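# CSV uploads: Dimensions exports (detected by the filename) get their columns renamed to the names checked below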
@st.cache_data(ttl=3600)
def upload(extype):
    keywords = pd.read_csv(uploaded_file)
    if "dimensions" in uploaded_file.name.lower():
        keywords = sf.dim(keywords)
        col_dict = {'MeSH terms': 'Keywords',
                    'PubYear': 'Year',
                    'Times cited': 'Cited by',
                    'Publication Type': 'Document Type'}
        keywords.rename(columns=col_dict, inplace=True)
    return keywords

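# .txt uploads: MEDLINE-style exports are detected by a "PMID" field; otherwise the file is read as a
# tab-separated table whose two-letter field tags (TI, AB, PY, ...) are mapped to full column names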
@st.cache_data(ttl=3600)
def conv_txt(extype):
    if("PMID" in (uploaded_file.read()).decode()):
        uploaded_file.seek(0)
        papers = sf.medline(uploaded_file)
        print(papers)
        return papers
    col_dict = {'TI': 'Title',
            'SO': 'Source title',
            'DE': 'Author Keywords',
            'DT': 'Document Type',
            'AB': 'Abstract',
            'TC': 'Cited by',
            'PY': 'Year',
            'ID': 'Keywords Plus',
            'rights_date_used': 'Year'}
    uploaded_file.seek(0)
    papers = pd.read_csv(uploaded_file, sep='\t')
    if("htid" in papers.columns):
        papers = sf.htrc(papers)
    papers.rename(columns=col_dict, inplace=True)
    print(papers)
    return papers


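# JSON uploads: records are taken from the 'gathers' key, run through the HathiTrust helper,
# and 'Cited by' is filled with the number of records sharing each keyword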
@st.cache_data(ttl=3600)
def conv_json(extype):
    col_dict = {'title': 'Title',
                'rights_date_used': 'Year',
                'content_provider_code': 'Source title'}

    data = json.load(uploaded_file)
    hathifile = data['gathers']
    keywords = pd.DataFrame.from_records(hathifile)

    keywords = sf.htrc(keywords)
    keywords['Cited by'] = keywords.groupby(['Keywords'])['Keywords'].transform('size')
    keywords.rename(columns=col_dict,inplace=True)
    return keywords

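# .tar.gz and .xml uploads: the raw bytes are handed to the matching sourceformat reader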
@st.cache_data(ttl=3600)
def conv_pub(uploaded):
    # read the raw bytes and hand them to the reader that matches the extension
    keywords = None
    if get_ext(uploaded).endswith('.tar.gz'):
        keywords = sf.readPub(uploaded.read())
    elif get_ext(uploaded).endswith('.xml'):
        keywords = sf.readxml(uploaded.read())
    return keywords

st.header('File Checker', anchor=False)
st.subheader('Put your file here...', anchor=False)

#===read data===
uploaded_file = st.file_uploader('Upload a file', type=['csv', 'txt', 'json', 'tar.gz', 'xml'],
                                 on_change=reset_data, label_visibility="collapsed")

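# dispatch on the uploaded file's extension to the matching converter above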
if uploaded_file is not None:
    extype = get_ext(uploaded_file)
    if extype.endswith('.csv'):
        data = upload(extype) 
                  
    elif extype.endswith('.txt'):
        data = conv_txt(extype)

    elif extype.endswith('.json'):
        data = conv_json(extype)
    
    elif extype.endswith('.tar.gz') or extype.endswith('.xml'):
        data = conv_pub(uploaded_file)


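    # show the feature checks side by side in three columns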
    col1, col2, col3 = st.columns(3)
  
    with col1:
        #===check keywords===  
        keycheck = [k for k in data.columns if 'Keyword' in k]
        container1 = st.container(border=True)
        
        if not keycheck:
            container1.subheader('❌ Keyword Stem', divider='red', anchor=False)
            container1.write("Unfortunately, you don't have a column containing keywords in your data. Please check again. If you want to use it in another column, please rename it to 'Keywords'.")
        else:
            container1.subheader('βœ”οΈ Keyword Stem', divider='blue', anchor=False)
            container1.write('Congratulations! You can use Keyword Stem')

        #===Visualization===
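        # normalise alternative export column names to the ones the Sunburst check expects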
        if 'Publication Year' in data.columns:
            data.rename(columns={'Publication Year': 'Year', 'Citing Works Count': 'Cited by',
                                 'Publication Type': 'Document Type', 'Source Title': 'Source title'}, inplace=True)
    
        col2check = ['Document Type','Source title','Cited by','Year']
        miss_col = [column for column in col2check if column not in data.columns]
        container2 = st.container(border=True)
        
        if not miss_col:
            container2.subheader('βœ”οΈ Sunburst', divider='blue', anchor=False)
            container2.write('Congratulations! You can use Sunburst')
        else:
            container2.subheader('❌ Sunburst', divider='red', anchor=False)
            miss_col_str = ', '.join(miss_col)
            container2.write(f"Unfortunately, you don't have: {miss_col_str}. Please check again.")           

    with col2:   
        #===check any obj===
        coldf = sorted(data.select_dtypes(include=['object']).columns.tolist())
        container3 = st.container(border=True)
                
        if not coldf or data.shape[0] < 2:
            container3.subheader('❌ Topic Modeling', divider='red', anchor=False)
            container3.write("Unfortunately, you don't have a column containing object in your data. Please check again.")
        else:
            container3.subheader('βœ”οΈ Topic Modeling', divider='blue', anchor=False)
            container3.write('Congratulations! You can use Topic Modeling')

        #===Burst===
        container4 = st.container(border=True)
        if not coldf or 'Year' not in data.columns:
            container4.subheader('❌ Burst Detection', divider='red', anchor=False)
            container4.write("Unfortunately, you don't have a column containing object in your data or a 'Year' column. Please check again.")
        else:
            container4.subheader('βœ”οΈ Burst Detection', divider='blue', anchor=False)
            container4.write('Congratulations! You can use Burst Detection')

    with col3:
        #===bidirected===    
        container5 = st.container(border=True)        
        if not keycheck:
            container5.subheader('❌ Bidirected Network', divider='red', anchor=False)
            container5.write("Unfortunately, you don't have a column containing keywords in your data. Please check again. If you want to use it in another column, please rename it to 'Keywords'.")
        else:
            container5.subheader('βœ”οΈ Bidirected Network', divider='blue', anchor=False)
            container5.write('Congratulations! You can use Bidirected Network')

        #===scattertext===
        container6 = st.container(border=True)   
        if not coldf or data.shape[0] < 2:
            container6.subheader('❌ Scattertext', divider='red', anchor=False)
            container6.write("Unfortunately, you don't have a column containing object in your data. Please check again.")
        else:
            container6.subheader('βœ”οΈ Scattertext', divider='blue', anchor=False)
            container6.write('Congratulations! You can use Scattertext')