File size: 3,581 Bytes
b4fb2c2
 
 
 
 
 
 
 
 
 
 
 
b1d9ae7
b4fb2c2
 
 
 
 
b1d9ae7
 
 
b4fb2c2
 
 
 
 
 
 
72c071c
b4fb2c2
 
 
 
 
 
 
b1d9ae7
 
 
 
b4fb2c2
 
b1d9ae7
b4fb2c2
 
 
 
72c071c
b4fb2c2
 
 
 
bdd2367
b4fb2c2
004ced9
bdd2367
b4fb2c2
 
 
 
bdd2367
b4fb2c2
 
004ced9
b4fb2c2
004ced9
b4fb2c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bdd2367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4fb2c2
 
 
 
 
 
 
 
 
 
 
 
 
9a862ae
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import sklearn
import fnmatch
import numpy as np
import gradio as gr
import pandas as pd


# Markdown rendered at the top of the page (passed to gr.Markdown inside the
# Blocks layout).
# NOTE(review): the summary sentence and the "Dataset Details" bullets look
# like unfilled template placeholders - confirm and complete before release.
description = """
# 🧠 Neuro-Synth

<!-- Provide a quick summary of the dataset. -->

Neuro-Synth is a synthetic neuroimaging dataset ...

## Dataset Details

The dataset has the following characteristics:

* 
* 
* 

## Dataset Download

* For Safari users, please right click on the "Download" button below and select "Download Linked File"
* Other browser users can directly click on the "Download" button to save the data
* Download via command line:
```
wget https://rongguangw-neuro-synth.hf.space/file=dataset/synth_kde_all.csv
```
* You can also customize the number of samples by generating online in the below panels. The synthesized data will be showing in the lower right panel, and then, you can save the data by clicking on the "Download" button.
"""

# Markdown rendered at the bottom of the page (passed to gr.Markdown after the
# data-generation panel).
# NOTE(review): every BibTeX field except the year is empty - presumably a
# placeholder until the reference is available.
citation = """
## Citation
```
@article{,
  title={},
  author={},
  journal={},
  volume={},
  pages={},
  year={2024}
}
```
"""

# Pre-generated dataset: previewed in the UI and linked from the first
# "Download" button.
file_name = "dataset/synth_kde_all.csv"
# CSV that infer() (re)writes on every customized generation and that its
# "Download" button links to.
save_name = "dataset/customized_neuro_synth.csv"
# Read once at import time; only example_df.head() is shown in the UI.
example_df = pd.read_csv(file_name)


# Choice labels in the same order as the Radio widgets of the UI, so that an
# index-typed Radio value can be mapped back to its label.
_SEX_CHOICES = ("Female", "Male")
_RACE_CHOICES = ("White", "Black", "Asian")


def _resolve_choice(value, choices, field):
    """Return the canonical label in *choices* selected by *value*.

    *value* may be the label itself (matched case-insensitively) or its
    integer position in *choices*; anything else raises ``gr.Error``.
    """
    if isinstance(value, str):
        for label in choices:
            if label.lower() == value.strip().lower():
                return label
    elif isinstance(value, int) and not isinstance(value, bool):
        if 0 <= value < len(choices):
            return choices[value]
    raise gr.Error(f"Please select a valid {field} ({', '.join(choices)}).")


def infer(sex, race, num_sample):
    """Generate synthetic subjects for one sex/race group and save them as CSV.

    Args:
        sex: Selected sex, given either as its label ('Female' / 'Male') or
            as its index in the Radio choice list.
        race: Selected race, given either as its label ('White' / 'Black' /
            'Asian') or as its index in the Radio choice list.
        num_sample: Number of subjects to generate; any value ``int()``
            accepts (e.g. the text typed into the textbox).

    Returns:
        A ``(gr.Dataframe, gr.Button)`` pair: a preview of the first rows of
        the generated table, and a download button linking to the CSV that
        was written to ``save_name``.

    Raises:
        gr.Error: If a selection is invalid, the sample count is not a
            positive integer, or no model file exists for the chosen group.
    """
    # Validating against the known labels also keeps arbitrary user text out
    # of the model file path built below.
    sex_label = _resolve_choice(sex, _SEX_CHOICES, "sex")
    race_label = _resolve_choice(race, _RACE_CHOICES, "race")

    try:
        count = int(num_sample)
    except (TypeError, ValueError):
        raise gr.Error("Please enter the number of samples as an integer.") from None
    if count < 1:
        raise gr.Error("The number of samples must be at least 1.")

    # SECURITY NOTE: allow_pickle=True unpickles arbitrary objects; these
    # files must only ever come from the trusted "model/" directory.
    model_path = f"model/kde_{race_label.lower()}_{sex_label.lower()}.npz"  # e.g. "model/kde_white_female.npz"
    try:
        col_dict = np.load("model/col_dict.npz", allow_pickle=True)['dict'].item()
        bundle = np.load(model_path, allow_pickle=True)['model'].item()
    except FileNotFoundError:
        raise gr.Error(f"No model is available for {race_label} / {sex_label}.") from None
    kde, scaler, cols_names = bundle['model'], bundle['scaler'], bundle['columns']

    # random_state=0 keeps the output reproducible: the same request always
    # yields the same synthetic subjects.
    sample = scaler.inverse_transform(kde.sample(count, random_state=0))

    # The first sampled column is treated as Age; the remaining ones take the
    # readable names that col_dict maps the 'H_*' model columns to.
    feature_cols = ['Age']
    feature_cols.extend(col_dict[name] for name in fnmatch.filter(cols_names, 'H_*'))

    # Build the table from the numeric array directly so the measurements keep
    # a float dtype instead of being coerced to strings.
    df_kde_synth = pd.DataFrame(sample, columns=feature_cols)
    df_kde_synth['Age'] = df_kde_synth['Age'].astype('float').round()
    df_kde_synth.insert(0, 'Race', race_label)             # e.g. 'White'
    df_kde_synth.insert(0, 'Sex', sex_label[0])            # e.g. 'F'
    df_kde_synth.insert(0, 'PTID', [f'Synth_{i + 1}' for i in range(count)])

    # NOTE: every request writes to the same file, so concurrent users
    # overwrite each other's download.
    df_kde_synth.to_csv(save_name, index=False)
    return (
        gr.Dataframe(df_kde_synth.head(), label='Results (only showing the first few rows)', show_label=True),
        gr.Button("Download", link="/file=" + save_name),
    )

# Page layout: description, example-data preview with download button, the
# customized generation panel wired to infer(), and the citation.
with gr.Blocks() as demo:
    gr.Markdown(description)

    with gr.Group():
        example = gr.Dataframe(
            example_df.head(),
            label='Example data (only showing the first five rows, download to check the full table)',
            show_label=True,
        )
        gr.Button("Download", link="/file=" + file_name)

        gr.Markdown("## Customized data generation")
        gr.Interface(
            fn=infer,
            inputs=[
                # type='value' passes the selected label (a string) to infer,
                # which calls .lower() on it to build the model file name;
                # type='index' would pass an int position instead.
                gr.Radio(
                    choices=['Female', 'Male'],
                    value='Female',
                    type='value',
                    label='Gender',
                    interactive=True,
                ),
                gr.Radio(
                    choices=['White', 'Black', 'Asian'],
                    value='Asian',
                    type='value',
                    label='Race',
                    interactive=True,
                ),
                gr.Textbox(
                    label='Generate samples',
                    show_label=True,
                    placeholder='Enter sample number (in integer)...',
                ),
            ],
            outputs=["dataframe", "button"],
            cache_examples=False,
        )

    gr.Markdown(citation)

if __name__ == "__main__":
    # Start the app only when run as a script. allowed_paths lists the
    # directory passed to Gradio for file serving (the CSVs behind the
    # "Download" buttons live there).
    launch_options = {
        "debug": True,
        "share": True,
        "allowed_paths": ["dataset/"],
    }
    demo.launch(**launch_options)