diff --git a/.gitattributes b/.gitattributes
index 502cfe46efc5b98be4b3ef557f033bd590d9fdca..45830bb2ea7c14d5472a6f43889c9723f3b1b880 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -44,3 +44,5 @@ models/pvq_extractor/Resonance.onnx filter=lfs diff=lfs merge=lfs -text
 models/pvq_extractor/Weight.onnx filter=lfs diff=lfs merge=lfs -text
 models/norm_flow/model.pt filter=lfs diff=lfs merge=lfs -text
 audio/1034_121119_000028_000001.wav filter=lfs diff=lfs merge=lfs -text
+Dataset/Audio_files/*.wav filter=lfs diff=lfs merge=lfs -text
+Dataset/Embeddings/**/*.pth filter=lfs diff=lfs merge=lfs -text
diff --git a/Dataset/Audio_files/1034_121119_000028_000001.wav b/Dataset/Audio_files/1034_121119_000028_000001.wav
new file mode 100644
index 0000000000000000000000000000000000000000..2dbcd7cf1a0fb50ce0d607d5db1a86f995734fff
--- /dev/null
+++ b/Dataset/Audio_files/1034_121119_000028_000001.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc00c4e893ccf708cae4366e36ede93b4e158f516323a0724fc6e9f956c76aff
+size 385964
diff --git a/Dataset/Audio_files/1088_129236_000006_000007.wav b/Dataset/Audio_files/1088_129236_000006_000007.wav
new file mode 100644
index 0000000000000000000000000000000000000000..e6d4df40f936d8a012e04e648f4b2d8890a2b440
--- /dev/null
+++ b/Dataset/Audio_files/1088_129236_000006_000007.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27b2e7191ba1cfad41bc1ab1bd09ec1af87062e48abbab1ef01809c76ed738da
+size 311084
diff --git a/Dataset/Audio_files/1422_149735_000006_000000.wav b/Dataset/Audio_files/1422_149735_000006_000000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..5cefd0c6b0c913b8bb42631477775b5c638f8ad4
--- /dev/null
+++ b/Dataset/Audio_files/1422_149735_000006_000000.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f59d7f7a3c7364d7ac254bd94d3384e9b8e173634eb8b7492ec751d8584f8bb5
+size 345644
diff --git a/Dataset/Audio_files/14_212_000019_000000.wav b/Dataset/Audio_files/14_212_000019_000000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..3337debbd72602bedb653eaaf1027653a3ee4485
--- /dev/null
+++ b/Dataset/Audio_files/14_212_000019_000000.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1271f49cf4855d1b7d9b87e99a0c79e5505acbfba94cd8f594c1df2a29d96027
+size 633652
diff --git a/Dataset/Audio_files/1535_141644_000004_000001.wav b/Dataset/Audio_files/1535_141644_000004_000001.wav
new file mode 100644
index 0000000000000000000000000000000000000000..54f34cd3adeabd635eca3a2d12e616cb5dbfdac8
--- /dev/null
+++ b/Dataset/Audio_files/1535_141644_000004_000001.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cc6f0bc3b9ebecbc1dab5a430c37140337a6bbeaf6f75103d74b2b4e75b4f06
+size 295724
diff --git a/Dataset/Audio_files/1731_142320_000122_000005.wav b/Dataset/Audio_files/1731_142320_000122_000005.wav
new file mode 100644
index 0000000000000000000000000000000000000000..7005b27d4ee4863f4aa20d0814bb369565e32d7f
--- /dev/null
+++ b/Dataset/Audio_files/1731_142320_000122_000005.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3d3e88bc53ee1cad73100ea4ad6ccc6d9bcbc36145962d400122b658e27b7e8
+size 316844
diff --git a/Dataset/Audio_files/3009_10327_000027_000005.wav b/Dataset/Audio_files/3009_10327_000027_000005.wav
new file mode 100644
index 0000000000000000000000000000000000000000..137488422f1ecb73e27a98296ba184bad68ce7e7
--- /dev/null
+++ b/Dataset/Audio_files/3009_10327_000027_000005.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbe46722ef2d331a5bc1c552cd6ad3c8a69022a3c70b1c03b609856dc073ca32
+size 309164
diff --git a/Dataset/Audio_files/329_861_000024_000003.wav b/Dataset/Audio_files/329_861_000024_000003.wav
new file mode 100644
index 0000000000000000000000000000000000000000..0ecc6b2656a0ebac19680a94c156597ac20abf5a
--- /dev/null
+++ b/Dataset/Audio_files/329_861_000024_000003.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dab66421315b9f22bbbaf909e69184c01eaba29e536c2b449c8a7310f2edce7
+size 261164
diff --git a/Dataset/Audio_files/4830_25904_000008_000001.wav b/Dataset/Audio_files/4830_25904_000008_000001.wav
new file mode 100644
index 0000000000000000000000000000000000000000..5bcb611ee93af1f3e1cddd58e6f121cf110a6311
--- /dev/null
+++ b/Dataset/Audio_files/4830_25904_000008_000001.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd41aaf86c9d6e394d9afcca5e3128aa6a52fd2948e3bcf6aa03e5c18f2c7eec
+size 483884
diff --git a/Dataset/Audio_files/4957_30119_000070_000001.wav b/Dataset/Audio_files/4957_30119_000070_000001.wav
new file mode 100644
index 0000000000000000000000000000000000000000..50f51c378a8c42aacde6bfa8b0c1c606ea4f87e4
--- /dev/null
+++ b/Dataset/Audio_files/4957_30119_000070_000001.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f483b0a7003610ba8451db035f3347b156bb348c7aa356b7403f8ca86b98ab28
+size 503084
diff --git a/Dataset/Audio_files/5012_80192_000020_000003.wav b/Dataset/Audio_files/5012_80192_000020_000003.wav
new file mode 100644
index 0000000000000000000000000000000000000000..feadaad91006c8fe5bbd2633e95e1a1c842e825c
--- /dev/null
+++ b/Dataset/Audio_files/5012_80192_000020_000003.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91aa9243a6697d65e6f5464b40e9b420b5e5cdef83b64a5556baef1ac548f11e
+size 409004
diff --git a/Dataset/Audio_files/5802_76044_000038_000000.wav b/Dataset/Audio_files/5802_76044_000038_000000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..eeff983a657840379c7308bb494a3c83d8e5ac09
--- /dev/null
+++ b/Dataset/Audio_files/5802_76044_000038_000000.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22964325ee8f751dddd136b3219191443270529d95ee27b45c4a789501286492
+size 460844
diff --git a/Dataset/Audio_files/6544_71420_000024_000001.wav b/Dataset/Audio_files/6544_71420_000024_000001.wav
new file mode 100644
index 0000000000000000000000000000000000000000..445645892dbaeedd2183bda1960299b0a06c04dd
--- /dev/null
+++ b/Dataset/Audio_files/6544_71420_000024_000001.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1289b6714acb263b8bb36d6acfbb4efded0a5c67cc9b6a6246340dd3493c6c2b
+size 209324
diff --git a/Dataset/Audio_files/6918_47541_000006_000008.wav b/Dataset/Audio_files/6918_47541_000006_000008.wav
new file mode 100644
index 0000000000000000000000000000000000000000..6b4d2bae883e73a78d4f1d565c7604e4f0a7aee9
--- /dev/null
+++ b/Dataset/Audio_files/6918_47541_000006_000008.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dcd7955f5469755038482a58f0929012526f98130513acd9d0cd1bc208bbfe8
+size 898612
diff --git a/Dataset/Audio_files/7011_66622_000032_000002.wav b/Dataset/Audio_files/7011_66622_000032_000002.wav
new file mode 100644
index 0000000000000000000000000000000000000000..c0b6fc90f49c7e80429361145d9eee72b9831a3c
--- /dev/null
+++ b/Dataset/Audio_files/7011_66622_000032_000002.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:111c88a591efa42a608d1609214e6ef56a64f3bd79a88b57efecba2ca2f7ed4c
+size 309164
diff --git a/Dataset/Audio_files/7059_77897_000017_000001.wav b/Dataset/Audio_files/7059_77897_000017_000001.wav
new file mode 100644
index 0000000000000000000000000000000000000000..aa7dbaa9ad91035afc72e51c6a9740bfd7a3317e
--- /dev/null
+++ b/Dataset/Audio_files/7059_77897_000017_000001.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17f51a3d2133e81607e36403b6bcb8bd7ec9e03c1bcfbbc80b4123c1b31d6618
+size 243884
diff --git a/Dataset/Audio_files/7190_90542_000054_000000.wav b/Dataset/Audio_files/7190_90542_000054_000000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..cd57aa42781ce78c33bd85d441752713331c8618
--- /dev/null
+++ b/Dataset/Audio_files/7190_90542_000054_000000.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34bdcfbdf51f3475465804b9dbf27f8e647ccc1af17573b0a923f44881217093
+size 222764
diff --git a/Dataset/Audio_files/7226_86965_000020_000001.wav b/Dataset/Audio_files/7226_86965_000020_000001.wav
new file mode 100644
index 0000000000000000000000000000000000000000..3ae60af52a50d25a871d54df5372c8047468b7d4
--- /dev/null
+++ b/Dataset/Audio_files/7226_86965_000020_000001.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b824a36ceaeec45724088957b1e543ee3b477ca1ee55e4c55e96ac8c2b018fb5
+size 622132
diff --git a/Dataset/Audio_files/7245_104888_000016_000000.wav b/Dataset/Audio_files/7245_104888_000016_000000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..34777d6a44ac3d1cb7a68df6cf6f2b5c3964529f
--- /dev/null
+++ b/Dataset/Audio_files/7245_104888_000016_000000.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92d74fe5965fff3182cb1f273c80ea051033c7aa0dbbb44ae48ccded15210216
+size 341804
diff --git a/Dataset/Audio_files/83_9960_000017_000003.wav b/Dataset/Audio_files/83_9960_000017_000003.wav
new file mode 100644
index 0000000000000000000000000000000000000000..9bf31f59261ea47beb21302d7ec7d7ad92a41d72
--- /dev/null
+++ b/Dataset/Audio_files/83_9960_000017_000003.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a1cdc21d779c1d108af86ec6a93558a501322a67c221c25e2dd32d93e0c356a
+size 192044
diff --git a/Dataset/Audio_files/8758_296465_000020_000000.wav b/Dataset/Audio_files/8758_296465_000020_000000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..1ab3f0ed6136e2c9922698ced2d7bce1dc8a4208
--- /dev/null
+++ b/Dataset/Audio_files/8758_296465_000020_000000.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8adafe1ab7b3e86c82454c06863dd616c5b52f91ebb8690fcc64ec7abb2821dc
+size 520364
diff --git a/Dataset/Audio_files/8820_294120_000011_000001.wav b/Dataset/Audio_files/8820_294120_000011_000001.wav
new file mode 100644
index 0000000000000000000000000000000000000000..2b99764656e794b415e2783948151d69ee5234ad
--- /dev/null
+++ b/Dataset/Audio_files/8820_294120_000011_000001.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b78d6b2aed1a99e8f3750bd54c50e8ed2e08dba114792fa604101faf27894708
+size 213164
diff --git a/Dataset/Embeddings/1034/1034_121119_000028_000001.pth b/Dataset/Embeddings/1034/1034_121119_000028_000001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7795dfb5787b650c6bba451c7054375ed0728cc3
--- /dev/null
+++ b/Dataset/Embeddings/1034/1034_121119_000028_000001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4a7bdd020bf0da6fb08d272448c8b61c6f065e529084ce1cf9c39c1636e017c
+size 2358
diff --git a/Dataset/Embeddings/1088/1088_129236_000006_000007.pth b/Dataset/Embeddings/1088/1088_129236_000006_000007.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e75973ec870e0f63360af27cc1fff5c70d6e9c9b
--- /dev/null
+++ b/Dataset/Embeddings/1088/1088_129236_000006_000007.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1544023ea8afb9b0c71fa31e1e16d2ec510cf9d8637a64648941448c9e5e18ae
+size 2358
diff --git a/Dataset/Embeddings/14/14_212_000019_000000.pth b/Dataset/Embeddings/14/14_212_000019_000000.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b93e910c25c06b7635960a28295a32bfb5e950c4
--- /dev/null
+++ b/Dataset/Embeddings/14/14_212_000019_000000.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f248135ffdacc81ef4b5071f564448d49c2341b5c5c14bf4257af633f9318fd
+size 2269
diff --git a/Dataset/Embeddings/1422/1422_149735_000006_000000.pth b/Dataset/Embeddings/1422/1422_149735_000006_000000.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2413583f5360843ebeadb63754b1e065e0b96574
--- /dev/null
+++ b/Dataset/Embeddings/1422/1422_149735_000006_000000.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fee0b79857cf8ce499a658dfeb5137d5b4fa7e849dd8118c32028391b88b3d08
+size 2358
diff --git a/Dataset/Embeddings/1535/1535_141644_000004_000001.pth b/Dataset/Embeddings/1535/1535_141644_000004_000001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..23a9496bc51c51c672a3a99d69485d507290447b
--- /dev/null
+++ b/Dataset/Embeddings/1535/1535_141644_000004_000001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d71a10862f81b5a3f0877f6eb26cd4bc733e0cc9868acc65a65bb23ffe304b9
+size 2358
diff --git a/Dataset/Embeddings/1731/1731_142320_000122_000005.pth b/Dataset/Embeddings/1731/1731_142320_000122_000005.pth
new file mode 100644
index 0000000000000000000000000000000000000000..77049b81d396b2f4cf6254b90b16591df920d5eb
--- /dev/null
+++ b/Dataset/Embeddings/1731/1731_142320_000122_000005.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79f2de5cb40487fe19b5099b57fe0a41f0436f554019c619bcb4cd9d6c64bf36
+size 2358
diff --git a/Dataset/Embeddings/3009/3009_10327_000027_000005.pth b/Dataset/Embeddings/3009/3009_10327_000027_000005.pth
new file mode 100644
index 0000000000000000000000000000000000000000..39d608dcb78198dff105ad64c41e680593b170d8
--- /dev/null
+++ b/Dataset/Embeddings/3009/3009_10327_000027_000005.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f24187ad06ecbe02df165538c6881192cfd055b5a3cc5ab1348d2c05d6567421
+size 2353
diff --git a/Dataset/Embeddings/329/329_861_000024_000003.pth b/Dataset/Embeddings/329/329_861_000024_000003.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9f37d896c6a6a19991d35a61645640f0334de7fb
--- /dev/null
+++ b/Dataset/Embeddings/329/329_861_000024_000003.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc001f48b3f67d25192967e0a297dc1787144e36222e5b83a71ae6f5b89be9b3
+size 2274
diff --git a/Dataset/Embeddings/4830/4830_25904_000008_000001.pth b/Dataset/Embeddings/4830/4830_25904_000008_000001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f98510c0837e3059333194f2f26a8429a5f7b767
--- /dev/null
+++ b/Dataset/Embeddings/4830/4830_25904_000008_000001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:170bdd680d0735a19c5e88e01fc8bf84dac623d7c73eebeff6e99974b8e9d081
+size 2353
diff --git a/Dataset/Embeddings/4957/4957_30119_000070_000001.pth b/Dataset/Embeddings/4957/4957_30119_000070_000001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..32cce89dacb7e3816a1a4155139d21a773586dab
--- /dev/null
+++ b/Dataset/Embeddings/4957/4957_30119_000070_000001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c922316a446bcc28db8a43f768ade2b2113ce0f6fab24b60b396f67264ce07c8
+size 2353
diff --git a/Dataset/Embeddings/5012/5012_80192_000020_000003.pth b/Dataset/Embeddings/5012/5012_80192_000020_000003.pth
new file mode 100644
index 0000000000000000000000000000000000000000..abef6da452c3919155b1e43ae02cf2a2a6f293a3
--- /dev/null
+++ b/Dataset/Embeddings/5012/5012_80192_000020_000003.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba1c17f5100b1e0147e9c96d864cc054e8840a15cd46307e191fbe88a728b1b0
+size 2353
diff --git a/Dataset/Embeddings/5802/5802_76044_000038_000000.pth b/Dataset/Embeddings/5802/5802_76044_000038_000000.pth
new file mode 100644
index 0000000000000000000000000000000000000000..754af078eecea4c8cf77c7cd3383dd25a0b46ae1
--- /dev/null
+++ b/Dataset/Embeddings/5802/5802_76044_000038_000000.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7656515f537fa6de193f40d78c9747cfb1268266d3dd88a22a41ce2c3a28514a
+size 2353
diff --git a/Dataset/Embeddings/6544/6544_71420_000024_000001.pth b/Dataset/Embeddings/6544/6544_71420_000024_000001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a5fc18d0832f940a61928bf7de9203bcbe65a762
--- /dev/null
+++ b/Dataset/Embeddings/6544/6544_71420_000024_000001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f70c9bd92dea6ddfd495c7ab32cae30494eaf3b42f6d6533ff9f55de80593f05
+size 2353
diff --git a/Dataset/Embeddings/6918/6918_47541_000006_000008.pth b/Dataset/Embeddings/6918/6918_47541_000006_000008.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1c589adf65cdc87082d0aeef1a71f394102321fb
--- /dev/null
+++ b/Dataset/Embeddings/6918/6918_47541_000006_000008.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed49a76c226606f98ce4c2db2aac937354e40cc8fb789e29e93aa87f64bc01d1
+size 2353
diff --git a/Dataset/Embeddings/7011/7011_66622_000032_000002.pth b/Dataset/Embeddings/7011/7011_66622_000032_000002.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1321781bee6ce4113d1622eddbde0fdb48a762d2
--- /dev/null
+++ b/Dataset/Embeddings/7011/7011_66622_000032_000002.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43e63641af7d4322b89489acb9c10cfc7e71961bd6479c55c17135b3ecfa5605
+size 2353
diff --git a/Dataset/Embeddings/7059/7059_77897_000017_000001.pth b/Dataset/Embeddings/7059/7059_77897_000017_000001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0ba1d2ba5edc31a587b5f06f26ea4240520fa7ad
--- /dev/null
+++ b/Dataset/Embeddings/7059/7059_77897_000017_000001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:688e50692694cccbe5f61c8780e0980509118f4061a44180ec8dffff2d963921
+size 2353
diff --git a/Dataset/Embeddings/7190/7190_90542_000054_000000.pth b/Dataset/Embeddings/7190/7190_90542_000054_000000.pth
new file mode 100644
index 0000000000000000000000000000000000000000..af82be283e6c9099f3c977aea25f51a1750b09a6
--- /dev/null
+++ b/Dataset/Embeddings/7190/7190_90542_000054_000000.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f1925fcb8ce5ffa8b9223de17ea8d98c0abb24409852208f03c607374c9f60a
+size 2353
diff --git a/Dataset/Embeddings/7226/7226_86965_000020_000001.pth b/Dataset/Embeddings/7226/7226_86965_000020_000001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7a06fc4b5c2a4dc16b43519691dd6e7eeb17063c
--- /dev/null
+++ b/Dataset/Embeddings/7226/7226_86965_000020_000001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f88a82eee39139ab65f3b201f2657b795ad66d70ccd637f903d537df2acaca0
+size 2353
diff --git a/Dataset/Embeddings/7245/7245_104888_000016_000000.pth b/Dataset/Embeddings/7245/7245_104888_000016_000000.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8ad02e597904100f2b98182e08c0de1e7e286859
--- /dev/null
+++ b/Dataset/Embeddings/7245/7245_104888_000016_000000.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db50270eb45aab4344720a1da44d3c9d91ace10e69514287b3174ba9c2ca208a
+size 2358
diff --git a/Dataset/Embeddings/83/83_9960_000017_000003.pth b/Dataset/Embeddings/83/83_9960_000017_000003.pth
new file mode 100644
index 0000000000000000000000000000000000000000..65db33e3380a81f44a80a697b4f21d477b757b04
--- /dev/null
+++ b/Dataset/Embeddings/83/83_9960_000017_000003.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:958832b7e4e77f6eb8343b91091c8603b683b25c03f242e6de4b09952a0fba6d
+size 2274
diff --git a/Dataset/Embeddings/8758/8758_296465_000020_000000.pth b/Dataset/Embeddings/8758/8758_296465_000020_000000.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a12bb487870216418045b17cc16623a85c58b9ae
--- /dev/null
+++ b/Dataset/Embeddings/8758/8758_296465_000020_000000.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:465be53ae1d0a44ccdb90e0fcaccf09a0ae91041f984ef18f606df0169ea8f3e
+size 2358
diff --git a/Dataset/Embeddings/8820/8820_294120_000011_000001.pth b/Dataset/Embeddings/8820/8820_294120_000011_000001.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3759fbedcd3016c34a71c2f4953d8f4357234199
--- /dev/null
+++ b/Dataset/Embeddings/8820/8820_294120_000011_000001.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b79ee0d4796df0776bc0ddfc8683f2a025c4829893f28b3cff6b4a2d5405d968
+size 2358
diff --git a/Dataset/Embeddings/mean.json b/Dataset/Embeddings/mean.json
new file mode 100644
index 0000000000000000000000000000000000000000..a05338e3ed38749e7b317f90cb899ee2a81710ee
--- /dev/null
+++ b/Dataset/Embeddings/mean.json
@@ -0,0 +1,258 @@
+[
+  0.21412190794944763,
+  0.18206638097763062,
+  0.11840786784887314,
+  0.09126990288496017,
+  0.04086871072649956,
+  -0.149668350815773,
+  0.2645065188407898,
+  0.27953410148620605,
+  0.6700411438941956,
+  -0.06264923512935638,
+  0.2915269732475281,
+  0.12102372199296951,
+  -0.5578641891479492,
+  -0.12462181597948074,
+  0.6190101504325867,
+  -0.5761605501174927,
+  -0.084229975938797,
+  -0.0006869725184515119,
+  0.49899742007255554,
+  -0.21737882494926453,
+  -0.05707789212465286,
+  -0.18819154798984528,
+  -0.5531325340270996,
+  0.22641371190547943,
+  0.07952054589986801,
+  0.09851367026567459,
+  0.03574512526392937,
+  -0.13013364374637604,
+  -0.35363155603408813,
+  0.49086689949035645,
+  0.08895495533943176,
+  0.36905843019485474,
+  -0.10707297921180725,
+  -0.11953406780958176,
+  0.043051160871982574,
+  0.09323996305465698,
+  -0.16280269622802734,
+  -0.13945965468883514,
+  0.2095673531293869,
+  0.09729334712028503,
+  0.040950167924165726,
+  -0.37764972448349,
+  -0.018613651394844055,
+  -0.581308901309967,
+  -0.4080854058265686,
+  -0.42118221521377563,
+  1.0161728858947754,
+  -0.19709929823875427,
+  -0.024254681542515755,
+  0.04121233895421028,
+  -0.15502692759037018,
+  0.7614311575889587,
+  -0.6833258271217346,
+  0.33979618549346924,
+  0.49055442214012146,
+  0.011953921988606453,
+  0.4490082263946533,
+  0.2667522728443146,
+  -0.6408993005752563,
+  -0.17682728171348572,
+  0.12336420267820358,
+  0.1474267542362213,
+  -0.11565382778644562,
+  0.6467825174331665,
+  0.10751526057720184,
+  -0.14141449332237244,
+  0.6352338194847107,
+  -0.04154682531952858,
+  0.12760530412197113,
+  -0.6243913769721985,
+  0.08836925774812698,
+  0.28105032444000244,
+  -0.15209053456783295,
+  -0.0037005548365414143,
+  0.3098902106285095,
+  0.150644913315773,
+  0.07396118342876434,
+  -0.049714382737874985,
+  -0.5445783138275146,
+  -0.033714842051267624,
+  0.1200188472867012,
+  -0.2312866747379303,
+  0.20238173007965088,
+  -0.5392364263534546,
+  -0.40682801604270935,
+  -0.16234233975410461,
+  -0.6470288634300232,
+  -0.1738162636756897,
+  0.25936004519462585,
+  -0.15742169320583344,
+  0.24468930065631866,
+  0.13714095950126648,
+  0.1449803113937378,
+  0.16882915794849396,
+  0.19944046437740326,
+  -0.29332247376441956,
+  0.0026240404695272446,
+  0.03341501206159592,
+  0.01569036766886711,
+  -0.4688950777053833,
+  0.09352052956819534,
+  0.13269393146038055,
+  0.06116529926657677,
+  -0.06562789529561996,
+  -0.23961076140403748,
+  -0.22402845323085785,
+  0.47103151679039,
+  0.0728374496102333,
+  -0.561316192150116,
+  0.46127453446388245,
+  0.15431830286979675,
+  0.08550310134887695,
+  -0.03363621234893799,
+  0.04015417397022247,
+  -0.014262784272432327,
+  0.08499719202518463,
+  -0.39322608709335327,
+  0.27674373984336853,
+  0.24571490287780762,
+  -0.2642858326435089,
+  -0.7408877015113831,
+  0.21007885038852692,
+  0.5898057222366333,
+  0.14988923072814941,
+  -0.07782910019159317,
+  0.4078785479068756,
+  0.3004123270511627,
+  0.6256987452507019,
+  -0.21651767194271088,
+  -0.17712117731571198,
+  -0.2749980688095093,
+  0.4826784133911133,
+  0.3035520911216736,
+  0.23235619068145752,
+  -0.061135340481996536,
+  0.49035653471946716,
+  -0.16356635093688965,
+  -0.35920438170433044,
+  0.023298246785998344,
+  0.015880409628152847,
+  -0.015357445925474167,
+  -0.3540240228176117,
+  0.44811102747917175,
+  -0.05202110856771469,
+  -0.19488674402236938,
+  0.4875786602497101,
+  -0.03857485204935074,
+  0.463600754737854,
+  -0.07009128481149673,
+  0.29871219396591187,
+  -0.35601672530174255,
+  0.5102726817131042,
+  0.3902379274368286,
+  0.3692609369754791,
+  -0.35389819741249084,
+  0.07650414854288101,
+  -0.63330078125,
+  0.5580229759216309,
+  0.10672216862440109,
+  0.10609150677919388,
+  0.45468848943710327,
+  0.15291742980480194,
+  0.36706316471099854,
+  -0.2831500768661499,
+  -0.14291781187057495,
+  -0.17804013192653656,
+  -0.5424429178237915,
+  -0.15468499064445496,
+  0.07343851029872894,
+  0.5380398631095886,
+  0.44494226574897766,
+  0.9300274848937988,
+  -0.0274032074958086,
+  0.3488404154777527,
+  -0.23694315552711487,
+  -0.2424279898405075,
+  -0.04125871881842613,
+  0.06136211380362511,
+  -0.5118930339813232,
+  -0.15055209398269653,
+  0.45361533761024475,
+  0.12657225131988525,
+  0.34210655093193054,
+  0.313772052526474,
+  -0.3521589934825897,
+  0.05892332270741463,
+  -0.11534406244754791,
+  0.514985203742981,
+  0.054903097450733185,
+  0.18034562468528748,
+  0.26060545444488525,
+  -0.29317837953567505,
+  0.1423174887895584,
+  0.25360995531082153,
+  -0.47162681818008423,
+  0.5438259243965149,
+  0.02562086470425129,
+  0.020302919670939445,
+  0.3039097189903259,
+  0.19996808469295502,
+  0.3423006236553192,
+  0.4524010717868805,
+  -0.3152591586112976,
+  -0.60369873046875,
+  0.16421166062355042,
+  -0.055804263800382614,
+  -0.35883089900016785,
+  0.32918551564216614,
+  -0.4741072952747345,
+  0.05971089377999306,
+  -0.062083590775728226,
+  0.05729498714208603,
+  -0.6715519428253174,
+  0.2646842896938324,
+  0.14343565702438354,
+  0.2957288324832916,
+  0.37478363513946533,
+  -0.684753954410553,
+  -0.14382798969745636,
+  -0.3416562080383301,
+  0.6120049953460693,
+  0.24825794994831085,
+  0.049689218401908875,
+  0.08789665251970291,
+  -0.518900454044342,
+  -0.2226269692182541,
+  0.17690403759479523,
+  0.011226996779441833,
+  0.05879935249686241,
+  0.03022083267569542,
+  0.11887083947658539,
+  0.7854664325714111,
+  -0.2452417016029358,
+  0.6136188507080078,
+  0.5491909384727478,
+  -0.07412725687026978,
+  -0.3089025616645813,
+  0.16618099808692932,
+  -0.03215228021144867,
+  0.13637210428714752,
+  0.10921650379896164,
+  -0.14989499747753143,
+  0.6000584959983826,
+  0.19014132022857666,
+  -0.007800411432981491,
+  -0.06849341839551926,
+  -0.19043166935443878,
+  -0.012874589301645756,
+  -0.8398106694221497,
+  -0.002614892553538084,
+  -0.26642924547195435,
+  0.25869783759117126,
+  -0.46403658390045166,
+  0.18120701611042023,
+  0.08567068725824356,
+  0.08117248862981796
+]
\ No newline at end of file
diff --git a/Dataset/Embeddings/std.json b/Dataset/Embeddings/std.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b628e81d92e4358e9f4d9959e7e821876b8dcac
--- /dev/null
+++ b/Dataset/Embeddings/std.json
@@ -0,0 +1,258 @@
+[
+  0.8075656890869141,
+  0.8826062679290771,
+  0.8430591821670532,
+  0.8703321814537048,
+  0.877600371837616,
+  0.8111068606376648,
+  0.8719013929367065,
+  0.9000007510185242,
+  0.9740477800369263,
+  0.8267052173614502,
+  0.8011612296104431,
+  0.9747788906097412,
+  0.8026949763298035,
+  0.8818342089653015,
+  0.8605656623840332,
+  0.8279756903648376,
+  0.772606611251831,
+  0.8957112431526184,
+  0.8716765642166138,
+  0.7797929644584656,
+  0.8252673149108887,
+  0.781441330909729,
+  0.8043056130409241,
+  0.877123236656189,
+  0.9237406849861145,
+  0.7914682030677795,
+  0.9089431166648865,
+  0.8154596090316772,
+  0.8381725549697876,
+  0.8573335409164429,
+  0.7951206564903259,
+  0.8356125354766846,
+  0.8639358282089233,
+  0.8588302135467529,
+  0.8966045379638672,
+  0.836276113986969,
+  0.8558772206306458,
+  0.8904256820678711,
+  0.8009889721870422,
+  0.9030625820159912,
+  0.8489034175872803,
+  0.7720499038696289,
+  0.780423641204834,
+  0.7854387760162354,
+  0.8878417611122131,
+  0.8503796458244324,
+  0.8932433128356934,
+  0.9315906763076782,
+  0.8437496423721313,
+  0.8389645218849182,
+  0.8701387643814087,
+  0.9080750942230225,
+  1.0714792013168335,
+  0.8976108431816101,
+  0.8437362909317017,
+  0.8633260726928711,
+  0.8580045700073242,
+  0.8063361644744873,
+  0.8105617761611938,
+  0.8995920419692993,
+  0.8316185474395752,
+  0.9079830050468445,
+  0.8115889430046082,
+  0.8792805671691895,
+  0.8858475685119629,
+  0.7682526111602783,
+  0.8312106728553772,
+  0.8296751379966736,
+  0.9122119545936584,
+  0.9119444489479065,
+  0.8761489391326904,
+  0.8376705646514893,
+  0.9226043820381165,
+  0.8830709457397461,
+  0.819685161113739,
+  0.9397792816162109,
+  0.833674967288971,
+  0.8619604110717773,
+  0.8484258651733398,
+  0.943915605545044,
+  0.8020740151405334,
+  0.8027610182762146,
+  0.9116966724395752,
+  0.8570717573165894,
+  0.7944185733795166,
+  0.8977150917053223,
+  0.9434093236923218,
+  0.9964787364006042,
+  0.8149264454841614,
+  0.8179062604904175,
+  0.832256555557251,
+  0.866649329662323,
+  0.8442603349685669,
+  0.9397143125534058,
+  0.8501031398773193,
+  0.9365203380584717,
+  0.8380716443061829,
+  0.8887302279472351,
+  0.8084500432014465,
+  0.7769243121147156,
+  0.8449881076812744,
+  0.9015783667564392,
+  0.9295680522918701,
+  0.8259174227714539,
+  0.8573725819587708,
+  0.8600193858146667,
+  0.8780449032783508,
+  0.8595342040061951,
+  0.7720226049423218,
+  0.816754937171936,
+  0.8180097937583923,
+  0.8093970417976379,
+  0.9032255411148071,
+  0.8697183728218079,
+  0.888511061668396,
+  0.7960647940635681,
+  0.8589795827865601,
+  0.8813145160675049,
+  0.8638142347335815,
+  0.9093354344367981,
+  0.8201130628585815,
+  0.8607465028762817,
+  0.9925655722618103,
+  0.9680612683296204,
+  0.8303309679031372,
+  0.8515812158584595,
+  0.8854086399078369,
+  0.8599415421485901,
+  0.8196620941162109,
+  0.9137897491455078,
+  0.8218133449554443,
+  0.8703830242156982,
+  0.845089852809906,
+  0.8652607202529907,
+  0.877587080001831,
+  0.834847629070282,
+  0.7999405860900879,
+  0.867475152015686,
+  0.9779040217399597,
+  0.8888542652130127,
+  0.8318555951118469,
+  0.8721846342086792,
+  0.8582359552383423,
+  0.8781721591949463,
+  0.7750568389892578,
+  0.9456684589385986,
+  0.8390375971794128,
+  0.8528217077255249,
+  0.9676473736763,
+  0.9669485092163086,
+  0.8177183866500854,
+  0.8109471201896667,
+  0.8565740585327148,
+  1.012668490409851,
+  0.8075276017189026,
+  0.8120420575141907,
+  0.8192445039749146,
+  0.9088258743286133,
+  0.806582510471344,
+  0.8778362274169922,
+  0.9832965135574341,
+  0.8517345190048218,
+  0.8954508900642395,
+  0.8626090288162231,
+  0.8306634426116943,
+  0.7902420163154602,
+  0.8680355548858643,
+  0.8405691385269165,
+  0.8080191612243652,
+  0.8716298937797546,
+  0.8520878553390503,
+  0.8133600354194641,
+  0.9267045855522156,
+  0.8689888715744019,
+  0.8166713118553162,
+  0.8387840390205383,
+  0.835797131061554,
+  0.8922353386878967,
+  0.8736470937728882,
+  0.9051007032394409,
+  0.8347994685173035,
+  0.8269197344779968,
+  0.7968848943710327,
+  0.8677981495857239,
+  0.8539698719978333,
+  0.9122839570045471,
+  0.907562255859375,
+  0.908149242401123,
+  0.8897758722305298,
+  0.8776298761367798,
+  0.8702916502952576,
+  0.7712435722351074,
+  0.8737289905548096,
+  1.003007411956787,
+  0.9195813536643982,
+  0.9373644590377808,
+  0.8549340963363647,
+  0.8885018229484558,
+  0.8555989265441895,
+  0.8315033316612244,
+  0.8457157611846924,
+  0.8452540636062622,
+  0.9597710967063904,
+  0.8279005885124207,
+  0.9954813122749329,
+  0.8817158937454224,
+  0.8564739227294922,
+  0.8737724423408508,
+  0.8833761215209961,
+  0.9069574475288391,
+  0.8549059629440308,
+  0.8478658199310303,
+  0.8306840062141418,
+  0.8308926820755005,
+  0.8582388162612915,
+  0.7912089228630066,
+  0.843919038772583,
+  0.8585576415061951,
+  0.850679337978363,
+  0.921983003616333,
+  0.8164607882499695,
+  0.8369028568267822,
+  0.7947129607200623,
+  0.8371235132217407,
+  0.8269281387329102,
+  0.8633431196212769,
+  0.9147580862045288,
+  0.9019842743873596,
+  0.8293289542198181,
+  0.8421900868415833,
+  0.8144598603248596,
+  0.9013247489929199,
+  0.7653704285621643,
+  0.8295224905014038,
+  0.9549149870872498,
+  0.8671613931655884,
+  0.8507492542266846,
+  0.8559182286262512,
+  0.839141309261322,
+  0.918213427066803,
+  0.9064037203788757,
+  0.8579128980636597,
+  0.8337833881378174,
+  0.9374175071716309,
+  0.9142330884933472,
+  0.7878691554069519,
+  0.8651018142700195,
+  0.8595719933509827,
+  0.8955603837966919,
+  0.9085484743118286,
+  0.8001472353935242,
+  0.7812052369117737,
+  0.8475046157836914,
+  0.8226194381713867,
+  0.8940064311027527,
+  0.9277697801589966
+]
\ No newline at end of file
diff --git a/Dataset/dataset.yaml b/Dataset/dataset.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0715c87a69570f45eb66a231b12d8f9a39865247
--- /dev/null
+++ b/Dataset/dataset.yaml
@@ -0,0 +1,67 @@
+dataset:
+  '7190_90542_000054_000000':
+    speaker_id: '7190'
+    example_id: '7190_90542_000054_000000'
+  '4830_25904_000008_000001':
+    speaker_id: '4830'
+    example_id: '4830_25904_000008_000001'
+  '8820_294120_000011_000001':
+    speaker_id: '8820'
+    example_id: '8820_294120_000011_000001'
+  '3009_10327_000027_000005':
+    speaker_id: '3009'
+    example_id: '3009_10327_000027_000005'
+  '7226_86965_000020_000001':
+    speaker_id: '7226'
+    example_id: '7226_86965_000020_000001'
+  '329_861_000024_000003':
+    speaker_id: '329'
+    example_id: '329_861_000024_000003'
+  '5802_76044_000038_000000':
+    speaker_id: '5802'
+    example_id: '5802_76044_000038_000000'
+  '1535_141644_000004_000001':
+    speaker_id: '1535'
+    example_id: '1535_141644_000004_000001'
+  '7011_66622_000032_000002':
+    speaker_id: '7011'
+    example_id: '7011_66622_000032_000002'
+  '8758_296465_000020_000000':
+    speaker_id: '8758'
+    example_id: '8758_296465_000020_000000'
+  '1034_121119_000028_000001':
+    speaker_id: '1034'
+    example_id: '1034_121119_000028_000001'
+  '4957_30119_000070_000001':
+    speaker_id: '4957'
+    example_id: '4957_30119_000070_000001'
+  '83_9960_000017_000003':
+    speaker_id: '83'
+    example_id: '83_9960_000017_000003'
+  '7059_77897_000017_000001':
+    speaker_id: '7059'
+    example_id: '7059_77897_000017_000001'
+  '1731_142320_000122_000005':
+    speaker_id: '1731'
+    example_id: '1731_142320_000122_000005'
+  '6918_47541_000006_000008':
+    speaker_id: '6918'
+    example_id: '6918_47541_000006_000008'
+  '6544_71420_000024_000001':
+    speaker_id: '6544'
+    example_id: '6544_71420_000024_000001'
+  '7245_104888_000016_000000':
+    speaker_id: '7245'
+    example_id: '7245_104888_000016_000000'
+  '5012_80192_000020_000003':
+    speaker_id: '5012'
+    example_id: '5012_80192_000020_000003'
+  '1422_149735_000006_000000':
+    speaker_id: '1422'
+    example_id: '1422_149735_000006_000000'
+  '14_212_000019_000000':
+    speaker_id: '14'
+    example_id: '14_212_000019_000000'
+  '1088_129236_000006_000007':
+    speaker_id: '1088'
+    example_id: '1088_129236_000006_000007'
diff --git a/app.py b/app.py
index 4ec513dcc365c02f07d1115d0a3e56900c0095e7..118d2d5b53a4bc00a513291ee5f31c6b6a5256aa 100644
--- a/app.py
+++ b/app.py
@@ -1,31 +1,39 @@
 import numpy as np 
 from pathlib import Path
-import padertorch as pt
 import paderbox as pb
-import time
 import torch
-import torchaudio
 from onnxruntime import InferenceSession
 from pvq_manipulation.models.vits import Vits_NT
 from pvq_manipulation.models.ffjord import FFJORD
-from IPython.display import display, Audio, clear_output
 from pvq_manipulation.models.hubert import HubertExtractor, SID_LARGE_LAYER
 import librosa
 from pvq_manipulation.helper.vad import EnergyVAD
 import gradio as gr
+from pvq_manipulation.helper.creapy_wrapper import process_file
 
-device = 'cpu'  #'cuda' if torch.cuda.is_available() else 'cpu'
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+pvq_labels = ['Weight', 'Resonance', 'Breathiness', 'Roughness', 'Loudness', 'Strain', 'Pitch']
 
-# load tts model
-storage_dir_tts = Path("./models/tts_model/")
-tts_model = Vits_NT.load_model(storage_dir_tts, "model.pt")
+dataset_dict = pb.io.load_yaml('./Dataset/dataset.yaml')
+
+cached_example_id = None
+cached_loaded_example = None
+cached_labels = None
+cached_d_vector = None
+cached_unmanipulated = None
+
+# path to stats
+stats_path = Path('./Dataset/Embeddings/')
 
 # load normalizing flow
 storage_dir_normalizing_flow = Path("./models/norm_flow")
-speaker_conditioning = pb.io.load(storage_dir_normalizing_flow / "speaker_conditioning.json")
-
+config_norm_flow = pb.io.load_yaml(storage_dir_normalizing_flow / "config.json")
 normalizing_flow = FFJORD.load_model(storage_dir_normalizing_flow, checkpoint="model.pt", device=device)
 
+# load tts model
+storage_dir_tts = Path("./models/tts_model/")
+tts_model = Vits_NT.load_model(storage_dir_tts, "model.pt")
+
 # load hubert features model
 hubert_model = HubertExtractor(
     layer=SID_LARGE_LAYER,
@@ -35,140 +43,157 @@ hubert_model = HubertExtractor(
     # storage_dir= # target storage dir hubert model
 )
 
-# example synthesis
-# speaker_id = 1034
-# example_id = "1034_121119_000028_000001"
-
-# wav_1 = tts_model.synthesize_from_example({
-#     'text' : "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", 
-#     'd_vector_storage_root': f"./Saved_models/Dataset/Embeddings/{speaker_id}/{example_id}.pth"
-# })
-# display(Audio(wav_1, rate=24_000, normalize=True))
 
-# manipulation block
 def get_manipulation(
-    d_vector,
+    example,
     labels,
-    flow, 
+    flow,
     tts_model,
+    d_vector,
+    config_norm_flow,
     manipulation_idx=0,
     manipulation_fkt=1,
 ):
     labels_manipulated = labels.clone()
-    labels_manipulated[:,manipulation_idx] += manipulation_fkt
-    
-    output_forward = flow.forward((d_vector.float(), labels))[0]
+    labels_manipulated[:, manipulation_idx] += manipulation_fkt
+
+    if config_norm_flow['flag_remove_mean']:
+        global_mean = pb.io.load(stats_path / "mean.json")
+        global_mean = torch.tensor(global_mean, dtype=torch.float32)
+        speaker_embedding_norm = (d_vector - global_mean)
+        global_std = pb.io.load(stats_path / "std.json")
+        global_std = torch.tensor(global_std, dtype=torch.float32)
+        speaker_embedding_norm = speaker_embedding_norm / global_std
+    else:
+        speaker_embedding_norm = d_vector
+
+    output_forward = flow.forward((speaker_embedding_norm.float(), labels))[0]
     sampled_class_manipulated = flow.sample((output_forward, labels_manipulated))[0]
 
+    if config_norm_flow['flag_remove_mean']:
+        sampled_class_manipulated = (sampled_class_manipulated * global_std + global_mean)
+
     wav = tts_model.synthesize_from_example({
-        'text': "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
+        'text': example['transcription'],
         'd_vector': d_vector.detach().numpy(),
         'd_vector_man': sampled_class_manipulated.detach().numpy(),
-    })    
+        'd_vector_storage_root': example['d_vector_storage_root'],
+    })
     return wav
 
-def extract_speaker_embedding(example):
-    observation, sr = pb.io.load_audio(example['audio_path']['observation'], return_sample_rate=True)
-    observation = librosa.resample(observation, orig_sr=sr, target_sr=16_000)
-    
-    vad = EnergyVAD(sample_rate=16_000)
-    if observation.ndim == 1:
-        observation = observation[None, :]
-    
-    observation = vad({'audio_data': observation})['audio_data']
-    
-    with torch.no_grad():
-        example = tts_model.speaker_manager.prepare_example({'audio_data': {'observation': observation}, **example})
-        example = pt.data.utils.collate_fn([example])
-        example['features'] = torch.tensor(np.array(example['features']))
-        d_vector = tts_model.speaker_manager.forward(example)[0]
-    return d_vector
-
-# load speaker labels
-def load_speaker_labels(example, speaker_conditioning, reg_stor_dir=Path('./models/pvq_extractor/')):
-    audio, _ = torchaudio.load(example['audio_path']['observation'])
-    audio = audio.to(device)
-    num_samples = torch.tensor([audio.shape[-1]], device=device)
 
+def get_creak_label(example):
+    """Return the mean creak prediction over the classified blocks, scaled by 100 (0.0 if none)."""
+    _, y_pred, included_indices = process_file(example['loaded_audio_data']['16_000'])
+    # When every block was excluded the selection is empty and np.mean would yield NaN.
+    return float(np.mean(y_pred[included_indices])) * 100 if np.any(included_indices) else 0.0
+
+
+def load_speaker_labels(example, reg_stor_dir=Path('./models/pvq_extractor/')):
+    audio_data = torch.tensor(example['loaded_audio_data']['16_000'], dtype=torch.float)[None, :]
+    num_samples = torch.tensor([audio_data.shape[-1]])
+
+    if torch.cuda.is_available():
+        audio_data = audio_data.cuda()
+        num_samples = num_samples.cuda()
     providers = ["CPUExecutionProvider"]
 
     with torch.no_grad():
         features, seq_len = hubert_model(
-            audio, 
-            24_000, 
+            audio_data,
+            16_000,
             sequence_lengths=num_samples,
         )
         features = np.mean(features.squeeze(0).detach().cpu().numpy(), axis=-1)
-
         pvqd_predictions = {}
-        for pvq in ['Breathiness', 'Loudness', 'Pitch', 'Resonance', 'Roughness', 'Strain', 'Weight']:
+        for pvq in pvq_labels:
             with open(reg_stor_dir / f"{pvq}.onnx", "rb") as fid:
                 onnx = fid.read()
             sess = InferenceSession(onnx, providers=providers)
             pred = sess.run(None, {"X": features[None]})[0].squeeze(1)
             pvqd_predictions[pvq] = pred.tolist()[0]
-    labels = []
-    for key in speaker_conditioning:
-        labels.append(pvqd_predictions[key]/100)
-    return torch.tensor(labels)
 
+    pvqd_predictions['Creak_mean'] = get_creak_label(example)
+    labels = [pvqd_predictions[key] / 100 for key in pvq_labels + ["Creak_mean"]]
+    return torch.tensor(labels, device=device).float()
+
+
+def load_audio_files(example):
+    """Load the observation and attach VAD-processed 16 kHz and 24 kHz versions.
+
+    The signals are stored in ``example['loaded_audio_data']`` under the keys
+    ``'16_000'`` and ``'24_000'``; ``example`` is modified in place and returned.
+    """
+    observation_loaded, sr = pb.io.load_audio(example['audio_path']['observation'], return_sample_rate=True)
+
+    example['loaded_audio_data'] = {}
+    for key, target_sr in (('16_000', 16_000), ('24_000', 24_000)):
+        # Resample from the file's own signal every time: feeding the 16 kHz
+        # result back in with orig_sr=sr mislabels its rate whenever sr != 16 kHz.
+        observation = librosa.resample(observation_loaded, orig_sr=sr, target_sr=target_sr)
+        if observation.ndim == 1:
+            observation = observation[None, :]
+
+        vad = EnergyVAD(sample_rate=target_sr)
+        example['loaded_audio_data'][key] = vad({'audio_data': observation})['audio_data']
+
+    return example
 
-example = {
-    'audio_path': {'observation': "audio/1034_121119_000028_000001.wav"},
-    'speaker_id': 1034,
-    'example_id': "1034_121119_000028_000001",
-}
 
-labels = load_speaker_labels(example, speaker_conditioning)
-label_options = ['Weight', 'Resonance', 'Breathiness', 'Roughness', 'Loudness', 'Strain', 'Pitch']
+def update_manipulation(manipulation_idx, example_id, transcription, manipulation_fkt):
+    global cached_example_id, cached_loaded_example, cached_labels, cached_d_vector, example_database, cached_unmanipulated
 
-# print('Estimated PVQ strengths of input speaker:')
-# max_len = max(len(name) for name in label_options)  
-# for label_name, pvq in zip(label_options, labels):
-    # print(f'{label_name:<{max_len}} : {pvq:6.2f}')
+    speaker_id = dataset_dict['dataset'][example_id]['speaker_id']
 
+    example = {
+        'audio_path': {'observation': f"./Dataset/Audio_files/{example_id}.wav"},
+        'd_vector_storage_root': f"./Saved_models/Dataset/Embeddings/{speaker_id}/{example_id}.pth",
+        'speaker_id': speaker_id,
+        'example_id': example_id,
+        'transcription': transcription
+    }
 
-def update_manipulation(manipulation_idx, manipulation_fkt):
+    example_changed = cached_example_id != example_id
+    if example_changed:
+        cached_loaded_example = load_audio_files(example)
+        cached_d_vector = torch.load(f"./Dataset/Embeddings/{speaker_id}/{example_id}.pth")
+        cached_labels = load_speaker_labels(example)
+        cached_example_id = example_id
+    if example_changed or cached_loaded_example['transcription'] != transcription:  # new text -> stale reference
+        cached_loaded_example['transcription'] = transcription
+        cached_unmanipulated = tts_model.synthesize_from_example({'text': transcription, 'd_vector': cached_d_vector.detach().numpy()})
 
-    d_vector = extract_speaker_embedding(example)
-    labels = load_speaker_labels(example, speaker_conditioning)
-    
     wav_manipulated = get_manipulation(
-        # example=example, 
-        d_vector=d_vector, 
-        labels=labels[None, :], 
+        example=example,
+        d_vector=cached_d_vector,
+        labels=cached_labels[None, :],
         flow=normalizing_flow,
         tts_model=tts_model,
         manipulation_idx=manipulation_idx,
         manipulation_fkt=manipulation_fkt,
+        config_norm_flow=config_norm_flow,
     )
-    
-    wav_unmanipulated = tts_model.synthesize_from_example({
-        'text': "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", 
-        'd_vector': d_vector.detach().numpy(),
-    })
-    sr = 24_000
-    return (sr, wav_unmanipulated), (sr, wav_manipulated)
-    
-    # with audio_output:
-    #     clear_output(wait=True) 
-    #     print('Manipulated Speaker')
-    #     display(Audio(wav_manipulated, rate=24_000, normalize=True))
-    #     print('Unmanipulated Synthese')
-    #     display(Audio(wav_unmanipulated, rate=24_000, normalize=True))
-    #     print('Original Speaker')
-    #     display(Audio(example['audio_path']['observation'], rate=24_000, normalize=True))
-
-    # print(f"Manipulated {label_options[manipulation_idx]} with strength {manipulation_fkt}")
-
-
-dropdown_options = [(label, i) for i, label in enumerate(label_options)]
+    return (24_000, cached_unmanipulated), (24_000, wav_manipulated)
+
+
 demo = gr.Interface(
     title="Perceptual Voice Quality (PVQ) Manipulation",
     fn=update_manipulation,
     inputs=[
-        gr.Dropdown(label="PVQ Feature", choices=dropdown_options, value=2, type="index"),
-        gr.Slider(label="Manipulation Factor", minimum=-2.0, maximum=2.0, value=1.0, step=0.1),
+        gr.Dropdown(
+            label="PVQ Feature",
+            choices=[('Weight', 0), ('Resonance', 1), ('Breathiness', 2), ('Roughness', 3), ('Creak', 7)],
+            value=2, type="value"
+        ),
+        gr.Dropdown(
+            choices=dataset_dict['dataset'].keys(),
+            value='1422_149735_000006_000000', type="value"
+        ),
+        gr.Textbox(
+            value="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
+            placeholder='Type something'
+        ),
+        gr.Slider(label="Manipulation Factor", minimum=-1.0, maximum=2.0, value=1.0, step=0.1),
     ],
     outputs=[gr.Audio(label="original utterance"), gr.Audio(label="manipulated utterance")],
 )
diff --git a/models/norm_flow/config.json b/models/norm_flow/config.json
index 520ef09629eaf9c4ade198b681bb55afbb82ee49..b890fe179bd19694d0546235893e512f9aadb487 100644
--- a/models/norm_flow/config.json
+++ b/models/norm_flow/config.json
@@ -1,12 +1,15 @@
 {
-  "factory": "pvq_manipulation.models.ffjord.FFJORD",
-  "normalize": true,
-  "ode_function": {
-    "condition_dim": 7,
-    "factory": "pvq_manipulation.models.ode_functions.CNFNN",
-    "hidden_channels": [
-      512
-    ],
-    "input_dim": 256
-  }
-}
\ No newline at end of file
+  "model":{
+    "factory": "pvq_manipulation.models.ffjord.FFJORD",
+    "normalize": true,
+    "ode_function": {
+      "condition_dim": 8,
+      "factory": "pvq_manipulation.models.ode_functions.CNFNN",
+      "hidden_channels": [
+        512
+      ],
+      "input_dim": 256
+    }
+  },
+  "flag_remove_mean": true
+}
diff --git a/models/norm_flow/model.pt b/models/norm_flow/model.pt
index 2d35cb25436a89869453af29dffda8237ce41d14..c86dabaf09245fb02893a473565bea229ee96584 100644
--- a/models/norm_flow/model.pt
+++ b/models/norm_flow/model.pt
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c5d5f64d3413a684254fdbdcc9c5d4c2c311946ff4c6724708ea4a8d332783d
-size 1120055
+oid sha256:d1110d37007980fcd8f28525ff39632e6fbade8160891fc079dbeb8777bb4d49
+size 1125666
diff --git a/models/norm_flow/speaker_conditioning.json b/models/norm_flow/speaker_conditioning.json
deleted file mode 100644
index dfec37f9a080571d00f925c3c32166296f48bbdb..0000000000000000000000000000000000000000
--- a/models/norm_flow/speaker_conditioning.json
+++ /dev/null
@@ -1,9 +0,0 @@
-[
-  "Weight",
-  "Resonance",
-  "Breathiness",
-  "Roughness",
-  "Loudness",
-  "Strain",
-  "Pitch"
-]
\ No newline at end of file
diff --git a/old_README.md b/old_README.md
deleted file mode 100644
index f87977c9a91fd245a7ee64983d249869ae7f03e3..0000000000000000000000000000000000000000
--- a/old_README.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# PVQ Manipulation
-
-This repository contains code for manipulating perceptual voice quality (PVQ) features, intended for experiments and synthesis using models such as YourTTS.
-
----
-
-## Installation
-
-### Clone the repository and install
-
-```sh
-git clone https://github.com/FrederikRautenberg/pvq_manipulation.git
-cd pvq_manipulation
-pip install -e .
-```
-
-### Install [YourTTS](https://github.com/coqui-ai/TTS) from
-```
-git clone https://github.com/coqui-ai/TTS
-cd TTS
-pip install -e .[all,dev,notebooks]  # Select the relevant extras
-```
-### Make sure that [Paderbox](https://github.com/fgnt/paderbox) and [Padertorch](https://github.com/fgnt/padertorch?tab=readme-ov-file) are installed from 
-```
-git clone https://github.com/fgnt/paderbox.git
-cd paderbox
-pip install --editable .[all]
-git clone https://github.com/fgnt/padertorch.git
-cd padertorch && pip install -e .[all]
-```
diff --git a/pvq_manipulation/helper/creapy_wrapper.py b/pvq_manipulation/helper/creapy_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3174ac64b164bccd343d715ff92bc87966f07a1
--- /dev/null
+++ b/pvq_manipulation/helper/creapy_wrapper.py
@@ -0,0 +1,236 @@
+from __future__ import annotations
+
+import parselmouth as pm
+import warnings
+import numpy as np
+import pandas as pd
+from scipy.signal.windows import hann
+from pathlib import Path
+from sklearn.impute import SimpleImputer
+
+import creapy
+from creapy.feature_extraction.feature_extraction import _cpp, _h1_h2, _jitter, _shimmer, _f0mean, _zcr, _ste
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.neural_network import MLPClassifier
+
+
+class Model:
+    """Classifier for creaky voice, set up from the "MODEL" section of the creapy config."""
+    def __init__(self) -> None:
+        self._config = creapy.utils.get_config()["MODEL"]
+        self._X_train: pd.DataFrame
+        self._y_train: pd.Series
+        self._imputer: SimpleImputer  # annotation only; assigned in fit() when impute_at_fit is set
+        self._features = self._config["FEATURES"]["for_classification"]
+        self._fitted = False
+        _clf = self._config["CLASSIFIER"]["clf"]  # key into the module-level ``clfs`` table
+        self._clf = clfs[_clf](
+            **self._config["CLASSIFIER"]["VALUES"][_clf.upper()]["kwargs"])
+
+    def fit(self, X_train: pd.DataFrame, y_train: pd.DataFrame | pd.Series) -> None:
+        """Fit the classifier on the configured feature columns of the training data.
+
+        Args:
+            X_train (pd.DataFrame): Features of training data.
+            y_train (pd.DataFrame | pd.Series): Targets of training data (creak, no-creak).
+        """
+        if isinstance(y_train, pd.DataFrame):
+            y_train = y_train.to_numpy()
+        if self._config["PREPROCESSING"]["impute_at_fit"] is True:
+            self._X_train, self._imputer = creapy.preprocessing.impute(
+                X_train=X_train.loc[:, self._features], return_imputer=True)
+        else:
+            self._X_train = X_train
+        self._y_train = pd.Series(y_train, name=self._config["target_label"])
+        self._clf.fit(
+            self._X_train.loc[:, self._features], self._y_train)
+        self._fitted = True
+
+    def predict(self, X_test: pd.DataFrame, predict_proba: bool | None = None) -> np.ndarray:
+        """Predict targets or creak probabilities for the given features.
+
+        Args:
+            X_test (pd.DataFrame): Features to be predicted.
+            predict_proba (bool, optional): If `True` the likelihood to be creak will be returned, else the predicted target.
+            If `None` (default), the CLASSIFIER "predict_proba" config entry decides.
+
+        Returns:
+            np.ndarray: Predicted targets, or probability of creak (moving-averaged if configured).
+        """
+        self._config = creapy.utils.get_config()["MODEL"]  # the config is re-read on every call
+        if predict_proba is not None:
+            assert isinstance(predict_proba, bool)
+        else:
+            predict_proba = self._config["CLASSIFIER"]["predict_proba"]
+        if hasattr(self, "_imputer"):  # only present if fit() created an imputer
+            X_test = pd.DataFrame(self._imputer.transform(
+                X_test.loc[:, self._features]), columns=self._X_train.columns, index=X_test.index)
+        if predict_proba is True:
+            _target_index = np.argwhere(
+                self._clf.classes_ == self._config["CLASSIFIER"]["target_name"]).item()
+            y_pred = self._clf.predict_proba(X_test[self._features])[
+                :, _target_index].flatten()
+            if self._config["POSTPROCESSING"]["MAVG"]["mavg"] is True:
+                length, mode = map(
+                    self._config["POSTPROCESSING"]["MAVG"]["VALUES"].get, ("length", "mode"))
+                y_pred = creapy.postprocessing.moving_average(y_pred, length, mode)
+        else:
+            y_pred = self._clf.predict(X_test[self._features])
+
+        return y_pred
+
+
+def read_wav(
+        data,
+        sr,
+        normalize: bool = True,
+        start: float = 0.0,
+        end: float | int = -1,
+        mono=True
+) -> tuple[np.ndarray, int]:
+    """Downmix, cut and optionally peak-normalize an already loaded signal.
+
+    Keeps the samples from ``start`` to ``end`` seconds (``end == -1``: up to the end) and divides
+    them by the peak magnitude of the uncut signal. The input array is never modified.
+    """
+    if mono is True and data.ndim > 1:
+        data = data.sum(axis=1) / data.shape[1]  # averages over axis 1
+    peak = np.max(np.abs(data)) if data.size else 0  # taken before cutting, like the old max_
+    stop = None if end == -1 else int(end * sr)
+    data = data[int(start * sr):stop]
+    if normalize is True and peak > 0:  # a silent or empty signal is returned unscaled
+        data = data / peak  # not "/=": the slice is a view, so that would rescale the caller's array
+    return data, sr
+
+
+def _hnr(data: np.ndarray, sound: pm.Sound, sr) -> float:
+    """Return the mean harmonicity of ``sound``, or NaN when none can be computed.
+
+    ``data`` and ``sr`` are unused; they keep the signature in line with the other extractors.
+    """
+    try:
+        harmonicity = sound.to_harmonicity()
+    except pm.PraatError:
+        return np.nan
+    # Frames equal to -200 are dropped before averaging, as in
+    # https://parselmouth.readthedocs.io/en/stable/examples/batch_processing.html?highlight=harmonicity#Batch-processing-of-files
+    frames = harmonicity.values
+    kept = frames[frames != -200]
+    # Nothing left to average means the HNR is undefined.
+    return kept.mean() if kept.size > 0 else np.nan
+
+
+def blockwise_feature_calculation(data: np.ndarray, sr, feature):
+    """Evaluate the extractor registered for ``feature`` on every block (row) of ``data``."""
+    # Every Sound is built first, before the extractor is looked up and called.
+    block_sounds = [pm.Sound(values=block, sampling_frequency=sr) for block in data]
+    extractor = FEATURE_MAPPING[feature]
+    return np.array([extractor(block, snd, sr) for block, snd in zip(data, block_sounds)])
+
+
+def process_file(data, sample_rate: int = 16_000):
+    """Classify creak block by block; returns ``(X_test, y_pred, included_indices)``."""
+    _config = creapy.config.get_config()  # NOTE(review): the rest of this file calls creapy.utils.get_config()
+    user_cfg = _config['USER']
+    model_cfg = _config['MODEL']
+    start, end = user_cfg['audio_start'], user_cfg['audio_end']
+    data, sr = read_wav(data, sample_rate, start=start, end=end)
+
+    w = hann(int(user_cfg["block_size"] * sample_rate))
+    creak_data_buff = creapy.preprocessing.buffer(data, sample_rate, window=w)
+    data_buffer = creak_data_buff.T
+    # Features enabled (True) under UNVOICED_EXCLUSION are computed for every block.
+    unvoiced_excl = model_cfg['PREPROCESSING']['UNVOICED_EXCLUSION']
+    preprocessing_features = [key for key, val in unvoiced_excl.items() if val is True]
+
+    elimination_chunks = np.stack([
+    blockwise_feature_calculation(
+            data_buffer, sample_rate, feature
+        ) for feature in preprocessing_features
+    ], axis=1)
+    # The user-level ZCR/STE thresholds replace the ones stored in the model config.
+    preproc_values = unvoiced_excl['VALUES']
+    preproc_values['ZCR']['threshold'] = user_cfg['zcr_threshold']
+    preproc_values['STE']['threshold'] = user_cfg['ste_threshold']
+
+    thresholds = np.array([
+        creapy.postprocessing.thresholding(
+            series=elimination_chunks[:, i],
+            **preproc_values[feature.upper()]
+        )
+        for i, feature in enumerate(preprocessing_features)
+    ])
+    included_indices = thresholds.sum(axis=0) == 0
+    # With no block left for classification, warn and return all-zero predictions.
+    if not np.any(included_indices):
+        warnings.warn("Did not make classification. Adjust ZCR/STE thresholds.")
+        y_pred = np.zeros(creak_data_buff.shape[1])
+        X_test = pd.DataFrame(elimination_chunks, columns=preprocessing_features)
+        return X_test, y_pred, included_indices
+
+    class_features = model_cfg["FEATURES"]["for_classification"]
+    X_class = np.stack([
+        blockwise_feature_calculation(
+            data_buffer[included_indices], sample_rate, feature
+        ) for feature in class_features
+    ], axis=1)
+
+    _X_test = pd.DataFrame(
+        X_class,
+        columns=class_features,
+        index=np.flatnonzero(included_indices)
+    )
+    # X_all holds both feature groups for every block; excluded rows stay 0 in the classification columns.
+    X_all = np.zeros((elimination_chunks.shape[0], elimination_chunks.shape[1] + len(class_features)))
+    X_all[:, :elimination_chunks.shape[1]] = elimination_chunks
+    X_all[included_indices, elimination_chunks.shape[1]:] = X_class
+
+    X_test = pd.DataFrame(X_all, columns=preprocessing_features + class_features)
+    # Predictions default to 0; the csv is picked via the USER gender_model entry and load_model fits a Model on it.
+    y_pred = np.zeros(creak_data_buff.shape[1])
+    gender_model = user_cfg['gender_model']
+    model_path = creapy.utils.helpers.get_root() / model_cfg["model_location"]
+    model_path = (model_path.parent / f"{model_path.stem}_{gender_model.upper()}").with_suffix(".csv")
+    model = load_model(model_path)
+
+    y_pred[included_indices] = model.predict(_X_test)
+
+    return X_test, y_pred, included_indices
+
+
+def load_model(filepath: str = None) -> Model:
+    """Fit a new model on the training data stored in a csv file.
+
+    Args:
+        filepath (str, optional): Location of the csv file with the feature and target columns.
+
+    Returns:
+        Model: Model fitted for creak classification.
+    """
+    # NOTE(review): the default ``None`` is not usable here, ``Path(None)`` raises TypeError.
+    filepath = Path(filepath)
+    _config = creapy.utils.get_config()
+    _X_combined = pd.read_csv(filepath)
+    model = Model()
+    _target_column = _config["MODEL"]["target_label"]
+    _feature_columns = _config["MODEL"]["FEATURES"]["for_classification"]
+    _X_train, _y_train = _X_combined[_feature_columns], _X_combined[_target_column]
+    model.fit(_X_train, _y_train)
+    return model
+
+
+FEATURE_MAPPING = {
+    "cpp": _cpp,
+    "hnr": _hnr,
+    "h1h2": _h1_h2,
+    "jitter": _jitter,
+    "shimmer": _shimmer,
+    "f0mean": _f0mean,
+    "zcr": _zcr,
+    "ste": _ste,
+}
+
+clfs = {
+    "rfc": RandomForestClassifier,
+    "mlp": MLPClassifier
+}
diff --git a/pvq_manipulation/models/ffjord.py b/pvq_manipulation/models/ffjord.py
index ad830730cd439bbfd7cb6ca5a7c5571e312555d1..0feebe2e4b8a242a27901e4581d2cd02e3c3631c 100644
--- a/pvq_manipulation/models/ffjord.py
+++ b/pvq_manipulation/models/ffjord.py
@@ -113,7 +113,7 @@ class FFJORD(Model):
         if device is None:
             device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         model_dict = pb.io.load(model_path / "config.json")
-        model = Model.from_config(model_dict)
+        model = Model.from_config(model_dict['model'])
         cp = torch.load(
             model_path / checkpoint,
             map_location=device,
diff --git a/pvq_manipulation/models/vits.py b/pvq_manipulation/models/vits.py
index 0bbcefc53b18a981ee6c8ca4b6c7e72c7bef2ba3..2a58c5c4c00b462fc7a628927d2e819b8819cb31 100644
--- a/pvq_manipulation/models/vits.py
+++ b/pvq_manipulation/models/vits.py
@@ -28,7 +28,8 @@ from TTS.utils.audio import AudioProcessor
 from TTS.vocoder.models.hifigan_generator import HifiganGenerator
 from trainer.trainer import to_cuda
 from typing import Dict, List, Union
-
+if not torch.cuda.is_available():
+    from concurrent.futures import ThreadPoolExecutor
 
 class Vits_NT(Vits):
     def __init__(
@@ -205,12 +206,13 @@ class Vits_NT(Vits):
         Returns:
             - model_outputs (torch.Tensor): (batch_size, T_wav) Synthesized waveform
         """
-        speaker_embedding = aux_input['d_vector'].detach()[:, :, None]
-        if aux_input['d_vector_man'] is not None:
-            speaker_embedding_man = aux_input['d_vector_man'].detach()[:, :, None]
+        speaker_embedding = aux_input['d_vector']
+        if 'd_vector_man' in aux_input.keys() and aux_input['d_vector_man'] is not None:
+            speaker_embedding_man = aux_input['d_vector_man']
         else:
             speaker_embedding_man = speaker_embedding
-        aux_input['tokens'] = x.clone()
+
+        aux_input['tokens'] = x
         x_lengths = self._set_x_lengths(x, aux_input)
         x, m_p, logs_p, x_mask = self.text_encoder(
             x,
@@ -220,7 +222,7 @@ class Vits_NT(Vits):
         logw = self.duration_predictor(
             x,
             x_mask,
-            g=speaker_embedding,
+            g=speaker_embedding[:, :, None],
             lang_emb=None,
         )
 
@@ -231,21 +233,41 @@ class Vits_NT(Vits):
 
         attn_mask = x_mask * y_mask.transpose(1, 2)
         attn = generate_path(w_ceil.squeeze(1), attn_mask.squeeze(1).transpose(1, 2))
-        m_p = torch.matmul(attn.transpose(1, 2), m_p.transpose(1, 2)).transpose(1, 2)
-        logs_p = torch.matmul(attn.transpose(1, 2), logs_p.transpose(1, 2)).transpose(1, 2)
+
+        m_p = torch.einsum('blm, bnl -> bnm', attn, m_p)
+        logs_p = torch.einsum('blm, bnl -> bnm', attn, logs_p)
 
         z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * self.inference_noise_scale
 
-        z = self.flow(z_p, y_mask, g=speaker_embedding_man, reverse=True)
+        z = self.flow(z_p, y_mask, g=speaker_embedding_man[:, :, None], reverse=True)
         z, _, _, y_mask = self.upsampling_z(
             z,
             y_lengths=y_lengths,
             y_mask=y_mask
         )
-        o = self.waveform_decoder(
-            (z * y_mask)[:, :, : self.max_inference_len],
-            g=speaker_embedding_man if self.config.gan_speaker_conditioning else None
-        )
+
+        if not torch.cuda.is_available():
+            num_chunks = 2
+            chunk_size = z.shape[-1] // num_chunks
+            z_chunks = torch.split(z, chunk_size, dim=-1)
+
+            def decode_chunk(z_chunk):
+                return self.waveform_decoder(
+                    z_chunk,
+                    g=speaker_embedding_man[:, :, None] if self.config.gan_speaker_conditioning else None
+                )
+
+            with ThreadPoolExecutor(max_workers=num_chunks) as executor:
+                futures = [executor.submit(decode_chunk, chunk) for chunk in z_chunks]
+                results = [f.result() for f in futures]
+
+            o = torch.cat(results, dim=-1)
+
+        else:
+            o = self.waveform_decoder(
+                (z * y_mask)[:, :, : self.max_inference_len],
+                g=speaker_embedding_man[:, :, None] if self.config.gan_speaker_conditioning else None
+            )
         return o
 
     def forward(self, x, x_lengths, y, y_lengths, aux_input, inference=False):
diff --git a/requirements.txt b/requirements.txt
index 98d52a14a4ffa47d55c13c18ddd157f35f59c2ba..00c55f3bd5b1fe3118dd5a65e72cdb350583ff2c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ TTS==0.22.0
 padertorch
 onnxruntime
 torchdiffeq
+git+https://gitlab.tugraz.at/speech/creapy.git