Update inference.py
inference.py  CHANGED  (+45 -22)

@@ -136,7 +136,7 @@ class Inference(object):
         """Restore the trained generator and discriminator."""
         print('Loading the model...')
         G_path = os.path.join(model_directory, '{}-G.ckpt'.format(submodel))
-        self.G.load_state_dict(torch.load(G_path, map_location=lambda storage, loc: storage
+        self.G.load_state_dict(torch.load(G_path, map_location=lambda storage, loc: storage))

     def inference(self):
         # Load the trained generator.
@@ -170,7 +170,9 @@ class Inference(object):
         uniqueness_calc = []
         real_smiles_snn = []
         nodes_sample = torch.Tensor(size=[1, self.vertexes, 1]).to(self.device)
-
+        f = open("experiments/inference/{}/inference_drugs.txt".format(self.submodel), "w")
+        f.write("SMILES")
+        f.write("\n")
         val_counter = 0
         none_counter = 0

@@ -179,6 +181,7 @@ class Inference(object):
         pbar = tqdm(range(self.sample_num))
         pbar.set_description('Inference mode for {} model started'.format(self.submodel))
         for i, data in enumerate(self.inf_loader):
+
             val_counter += 1
             # Preprocess dataset
             _, a_tensor, x_tensor = load_molecules(
@@ -206,13 +209,14 @@ class Inference(object):
             inference_drugs = [None if x is None else max(x.split('.'), key=len) for x in inference_drugs]

             for molecules in inference_drugs:
-
-
+                if molecules is None:
+                    none_counter += 1

             for molecules in inference_drugs:
                 if molecules is not None:
-                    molecules = molecules.replace("*", "C")
-
+                    molecules = molecules.replace("*", "C")
+                    f.write(molecules)
+                    f.write("\n")
                     uniqueness_calc.append(molecules)
                     nodes_sample = torch.cat((nodes_sample, g_nodes_hat_sample.view(1, self.vertexes, 1)), 0)
             pbar.update(1)
@@ -223,21 +227,30 @@ class Inference(object):
             if generation_number == self.sample_num or none_counter == self.sample_num:
                 break

+        f.close()
+        print("Inference completed, starting metrics calculation.")
         if not self.disable_correction:
-
-            gen_smi =
+            corrected = correct.correct("experiments/inference/{}/inference_drugs.txt".format(self.submodel))
+            gen_smi = corrected["SMILES"].tolist()
+
         else:
-            gen_smi =
-
+            gen_smi = pd.read_csv("experiments/inference/{}/inference_drugs.txt".format(self.submodel))["SMILES"].tolist()
+
+
         et = time.time() - start_time

         gen_vecs = [AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles(x), 2, nBits=1024) for x in uniqueness_calc if Chem.MolFromSmiles(x) is not None]
         real_vecs = [AllChem.GetMorganFingerprintAsBitVect(x, 2, nBits=1024) for x in real_smiles_snn if x is not None]
+        print("Inference mode is lasted for {:.2f} seconds".format(et))

+        print("Metrics calculation started using MOSES.")
+
         if not self.disable_correction:
             val = round(len(gen_smi)/self.sample_num, 3)
+            print("Validity: ", val, "\n")
         else:
             val = round(fraction_valid(gen_smi), 3)
+            print("Validity: ", val, "\n")

         uniq = round(fraction_unique(gen_smi), 3)
         nov = round(novelty(gen_smi, chembl_smiles), 3)
@@ -251,23 +264,33 @@ class Inference(object):
         qed = round(np.mean([QED.qed(Chem.MolFromSmiles(x)) for x in gen_smi if Chem.MolFromSmiles(x) is not None]), 3)
         sa = round(np.mean([sascorer.calculateScore(Chem.MolFromSmiles(x)) for x in gen_smi if Chem.MolFromSmiles(x) is not None]), 3)

+        print("Uniqueness: ", uniq, "\n")
+        print("Novelty: ", nov, "\n")
+        print("Novelty_test: ", nov_test, "\n")
+        print("Drug_novelty: ", drug_nov, "\n")
+        print("max_len: ", max_len, "\n")
+        print("mean_atom_type: ", mean_atom, "\n")
+        print("snn_chembl: ", snn_chembl, "\n")
+        print("snn_drug: ", snn_drug, "\n")
+        print("IntDiv: ", int_div, "\n")
+        print("QED: ", qed, "\n")
+        print("SA: ", sa, "\n")
+
+        print("Metrics are calculated.")
         model_res = pd.DataFrame({"submodel": [self.submodel], "validity": [val],
                                   "uniqueness": [uniq], "novelty": [nov],
                                   "novelty_test": [nov_test], "drug_novelty": [drug_nov],
                                   "max_len": [max_len], "mean_atom_type": [mean_atom],
                                   "snn_chembl": [snn_chembl], "snn_drug": [snn_drug],
                                   "IntDiv": [int_div], "qed": [qed], "sa": [sa]})
-
-
-
-
-
-
-
-
-        return model_res
-
-
+        search_res = pd.concat([search_res, model_res], axis=0)
+        os.remove("experiments/inference/{}/inference_drugs.txt".format(self.submodel))
+        search_res.to_csv("experiments/inference/{}/inference_results.csv".format(self.submodel), index=False)
+        generatedsmiles = pd.DataFrame({"SMILES": gen_smi})
+        generatedsmiles.to_csv("experiments/inference/{}/inference_drugs.csv".format(self.submodel), index=False)
+
+        return model_res
+
 if __name__=="__main__":
     parser = argparse.ArgumentParser()

@@ -300,4 +323,4 @@ if __name__=="__main__":

     config = parser.parse_args()
     inference = Inference(config)
-    inference.inference()
+    inference.inference()
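Note on the first hunk: the change only closes the previously unbalanced torch.load(...) call. For reference, a minimal sketch of the standard PyTorch checkpoint-restore pattern used there, with a placeholder model and file name that are not part of this repository:

# Minimal sketch: save and restore a generator's weights on CPU.
# SimpleGenerator and "generator.ckpt" are placeholders for illustration only.
import torch
import torch.nn as nn

class SimpleGenerator(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(16, 16)

    def forward(self, x):
        return self.fc(x)

G = SimpleGenerator()
torch.save(G.state_dict(), "generator.ckpt")

# map_location=lambda storage, loc: storage keeps deserialized tensors on CPU,
# so a checkpoint trained on GPU still loads on a CPU-only machine.
state = torch.load("generator.ckpt", map_location=lambda storage, loc: storage)
G.load_state_dict(state)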
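The metrics block relies on MOSES (fraction_valid, fraction_unique, novelty) and RDKit (Morgan fingerprints, QED) helpers. Below is a minimal, self-contained sketch of those calls using placeholder SMILES lists rather than model output; the import path moses.metrics.metrics is an assumption about how these helpers are packaged:

# Minimal sketch of the MOSES/RDKit metric calls used in the diff.
# The SMILES lists are placeholders for illustration, not generated molecules.
from moses.metrics.metrics import fraction_valid, fraction_unique, novelty
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem, QED

gen_smi = ["CCO", "c1ccccc1", "CC(=O)O"]   # "generated" set (placeholder)
ref_smi = ["CCO", "CCN"]                   # reference set (placeholder)

val = round(fraction_valid(gen_smi), 3)    # fraction of parseable SMILES
uniq = round(fraction_unique(gen_smi), 3)  # fraction of unique canonical SMILES
nov = round(novelty(gen_smi, ref_smi), 3)  # fraction of generated molecules absent from ref_smi

# Morgan fingerprints (radius 2, 1024 bits) and a pairwise Tanimoto similarity,
# the building blocks of the SNN-style comparisons in the diff.
mols = [m for m in (Chem.MolFromSmiles(s) for s in gen_smi) if m is not None]
fps = [AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=1024) for m in mols]
tanimoto = DataStructs.TanimotoSimilarity(fps[0], fps[1])

qed = round(sum(QED.qed(m) for m in mols) / len(mols), 3)  # mean drug-likeness

print(val, uniq, nov, tanimoto, qed)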