# MediQuery-AI / download_models.py
# Uploaded by bitphonix ("Upload 6 files", commit 8500b5e, verified)
import os
import torch
from torchvision import models
from transformers import AutoTokenizer, AutoModel, T5ForConditionalGeneration, T5Tokenizer
import faiss
import numpy as np
import pandas as pd
# Make sure every output folder exists before anything is written into it.
# Creating the nested FLAN-T5 folder also creates its parent "models" folder.
for _required_dir in ("models/flan-t5-finetuned", "knowledge_base"):
    os.makedirs(_required_dir, exist_ok=True)
print("Downloading model weights...")
# Download image model (DenseNet121) and keep only its state dict on disk.
# torchvision >= 0.13 deprecates `pretrained=True` in favour of the `weights`
# enum, so prefer the enum and fall back to the legacy flag only when the
# installed torchvision does not provide it.
densenet_weights_enum = getattr(models, "DenseNet121_Weights", None)
if densenet_weights_enum is not None:
    # IMAGENET1K_V1 is the checkpoint that `pretrained=True` used to select.
    image_model = models.densenet121(weights=densenet_weights_enum.IMAGENET1K_V1)
else:
    image_model = models.densenet121(pretrained=True)
torch.save(image_model.state_dict(), "models/densenet121.pt")
print("Downloaded DenseNet121 weights")
# Download text model (BioBERT): fetch the tokenizer and encoder from the
# Hugging Face hub, then store both side by side in one local folder.
biobert_repo = "dmis-lab/biobert-v1.1"
biobert_dir = "models/biobert"
tokenizer = AutoTokenizer.from_pretrained(biobert_repo)
model = AutoModel.from_pretrained(biobert_repo)
# Both objects are saved only after both downloads succeeded, tokenizer first.
for _biobert_artifact in (tokenizer, model):
    _biobert_artifact.save_pretrained(biobert_dir)
print("Downloaded BioBERT weights")
# Download generation model (FLAN-T5): tokenizer plus seq2seq weights.
# NOTE(review): the stock "google/flan-t5-base" checkpoint is written to the
# "flan-t5-finetuned" folder - presumably a stand-in for a fine-tuned model
# that the rest of the project loads from this path; confirm with the caller.
flan_t5_repo = "google/flan-t5-base"
flan_t5_dir = "models/flan-t5-finetuned"
gen_tokenizer = T5Tokenizer.from_pretrained(flan_t5_repo)
gen_model = T5ForConditionalGeneration.from_pretrained(flan_t5_repo)
gen_tokenizer.save_pretrained(flan_t5_dir)
gen_model.save_pretrained(flan_t5_dir)
print("Downloaded FLAN-T5 weights")
# Create a minimal knowledge base
print("Creating minimal knowledge base...")
# Sample chest X-ray report texts; each entry becomes one row of the table.
report_texts = [
    "The chest X-ray shows clear lung fields with no evidence of consolidation, effusion, or pneumothorax. The heart size is normal. No acute cardiopulmonary abnormality.",
    "Bilateral patchy airspace opacities consistent with multifocal pneumonia. No pleural effusion or pneumothorax. Heart size is normal.",
    "Cardiomegaly with pulmonary vascular congestion and bilateral pleural effusions, consistent with congestive heart failure. No pneumothorax or pneumonia.",
    "Right upper lobe opacity concerning for pneumonia. No pleural effusion or pneumothorax. Heart size is normal.",
    "Left lower lobe atelectasis. No pneumothorax or pleural effusion. Heart size is normal.",
    "Bilateral pleural effusions with bibasilar atelectasis. Cardiomegaly present. Findings consistent with heart failure.",
    "Right pneumothorax with partial lung collapse. No pleural effusion. Heart size is normal.",
    "Endotracheal tube, central venous catheter, and nasogastric tube in place. No pneumothorax or pleural effusion.",
    "Hyperinflated lungs with flattened diaphragms, consistent with COPD. No acute infiltrate or effusion.",
    "Multiple rib fractures on the right side. No pneumothorax or hemothorax. Lung fields are clear.",
]
text_data = pd.DataFrame({
    'combined_text': report_texts,
    # Derived from the list length (0..n-1) so that adding or removing a
    # report cannot leave the two columns with mismatched lengths.
    'valid_index': list(range(len(report_texts))),
})
# The "knowledge_base" folder is created earlier in this script.
text_data.to_csv("knowledge_base/text_data.csv", index=False)
# Create dummy FAISS indices
# NOTE: the vectors below are random placeholders, not embeddings computed
# from the report texts or from images, so nearest-neighbour results from
# these indices carry no meaning. A fixed seed keeps the generated index files
# identical from one run to the next.
embedding_rng = np.random.default_rng(0)
# presumably 768 / 1024 mirror the BioBERT and DenseNet121 feature sizes
# expected by the consumer of these indices - TODO confirm
text_dim = 768
text_embeddings = embedding_rng.random((len(text_data), text_dim), dtype=np.float32)
image_dim = 1024
# One placeholder image vector per text row, so both indices share row ids.
image_embeddings = embedding_rng.random((len(text_data), image_dim), dtype=np.float32)
# Create FAISS indices (exact L2 search) and write them to the knowledge base
text_index = faiss.IndexFlatL2(text_dim)
text_index.add(text_embeddings)
faiss.write_index(text_index, "knowledge_base/text_index.faiss")
image_index = faiss.IndexFlatL2(image_dim)
image_index.add(image_embeddings)
faiss.write_index(image_index, "knowledge_base/image_index.faiss")
print("Created minimal knowledge base")
print("Setup complete!")