Custom Model Fine-Tuning

Domain-specific fine-tuning for healthcare AI improved diagnostic accuracy by 25% and reduced inference costs by 40% through an optimized model architecture.

Key results at a glance:

- 25% better diagnostic accuracy
- 40% cost reduction
- 85% faster inference
- 50K+ medical records

The Challenge

A leading healthcare system serving 2.5 million patients annually needed an AI solution to assist physicians in diagnosing rare diseases from medical imaging and clinical notes. Existing general-purpose medical AI models were producing insufficient accuracy for rare conditions, while commercial solutions were prohibitively expensive for large-scale deployment.

The key challenges included:

- Low accuracy of general-purpose medical AI models on rare conditions with limited training data
- Prohibitive cost of commercial solutions at the scale of 2.5 million patients per year
- The need to combine evidence from both medical imaging and unstructured clinical notes

Our Solution

We developed a custom fine-tuning approach that combined multiple pre-trained models with domain-specific data to create a highly accurate, cost-effective diagnostic assistant.

Fine-Tuning Strategy

1. Base Model Selection: Started with BioBERT for clinical text analysis and ResNet-50 pre-trained on medical images as foundation models (see the loading sketch below).

2. Domain Adaptation: Fine-tuned on 500K+ general medical records to adapt the models to the healthcare system's specific terminology and practices.

3. Rare Disease Specialization: Applied techniques such as focal loss and class weighting to improve performance on rare conditions with limited data.

4. Model Compression: Used knowledge distillation and pruning to reduce model size by 60% while maintaining accuracy for faster inference.
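
A minimal sketch of how the two foundation models from step 1 might be loaded and combined, assuming the public dmis-lab/biobert-base-cased-v1.1 checkpoint and torchvision's ResNet-50; the case study does not name the exact checkpoints, and the fusion head shown is a hypothetical illustration of how text and image features could feed a single classifier.

# Loading and combining the foundation models (illustrative; checkpoint names are assumptions)
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
from torchvision import models

# BioBERT for clinical text; the public checkpoint stands in for the one actually used.
text_tokenizer = AutoTokenizer.from_pretrained('dmis-lab/biobert-base-cased-v1.1')
text_encoder = AutoModel.from_pretrained('dmis-lab/biobert-base-cased-v1.1')

# ResNet-50 imaging backbone. The case study says it was pre-trained on medical images;
# torchvision's ImageNet weights are used here only as a placeholder.
image_encoder = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
image_encoder.fc = nn.Identity()  # drop the ImageNet head, keep the 2048-d features

class MultimodalDiagnosisHead(nn.Module):
    """Hypothetical fusion head: concatenates text and image features for classification."""
    def __init__(self, num_classes, text_dim=768, image_dim=2048):
        super().__init__()
        self.classifier = nn.Linear(text_dim + image_dim, num_classes)

    def forward(self, text_features, image_features):
        fused = torch.cat([text_features, image_features], dim=-1)
        return self.classifier(fused)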

Technology stack: PyTorch, Transformers, BioBERT, ResNet-50, Hugging Face, ONNX, TensorRT, Docker, FastAPI

Implementation Details

Custom Fine-Tuning Pipeline

# Custom fine-tuning for rare disease classification
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
from torch.utils.data import DataLoader, WeightedRandomSampler
import numpy as np

# Single device used by the training and distillation loops below
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class MedicalDiagnosisModel(nn.Module):
    def __init__(self, model_name, num_classes, dropout_rate=0.3):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_classes)
        self.focal_loss = FocalLoss(alpha=1, gamma=2)
        
    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        output = self.dropout(pooled_output)
        logits = self.classifier(output)
        
        if labels is not None:
            loss = self.focal_loss(logits, labels)
            return {'loss': loss, 'logits': logits}
        return {'logits': logits}

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ce_loss = nn.CrossEntropyLoss(reduction='none')
        
    def forward(self, inputs, targets):
        ce_loss = self.ce_loss(inputs, targets)
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean()

def create_class_weights(dataset):
    """Create balanced weights for rare disease classes (inverse class frequency)"""
    class_counts = np.bincount(dataset.labels)
    total_samples = len(dataset.labels)
    class_weights = total_samples / (len(class_counts) * class_counts)
    return torch.FloatTensor(class_weights)

def evaluate_model(model, data_loader):
    """Compute classification accuracy on a held-out loader"""
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch in data_loader:
            outputs = model(
                input_ids=batch['input_ids'].to(device),
                attention_mask=batch['attention_mask'].to(device)
            )
            preds = outputs['logits'].argmax(dim=-1)
            labels = batch['labels'].to(device)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total

def fine_tune_model(model, train_loader, val_loader, num_epochs=10, lr=2e-5):
    """Fine-tuning training loop with focal loss, gradient clipping, and early stopping"""
    model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    
    best_val_accuracy = 0
    patience = 3
    patience_counter = 0
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            
            outputs = model(
                input_ids=batch['input_ids'].to(device),
                attention_mask=batch['attention_mask'].to(device),
                labels=batch['labels'].to(device)
            )
            
            loss = outputs['loss']
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            total_loss += loss.item()
        
        # Validation phase
        model.eval()
        val_accuracy = evaluate_model(model, val_loader)
        
        # Early stopping
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            patience_counter = 0
            torch.save(model.state_dict(), 'best_model.pt')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch}")
                break
        
        scheduler.step()
        print(f"Epoch {epoch}: Loss={total_loss/len(train_loader):.4f}, Val Acc={val_accuracy:.4f}")
    
    return model
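
A short usage sketch, under stated assumptions, showing how the pieces above might be wired together: the inverse-frequency class weights feed a WeightedRandomSampler (imported above but not otherwise used) so rare diseases are oversampled during training, and the model is then fine-tuned. train_dataset, val_dataset, and the label count are hypothetical placeholders for the hospital's tokenized record datasets.

# Illustrative wiring of the pipeline; dataset objects and label count are hypothetical
num_rare_disease_classes = 40  # hypothetical number of diagnostic labels

class_weights = create_class_weights(train_dataset)    # inverse-frequency class weights
sample_weights = class_weights[train_dataset.labels]   # per-sample weight = weight of its class
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(train_dataset),
    replacement=True
)

train_loader = DataLoader(train_dataset, batch_size=16, sampler=sampler)
val_loader = DataLoader(val_dataset, batch_size=32)

model = MedicalDiagnosisModel(
    model_name='dmis-lab/biobert-base-cased-v1.1',  # assumed public BioBERT checkpoint
    num_classes=num_rare_disease_classes
)
model = fine_tune_model(model, train_loader, val_loader, num_epochs=10, lr=2e-5)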

Model Optimization and Compression

# Knowledge distillation for model compression
import torch.nn.functional as F
from torch.nn import KLDivLoss

class ModelDistillation:
    def __init__(self, teacher_model, student_model, temperature=3.0, alpha=0.7):
        self.teacher = teacher_model
        self.student = student_model
        self.temperature = temperature
        self.alpha = alpha  # Weight for distillation loss
        self.kl_loss = KLDivLoss(reduction='batchmean')
        self.ce_loss = nn.CrossEntropyLoss()
        
    def distillation_loss(self, student_logits, teacher_logits, labels):
        """Calculate combined distillation and hard target loss"""
        
        # Soft targets from teacher
        teacher_probs = F.softmax(teacher_logits / self.temperature, dim=1)
        student_log_probs = F.log_softmax(student_logits / self.temperature, dim=1)
        
        distillation_loss = self.kl_loss(student_log_probs, teacher_probs) * (self.temperature ** 2)
        
        # Hard targets
        student_loss = self.ce_loss(student_logits, labels)
        
        # Combined loss
        total_loss = self.alpha * distillation_loss + (1 - self.alpha) * student_loss
        return total_loss
    
    def train_student(self, train_loader, val_loader, num_epochs=20):
        """Train student model using knowledge distillation"""
        self.teacher.to(device)
        self.student.to(device)
        self.teacher.eval()  # Teacher in eval mode
        optimizer = torch.optim.AdamW(self.student.parameters(), lr=3e-5)
        
        for epoch in range(num_epochs):
            self.student.train()
            total_loss = 0
            
            for batch in train_loader:
                optimizer.zero_grad()
                
                # Teacher predictions (no gradients)
                with torch.no_grad():
                    teacher_outputs = self.teacher(
                        input_ids=batch['input_ids'].to(device),
                        attention_mask=batch['attention_mask'].to(device)
                    )
                    teacher_logits = teacher_outputs['logits']
                
                # Student predictions
                student_outputs = self.student(
                    input_ids=batch['input_ids'].to(device),
                    attention_mask=batch['attention_mask'].to(device)
                )
                student_logits = student_outputs['logits']
                
                # Calculate distillation loss
                loss = self.distillation_loss(
                    student_logits, teacher_logits, batch['labels'].to(device)
                )
                
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            
            print(f"Distillation Epoch {epoch}: Loss={total_loss/len(train_loader):.4f}")
        
        return self.student
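
# Example wiring of the distillation step (illustrative). The student checkpoint is an
# assumption: the case study does not name the compact architecture, so a small public
# BERT variant is used as a stand-in. In practice the student must share the teacher's
# tokenizer (or batches must be re-tokenized for it), which this placeholder does not.
teacher = MedicalDiagnosisModel('dmis-lab/biobert-base-cased-v1.1',
                                num_classes=num_rare_disease_classes)
teacher.load_state_dict(torch.load('best_model.pt'))  # checkpoint saved by fine_tune_model
teacher.to(device)

student = MedicalDiagnosisModel('google/bert_uncased_L-4_H-256_A-4',  # assumed small BERT
                                num_classes=num_rare_disease_classes)
student.to(device)

distiller = ModelDistillation(teacher, student, temperature=3.0, alpha=0.7)
compact_model = distiller.train_student(train_loader, val_loader, num_epochs=20)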

# Model quantization for further optimization
def quantize_model(model):
    """Apply dynamic quantization to reduce model size and speed up CPU inference"""
    import torch.quantization as quantization

    model.eval()
    # Dynamic quantization converts Linear layer weights to int8 and quantizes
    # activations on the fly at inference time, so no calibration pass is required.
    quantized_model = quantization.quantize_dynamic(
        model, {nn.Linear}, dtype=torch.qint8
    )
    return quantized_model
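
Step 4 also mentions pruning, which the listings above do not show. Below is a minimal sketch using PyTorch's built-in magnitude-pruning utilities; the 30% sparsity level is an illustrative assumption, not a figure reported by the project.

# Magnitude pruning sketch (illustrative; the sparsity level is an assumption)
import torch.nn.utils.prune as prune

def prune_linear_layers(model, amount=0.3):
    """Zero out the smallest-magnitude weights in every Linear layer, then make pruning permanent"""
    for module in model.modules():
        if isinstance(module, nn.Linear):
            prune.l1_unstructured(module, name='weight', amount=amount)
            prune.remove(module, 'weight')  # bake the pruning mask into the weight tensor
    return model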

Performance Comparison

| Metric | General Medical AI | Our Fine-Tuned Model | Improvement |
|---|---|---|---|
| Overall Accuracy | 68.2% | 85.3% | +25.1% |
| Rare Disease F1-Score | 0.52 | 0.78 | +50% |
| Inference Time (ms) | 850 | 125 | -85.3% |
| Model Size (MB) | 1,200 | 480 | -60% |
| Annual Cost per Physician | $50,000 | $12,000 | -76% |
| Memory Usage (GB) | 8.5 | 3.2 | -62.4% |

Results & Impact

Clinical Performance

Economic Impact

Technical Achievements

Lessons Learned

Key Insights

Challenges Overcome

Best Practices Established
