pytorch · fedebongio · Mar 25, 2026 · Mar 26, 2026 · siyuanfoundation · Mar 26, 2026
diff --git a/README.md b/README.md
@@ -1,7 +1,5 @@
 # PyTorch Examples
 
-![Run Examples](https://github.com/pytorch/examples/workflows/Run%20Examples/badge.svg)
-
 https://pytorch.org/examples/
 
 `pytorch/examples` is a repository showcasing examples of using [PyTorch](https://github.com/pytorch/pytorch). The goal is to have curated, short, few/no dependencies _high quality_ examples that are substantially different from each other that can be emulated in your existing work.
@@ -32,8 +30,6 @@ https://pytorch.org/examples/
 - [Image Classification Using Forward-Forward](./mnist_forward_forward/README.md)
 - [Language Translation using Transformers](./language_translation/README.md)
 
-
-
 Additionally, a list of good examples hosted in their own repositories:
 
 - [Neural Machine Translation using sequence-to-sequence RNN with attention (OpenNMT)](https://github.com/OpenNMT/OpenNMT-py)

diff --git a/mnist/README.md b/mnist/README.md
@@ -1,7 +1,48 @@
-# Basic MNIST Example
+# MNIST Example
+
+Trains a ConvNet on the MNIST dataset using PyTorch.
+
+## Usage
+
+### Standard (CUDA / MPS / XPU)
 
 ```bash
 pip install -r requirements.txt
 python main.py
-# CUDA_VISIBLE_DEVICES=2 python main.py  # to specify GPU id to ex. 2
+# or to run on CPU only:
+python main.py --no-accel
+```
+
+### TPU (via PyTorch/XLA)
+
+```bash
+pip install torch torchvision
+pip install 'torch_xla[tpu]'
+python main.py --xla
+```
+
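+For multi-device TPU training, see the [PyTorch/XLA multiprocessing guide](https://docs.pytorch.org/xla/master/learn/pytorch-on-xla-devices.html). Note that with `--xla` the script trains with Adam at a fixed learning rate of 1e-3, so `--lr` has no effect.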
+
+### Options
+
+```
+usage: main.py [-h] [--batch-size N] [--test-batch-size N] [--epochs N]
+               [--lr LR] [--gamma M] [--no-accel] [--xla] [--dry-run]
+               [--seed S] [--log-interval N] [--save-model]
+
+PyTorch MNIST Example
+
+options:
+  -h, --help           show this help message and exit
+  --batch-size N       input batch size for training (default: 64)
+  --test-batch-size N  input batch size for testing (default: 1000)
+  --epochs N           number of epochs to train (default: 14)
+  --lr LR              learning rate (default: 1.0)
+  --gamma M            Learning rate step gamma (default: 0.7)
+  --no-accel           disables accelerator
+  --xla                enables XLA device (e.g. TPU). Requires torch_xla.
+  --dry-run            quickly check a single pass
+  --seed S             random seed (default: 1)
+  --log-interval N     how many batches to wait before logging training status
+  --save-model         For Saving the current Model
 ```
diff --git a/mnist/main.py b/mnist/main.py
@@ -7,6 +7,14 @@
 from torch.optim.lr_scheduler import StepLR
 
 
+_XLA_AVAILABLE = False
+try:
+    import torch_xla
+    _XLA_AVAILABLE = True
+except ImportError:
+    pass
+
+
 class Net(nn.Module):
     def __init__(self):
         super(Net, self).__init__()
@@ -42,6 +50,8 @@ def train(args, model, device, train_loader, optimizer, epoch):
         loss = F.nll_loss(output, target)
         loss.backward()
         optimizer.step()
+        if args.xla:
+            torch_xla.sync()
         if batch_idx % args.log_interval == 0:
             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                 epoch, batch_idx * len(data), len(train_loader.dataset),
@@ -84,48 +94,60 @@ def main():
                         help='Learning rate step gamma (default: 0.7)')
     parser.add_argument('--no-accel', action='store_true',
                         help='disables accelerator')
+    parser.add_argument('--xla', action='store_true', default=False,
+                        help='enables XLA device (e.g. TPU). Requires torch_xla.')
     parser.add_argument('--dry-run', action='store_true',
                         help='quickly check a single pass')
     parser.add_argument('--seed', type=int, default=1, metavar='S',
                         help='random seed (default: 1)')
     parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                         help='how many batches to wait before logging training status')
-    parser.add_argument('--save-model', action='store_true', 
+    parser.add_argument('--save-model', action='store_true',
                         help='For Saving the current Model')
     args = parser.parse_args()
 
-    use_accel = not args.no_accel and torch.accelerator.is_available()
+    if args.xla:
+        if not _XLA_AVAILABLE:
+            raise RuntimeError(
+                "--xla flag requires torch_xla to be installed. "
+                "Install with: pip install torch_xla[tpu]"
+            )
+        device = torch_xla.device()
+    else:
+        use_accel = not args.no_accel and torch.accelerator.is_available()
+        device = torch.accelerator.current_accelerator() if use_accel else torch.device("cpu")
 
     torch.manual_seed(args.seed)
 
-    if use_accel:
-        device = torch.accelerator.current_accelerator()
-    else:
-        device = torch.device("cpu")
-
     train_kwargs = {'batch_size': args.batch_size}
     test_kwargs = {'batch_size': args.test_batch_size}
-    if use_accel:
-        accel_kwargs = {'num_workers': 1,
-                        'persistent_workers': True,
-                       'pin_memory': True,
-                       'shuffle': True}
+
+    if args.xla:
+        train_kwargs.update({'num_workers': 4, 'persistent_workers': True,
+                             'shuffle': True, 'drop_last': True})
+        test_kwargs.update({'num_workers': 4, 'persistent_workers': True})
+    elif not args.no_accel and torch.accelerator.is_available():
+        accel_kwargs = {'num_workers': 1, 'persistent_workers': True,
+                        'pin_memory': True, 'shuffle': True}
         train_kwargs.update(accel_kwargs)
         test_kwargs.update(accel_kwargs)
 
-    transform=transforms.Compose([
+    transform = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0.1307,), (0.3081,))
         ])
     dataset1 = datasets.MNIST('../data', train=True, download=True,
                        transform=transform)
     dataset2 = datasets.MNIST('../data', train=False,
                        transform=transform)
-    train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
+    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
     test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
 
     model = Net().to(device)
-    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
+    if args.xla:
+        optimizer = optim.Adam(model.parameters(), lr=1e-3)
+    else:
+        optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
 
     scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
     for epoch in range(1, args.epochs + 1):
@@ -134,7 +156,10 @@ def main():
         scheduler.step()
 
     if args.save_model:
-        torch.save(model.state_dict(), "mnist_cnn.pt")
+        if args.xla:
+            torch.save(model.cpu().state_dict(), "mnist_cnn.pt")
+        else:
+            torch.save(model.state_dict(), "mnist_cnn.pt")
 
 
 if __name__ == '__main__':

diff --git a/run_python_examples.sh b/run_python_examples.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # This script runs through the code in each of the python examples.
-# The purpose is just as an integration test, not to actually train models in any meaningful way.
+# The purpose is just as an integration test, not to actually train models in any meaningful way.
 # For that reason, most of these set epochs = 1 and --dry-run.
 #
 # Optionally specify a comma separated list of examples to run. Can be run as:
@@ -91,7 +91,7 @@ function language_translation() {
 }
 
 function mnist() {
-  uv run main.py --epochs 1 --dry-run || error "mnist example failed"
+  uv run main.py --epochs 1 --dry-run $ACCEL_FLAG || error "mnist example failed"
 }
 function mnist_forward_forward() {
   uv run main.py --epochs 1 --no_accel || error "mnist forward forward failed"