• 如果您觉得本站非常有看点，那么赶紧使用Ctrl+D 收藏吧

# 【小白学PyTorch】4 构建模型三要素与权重初始化

4周前 (09-02) 25次浏览

• 1 模型三要素
• 2 参数初始化
• 3 完整运行代码
• 4 尺寸计算与参数计算

## 1 模型三要素

1. 必须要继承nn.Module这个类，要让PyTorch知道这个类是一个Module
2. 在__init__(self)中设置好需要的组件，比如conv，pooling，Linear，BatchNorm等等
3. 最后在forward(self,x)中用定义好的组件进行组装，就像搭积木，把网络结构搭建出来，这样一个模型就定义好了

``````def __init__(self):
    """Declare the network components: two conv+pool stages and three FC layers."""
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)        # in_channels=3, out_channels=6, kernel=5x5
    self.pool1 = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.pool2 = nn.MaxPool2d(2, 2)
    self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 16 feature maps of 5x5, flattened to 400
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)
``````

``````def forward(self, x):
    """Forward pass: (conv -> relu -> pool) twice, flatten, then three FC layers."""
    x = self.pool1(F.relu(self.conv1(x)))
    x = self.pool2(F.relu(self.conv2(x)))
    x = x.view(-1, 16 * 5 * 5)  # flatten to (batch, 400) before the FC layers
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)             # no activation: raw class logits
    return x
``````

x为模型的输入，第一行表示x经过conv1，然后经过激活函数relu，然后经过pool1操作

``````net = Net()
outputs = net(inputs)  # calling the module runs forward() via nn.Module.__call__
``````

## 2 参数初始化

``````# Weight initialization: walk every sub-module and initialize it by layer type
def initialize_weights(self):
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            torch.nn.init.xavier_normal_(m.weight.data)
            if m.bias is not None:      # Conv2d may be built with bias=False
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            torch.nn.init.normal_(m.weight.data, 0, 0.01)
            # m.weight.data.normal_(0, 0.01) would do the same in-place
            if m.bias is not None:      # same guard as the Conv2d branch (bias=False)
                m.bias.data.zero_()
``````

``````# Source of nn.Module.modules: yields every sub-module (the module itself first)
def modules(self):
    for name, module in self.named_modules():
        yield module
``````

## 3 完整运行代码

``````import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """LeNet-style CNN: two conv+pool stages followed by three fully connected layers."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)        # 3 -> 6 channels, 5x5 kernel
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)       # 6 -> 16 channels, 5x5 kernel
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 16 maps of 5x5 flattened to 400
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """(conv -> relu -> pool) twice, flatten, then three FC layers; returns logits."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)  # flatten to (batch, 400)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def initialize_weights(self):
        """Initialize every sub-module's parameters according to its layer type."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.xavier_normal_(m.weight.data)
                if m.bias is not None:      # Conv2d may be built with bias=False
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                torch.nn.init.normal_(m.weight.data, 0, 0.01)
                # m.weight.data.normal_(0, 0.01) would do the same in-place
                if m.bias is not None:      # same guard as the Conv2d branch
                    m.bias.data.zero_()

net = Net()
net.initialize_weights()
print(net.modules())        # a generator object, not a list
for m in net.modules():
    print(m)
``````

``````# 这个是print(net.modules())的输出
<generator object Module.modules at 0x0000023BDCA23258>
# 这个是第一次从net.modules()取出来的东西，是整个网络的结构
Net(
(conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
(pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=400, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)
# 从net.modules()第二次开始取得东西就是每一层了
Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
Linear(in_features=400, out_features=120, bias=True)
Linear(in_features=120, out_features=84, bias=True)
Linear(in_features=84, out_features=10, bias=True)
``````

``````torch.nn.init.xavier_normal_(m.weight.data)  # trailing underscore = in-place init
if m.bias is not None:  # bias may be disabled (bias=False), so check before zeroing
    m.bias.data.zero_()
``````

## 4 尺寸计算与参数计算

``````net = Net()
net.initialize_weights()
for m in net.modules():
    if isinstance(m, nn.Conv2d):  # stop at the first conv layer
        print(m)
        break
``````

``````Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
``````

【问题1：输入特征图和输出特征图的尺寸计算】

``````net = Net()
net.initialize_weights()
inputs = torch.ones((16, 3, 10, 10))  # (batch, channels, H, W); avoid shadowing builtin input()
outputs = net.conv1(inputs)
print(inputs.shape)
print(outputs.shape)
``````

``````torch.Size([16, 3, 10, 10])
torch.Size([16, 6, 6, 6])
``````

\(\frac{10+2\times 0-5}{1}+1=6\) 算出来的结果没毛病。

【问题2：这个卷积层中有多少的参数？】

``````net = Net()
net.initialize_weights()
for m in net.modules():
    if isinstance(m, nn.Conv2d):
        print(m)
        print(m.weight.shape)  # (out_channels, in_channels, kH, kW)
        print(m.bias.shape)    # (out_channels,)
        break
``````

``````Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
torch.Size([6, 3, 5, 5])
torch.Size([6])
``````