|
import os
import pickle as pk

import numpy as np
from sklearn import svm
from sklearn.preprocessing import OneHotEncoder
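# CIFAR-10 image classification: a small fully-connected network, kNN, and an
# SVM are each evaluated on the first 1000 test images, then combined by
# majority vote over their one-hot predictions.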
|
|
def load_batch(file):
    """Load one CIFAR-10 batch: 10000 flattened 32x32x3 images plus labels."""
    with open(file, 'rb') as f:
        data_dict = pk.load(f, encoding='bytes')
    images = data_dict[b'data'].reshape(-1, 3072)
    labels = np.array(data_dict[b'labels'])
    # Scale pixel values from [0, 255] to [0, 1].
    return images / 255, labels
|
|
def load_data(data_dir):
    """Load the five CIFAR-10 training batches and the test batch."""
    images_train = []
    labels_train = []
    for i in range(5):
        file = os.path.join(data_dir, 'data_batch_%d' % (i + 1))
        print('Loading file:', file)
        images_batch, labels_batch = load_batch(file)
        images_train.append(images_batch)
        labels_train.append(labels_batch)

    x_train = np.concatenate(images_train)
    t_train = np.concatenate(labels_train)
    del images_batch, labels_batch

    x_test, t_test = load_batch(os.path.join(data_dir, 'test_batch'))
    return x_train, t_train, x_test, t_test
|
|
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_grad(x):
    s = sigmoid(x)
    return s * (1.0 - s)
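# Note: sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)), so sigmoid_grad expects
# the pre-activation value x, not the already-activated output.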
|
|
def softmax(x):
    if x.ndim == 2:
        # Batched input: transpose so the per-sample max subtraction and
        # normalization broadcast along axis 0, then transpose back.
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T

    x = x - np.max(x)
    return np.exp(x) / np.sum(np.exp(x))
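# Example (illustrative): softmax(np.array([1.0, 2.0, 3.0])) is roughly
# [0.090, 0.245, 0.665]. Subtracting the max before np.exp prevents overflow
# without changing the result.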
|
|
class neuralNetwork:

    def __init__(self, numNeuronLayers, numNeurons_perLayer, learningRate):
        self.numNeurons_perLayer = numNeurons_perLayer
        self.numNeuronLayers = numNeuronLayers
        self.learningRate = learningRate
        self.weight = []
        self.bias = []
        for i in range(numNeuronLayers):
            # learningRate doubles as the scale of the random weight init.
            self.weight.append(
                learningRate * np.random.randn(self.numNeurons_perLayer[i], self.numNeurons_perLayer[i + 1]))
            self.bias.append(np.zeros(self.numNeurons_perLayer[i + 1]))
|
    def predict(self, x):
        z = x
        # Hidden layers: affine transform followed by sigmoid.
        for i in range(self.numNeuronLayers - 1):
            a = np.dot(z, self.weight[i]) + self.bias[i]
            z = sigmoid(a)
        # Output layer: affine transform followed by softmax.
        an = np.dot(z, self.weight[self.numNeuronLayers - 1]) + self.bias[self.numNeuronLayers - 1]
        return softmax(an)
|
    def gradient(self, x, t):
        # Forward pass, caching pre-activations a[i] and activations z[i].
        z = [x]
        a = []
        for i in range(self.numNeuronLayers):
            a.append(np.dot(z[i], self.weight[i]) + self.bias[i])
            z.append(sigmoid(a[i]))
        # The final sigmoid in z is unused; the output layer uses softmax.
        y = softmax(a[self.numNeuronLayers - 1])

        # Backward pass. dz[0] is the output-layer error; dz is built in
        # reverse order, so dz[numNeuronLayers - i - 1] belongs to layer i.
        num = x.shape[0]
        dy = (y - t) / num
        dz = [dy]
        da = []
        for i in range(self.numNeuronLayers - 1):
            da.append(np.dot(dz[i], self.weight[self.numNeuronLayers - i - 1].T))
            dz.append(sigmoid_grad(a[self.numNeuronLayers - i - 2]) * da[i])

        # Plain SGD update for every layer.
        for i in range(self.numNeuronLayers):
            self.weight[i] -= self.learningRate * np.dot(z[i].T, dz[self.numNeuronLayers - i - 1])
            self.bias[i] -= self.learningRate * np.sum(dz[self.numNeuronLayers - i - 1], axis=0)
|
    def loss(self, x, t):
        # Cross-entropy loss against one-hot targets t.
        y = self.predict(x)
        t = t.argmax(axis=1)
        num = y.shape[0]
        s = y[np.arange(num), t]
        return -np.sum(np.log(s + 1e-7)) / num  # epsilon guards against log(0)
|
    def accuracy(self, x, t):
        y = self.predict(x)
        p = np.argmax(y, axis=1)
        q = np.argmax(t, axis=1)
        return np.sum(p == q) / len(y)
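# Illustrative usage (mirrors runNetwork below): a 3-layer net mapping the
# 3072 pixel inputs through hidden layers of 50 and 20 units to 10 classes:
#   net = neuralNetwork(3, [3072, 50, 20, 10], 0.05)
#   net.gradient(x_batch, t_batch)  # one SGD step on a mini-batch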
|
|
def kNN(x_train, x_test, t_train, k):
    """For each test sample, count the class votes of its k nearest neighbors."""
    num_classes = len(t_train[0])
    px = [[0] * num_classes for _ in range(len(x_test))]
    for i in range(len(x_test)):
        dis = getODistance(x_test[i], x_train)
        index = np.argsort(dis)
        count = [0] * num_classes
        # Tally the one-hot labels of the k nearest training samples.
        for j in range(k):
            for w in range(num_classes):
                if t_train[index[j]][w] == 1:
                    count[w] += 1
        px[i] = count
    return px
|
|
def getODistance(sample, train):
    """Euclidean distance from one sample to every row of train."""
    diff = np.asarray(train) - np.asarray(sample)
    return np.sqrt(np.square(diff).sum(axis=1)).tolist()
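# Example (illustrative): with sample = [0, 0] and train rows [3, 4] and
# [6, 8], getODistance returns [5.0, 10.0].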
|
|
def runNetwork():
    numNeuronLayers = 3
    numNeurons_perLayer = [3072, 50, 20, 10]
    learningRate = 0.05
    iterations = 50000
    batch_size = 100
    train_size = x_train.shape[0]

    net = neuralNetwork(numNeuronLayers, numNeurons_perLayer, learningRate)
    # Mini-batch SGD: each iteration samples a random batch of 100 images.
    for i in range(iterations):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]
        net.gradient(x_batch, t_batch)

    # Evaluate on the first 1000 test images.
    y = net.predict(x_test[0:1000])
    p = np.argmax(y, axis=1)
    q = np.argmax(t_test[0:1000], axis=1)
    acc = np.sum(p == q) / len(y)
    print("Neural network accuracy:", acc)
    return p
|
|
def runKnn(x_train, x_test):
    # Vote counts from the 7 nearest neighbors, first 1000 samples only.
    px = kNN(x_train[0:1000], x_test[0:1000], t_train[0:1000], 7)
    p = np.argmax(px, axis=1)
    q = np.argmax(t_test[0:1000], axis=1)
    acc = np.sum(p == q) / 1000
    print("kNN accuracy:", acc)
    return p
|
|
def runSvm():
    clf = svm.SVC(probability=True)
    # SVC expects integer class labels, so collapse the one-hot targets.
    t = np.argmax(t_train[0:1000], axis=1)
    clf.fit(x_train[0:1000], t)
    p = clf.predict(x_test[0:1000])
    q = np.argmax(t_test[0:1000], axis=1)
    acc = np.sum(p == q) / 1000
    print("SVM accuracy:", acc)
    return p
|
|
data_dir = 'cifar-10-batches-py'
x_train, t_train, x_test, t_test = load_data(data_dir)

# One-hot encode the integer labels 0-9.
# Note: scikit-learn >= 1.2 renamed the 'sparse' argument to 'sparse_output'.
encoder = OneHotEncoder(sparse_output=False)
one_format = [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]
encoder.fit(one_format)
t_train = encoder.transform(t_train.reshape(-1, 1))
t_test = encoder.transform(t_test.reshape(-1, 1))
|
p1 = runNetwork()
p2 = runSvm()
p3 = runKnn(x_train, x_test)
|
# One-hot encode each model's predicted labels so they can be summed as votes.
p1 = encoder.transform(p1.reshape(-1, 1))
p2 = encoder.transform(p2.reshape(-1, 1))
p3 = encoder.transform(p3.reshape(-1, 1))
|
# Majority vote: summing the one-hot predictions and taking argmax picks the
# class chosen by most models (ties resolve to the smallest class index).
vote = p1 + p2 + p3
p = np.argmax(vote, axis=1)
q = np.argmax(t_test[0:1000], axis=1)
acc = np.sum(p == q) / 1000
print("Final accuracy:", acc)