我在 Linux CentOS 7 机器上使用 Torch7。
我正在尝试将人工神经网络(ANN)应用于我的数据集,以解决一个二元分类问题。我使用的是一个简单的多层感知器。
我使用了以下 Torch 包:optim、torch。
问题是我的感知器总是预测零值(被归类为零的元素),我无法理解为什么……
这是我的数据集(“dataset_file.csv”)。共有 34 个特征和 1 个目标标签(最后一列,取值为 0 或 1)。(注:下面列出的各行列数不足 35,数据在转载时似乎有缺失。)
- 0.55,1,0.29,0.46,0.67,0.37,0.41,0.08,0.47,0.23,0.13,0.82,0.25,0.04,0.52,0.33,0
- 0.65,0.64,0.02,0.32,0.18,0.2,0.38,0.24,0
- 0.34,0.5,0.55,0.06,0.15,0.51,0.22,0.6,0.42,1
- 0.46,0.14,0.17,0.1,0.94,0.65,0.75,0.3,0
- 0.55,0.03,0.16,0.12,0.73,0.54,0.44,0.35,0.11,0
- 0.67,0.71,0.74,0.69,0.27,0.61,0.48,1
- 0.52,0.21,0.01,0.34,0.85,0.05,0.36,0
- 0.58,0.57,0.19,0
- 0.66,0.07,0.45,0.92,0
- 0.39,0.31,0.81,0
- 0.26,0.26,0.43,0
- 0.96,0.63,0.86,0.72,0.53,0.4,0.09,0.8,0.28,0
- 0.6,0
- 0.72,0.78,0.68,0
- 0.56,0.56,0.49,0.62,0.76,0.88,1
- 0.61,0.58,0
- 0.59,0.87,0
- 0.74,0.93,0
- 0.64,1
- 0.36,0.79,0.59,0.7,1
这是我的Torch Lua代码:
- -- Format a number with commas as thousands separators, e.g. 1234567 -> "1,234,567".
- -- @param amount number or numeric string; a leading "-" sign is preserved
- -- @return string the formatted value
- function comma_value(amount)
-     local formatted = tostring(amount)
-     -- kept local: the original assigned this counter to the global `k`
-     local replaced
-     repeat
-         -- each pass inserts one comma before the last three digits of the leading digit run
-         formatted, replaced = string.gsub(formatted, "^(-?%d+)(%d%d%d)", "%1,%2")
-     until replaced == 0
-     return formatted
- end
- -- Compute and print the confusion matrix and derived scores for binary predictions.
- -- A value counts as positive when it is >= threshold; the same threshold is
- -- applied to the predictions and to the ground-truth values.
- -- Relies on the file-level helpers round() and comma_value().
- -- @param predictionTestVect array of predicted scores
- -- @param truthVect array of ground-truth values, indexed like predictionTestVect
- -- @param threshold number separating negatives (< threshold) from positives (>= threshold)
- -- @param printValues boolean; when true every prediction/truth pair is printed with its outcome
- -- @return table {accuracy, arrayFPindices, arrayFPvalues, MatthewsCC};
- --         MatthewsCC stays -2 when its denominator is zero (not computable)
- function confusion_matrix(predictionTestVect,truthVect,threshold,printValues)
-     local tp, tn, fp, fn = 0, 0, 0, 0
-     local MatthewsCC = -2
-     local accuracy = -2
-     local arrayFPindices = {}
-     local arrayFPvalues = {}
-     for i=1,#predictionTestVect do
-         local predicted = predictionTestVect[i]
-         local truth = truthVect[i]
-         if printValues == true then
-             io.write("predictionTestVect["..i.."] = ".. round(predicted,4).."\ttruthVect["..i.."] = "..truth.." ");
-             io.flush();
-         end
-         if predicted >= threshold and truth >= threshold then
-             tp = tp + 1
-             if printValues == true then print(" TP ") end
-         elseif predicted < threshold and truth >= threshold then
-             fn = fn + 1
-             if printValues == true then print(" FN ") end
-         elseif predicted >= threshold and truth < threshold then
-             fp = fp + 1
-             if printValues == true then print(" FP ") end
-             -- false positives are also reported back to the caller
-             arrayFPindices[#arrayFPindices+1] = i
-             arrayFPvalues[#arrayFPvalues+1] = predicted
-         elseif predicted < threshold and truth < threshold then
-             tn = tn + 1
-             if printValues == true then print(" TN ") end
-         end
-     end
-     local positives = fn + tp
-     local negatives = fp + tn
-     print("TOTAL:")
-     print(" FN = "..comma_value(fn).." / "..comma_value(positives).."\t (truth == 1) & (prediction < threshold)");
-     print(" TP = "..comma_value(tp).." / "..comma_value(positives).."\t (truth == 1) & (prediction >= threshold)\n");
-     print(" FP = "..comma_value(fp).." / "..comma_value(negatives).."\t (truth == 0) & (prediction >= threshold)");
-     print(" TN = "..comma_value(tn).." / "..comma_value(negatives).."\t (truth == 0) & (prediction < threshold)\n");
-     -- Matthews correlation coefficient; these intermediates were globals in the original
-     local upperMCC = (tp*tn) - (fp*fn)
-     local innerSquare = (tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)
-     local lowerMCC = math.sqrt(innerSquare)
-     if lowerMCC>0 then MatthewsCC = upperMCC/lowerMCC end
-     print("signedMCC = "..MatthewsCC)
-     if MatthewsCC > -2 then
-         print("\n::::\tMatthews correlation coefficient = "..MatthewsCC.."\t::::\n");
-     else
-         print("Matthews correlation coefficient = NOT computable");
-     end
-     accuracy = (tp + tn)/(tp + tn +fn + fp)
-     print("accuracy = "..round(accuracy,2).. " = (tp + tn) / (tp + tn +fn + fp) \t \t [worst = -1,best = +1]");
-     local f1_score = -2
-     if (tp+fp+fn)>0 then
-         f1_score = (2*tp) / (2*tp+fp+fn)
-         print("f1_score = "..round(f1_score,2).." = (2*tp) / (2*tp+fp+fn) \t [worst = 0,best = 1]");
-     else
-         print("f1_score CANNOT be computed because (tp+fp+fn)==0")
-     end
-     -- the combined rate is only printed when both MCC and F1 were computable
-     if MatthewsCC > -2 and f1_score > -2 then
-         local totalRate = MatthewsCC + accuracy + f1_score
-         print("total rate = "..round(totalRate,2).." in [-1,+3] that is "..round((totalRate+1)*100/4,2).."% of possible correctness");
-     end
-     local numberOfPredictedOnes = tp + fp;
-     print("numberOfPredictedOnes = (TP + FP) = "..comma_value(numberOfPredictedOnes).." = "..round(numberOfPredictedOnes*100/(tp + tn + fn + fp),2).."%");
-     io.write("\nDiagnosis: ");
-     if (fn >= tp and (fn+tp)>0) then print("too many FN false negatives"); end
-     if (fp >= tn and (fp+tn)>0) then print("too many FP false positives"); end
-     if (tn > (10*fp) and tp > (10*fn)) then print("Excellent ! ! !");
-     elseif (tn > (5*fp) and tp > (5*fn)) then print("Very good ! !");
-     elseif (tn > (2*fp) and tp > (2*fn)) then print("Good !");
-     elseif (tn >= fp and tp >= fn) then print("Alright");
-     else print("Baaaad"); end
-     return {accuracy,arrayFPindices,arrayFPvalues,MatthewsCC};
- end
- -- Permutations: shuffle the first `count` positions of `tab` in place
- -- (partial Fisher-Yates), drawing each element from positions i..n.
- -- Example: tab = {1,2,3,4,5,6,7,8,9,10}; permute(tab,10,10)
- -- @param tab array to shuffle (modified in place)
- -- @param n number of leading elements taking part; defaults to #tab
- -- @param count number of positions to fill with a random pick; defaults to n
- -- @return tab, the same table that was passed in
- function permute(tab,n,count)
-     n = n or #tab
-     -- clamp: for i > n, math.random(i,n) raises "interval is empty"
-     local last = math.min(count or n, n)
-     for i = 1,last do
-         local j = math.random(i,n)
-         tab[i],tab[j] = tab[j],tab[i]
-     end
-     return tab
- end
- -- Round `num` to `idp` decimal places (0 when idp is omitted); halves round upwards.
- -- @param num number to round
- -- @param idp optional number of decimal places
- -- @return the rounded number
- function round(num,idp)
-     local decimals = idp or 0
-     local scale = 10^decimals
-     -- shift the wanted digit into the units place, add one half, then truncate
-     local shifted = num * scale + 0.5
-     return math.floor(shifted) / scale
- end
- -- ##############################3
- -- Read the CSV file: the first row supplies the column names, every later
- -- row is converted to numbers and stored as profile_vett[row index].
- local profile_vett = {}
- local csv = require("csv")
- local fileName = "dataset_file.csv"
- print("Readin' "..tostring(fileName))
- local f = csv.open(fileName)
- local column_names = {}
- local j = 0
- for fields in f:lines() do
-     if j == 0 then
-         -- header row
-         for col,name in ipairs(fields) do column_names[col] = name end
-     else
-         local row = {}
-         for col,raw in ipairs(fields) do row[col] = tonumber(raw) end
-         profile_vett[j] = row
-     end
-     j = j + 1
- end
- -- Switches and hyper-parameters; the upper-case ones are globals read further down the script.
- OPTIM_PACKAGE = true
- local output_number = 1
- THRESHOLD = 0.5 -- ORIGINAL
- DROPOUT_FLAG = false
- MOMENTUM = false
- MOMENTUM_ALPHA = 0.5
- MAX_MSE = 4
- LEARN_RATE = 0.001
- ITERATIONS = 100
- local hidden_units = 2000
- local hidden_layers = 1
- local hiddenUnitVect = {2000,4000,6000,8000,10000}
- -- local hiddenLayerVect = {1,5}
- local hiddenLayerVect = {1}
- -- Split every row into its features (all columns but the last) and its label (last column).
- local profile_vett_data = {}
- local label_vett = {}
- -- fail with a clear message instead of indexing a nil row below
- assert(#profile_vett > 0, "no data rows were read from the CSV file")
- local column_count = #(profile_vett[1])
- for i=1,#profile_vett do
-     local features = {}
-     for j=1,column_count-1 do
-         features[j] = profile_vett[i][j]
-     end
-     profile_vett_data[i] = features
-     label_vett[i] = profile_vett[i][column_count]
- end
- print("Number of value profiles (rows) = "..#profile_vett_data);
- print("Number features (columns) = "..#(profile_vett_data[1]));
- print("Number of targets (rows) = "..#label_vett);
- local table_row_outcome = label_vett
- -- FIX: feed the network the feature-only rows. The original used profile_vett,
- -- whose rows still carry the label in their last column, so the target was
- -- part of the input vector.
- local table_rows_vett = profile_vett_data
- -- ########################################################
- -- START
- -- Shuffle the row indices, then send the leading part of the shuffled order to
- -- the training set and the last TEST_SET_PERC percent to the test set.
- local indexVect = {};
- for row=1,#table_rows_vett do indexVect[row] = row; end
- permutedIndexVect = permute(indexVect,#indexVect,#indexVect);
- TEST_SET_PERC = 20
- local test_set_size = round((TEST_SET_PERC*#table_rows_vett)/100)
- print("training_set_size = "..(#table_rows_vett-test_set_size).." elements");
- print("test_set_size = "..test_set_size.." elements\n");
- local train_table_row_profile = {}
- local test_table_row_profile = {}
- local original_test_indexes = {}
- for position=1,#table_rows_vett do
-     local source_row = permutedIndexVect[position]
-     -- each sample is a pair {input tensor, 1-element target tensor}
-     local sample = {torch.Tensor(table_rows_vett[source_row]),torch.Tensor{table_row_outcome[source_row]}}
-     if position > (tonumber(#table_rows_vett)-test_set_size) then
-         original_test_indexes[#original_test_indexes+1] = source_row;
-         test_table_row_profile[#test_table_row_profile+1] = sample
-     else
-         train_table_row_profile[#train_table_row_profile+1] = sample
-     end
- end
- -- Build the network: one input->hidden layer, then `hidden_layers` further
- -- hidden->hidden layers (each Linear + Sigmoid, optionally Dropout), and a
- -- final Linear layer producing `output_number` values.
- require 'nn'
- perceptron = nn.Sequential()
- input_number = #table_rows_vett[1]
- local layer_inputs = input_number
- for w=0,hidden_layers do
-     -- w == 0 is the layer fed by the inputs; later passes are hidden->hidden
-     perceptron:add(nn.Linear(layer_inputs,hidden_units))
-     perceptron:add(nn.Sigmoid())
-     if DROPOUT_FLAG==true then perceptron:add(nn.Dropout()) end
-     layer_inputs = hidden_units
- end
- perceptron:add(nn.Linear(hidden_units,output_number))
- -- give both sample tables a size() method returning their element count
- train_table_row_profile.size = function(self) return #train_table_row_profile end
- test_table_row_profile.size = function(self) return #test_table_row_profile end
- -- OPTIMIZATION LOOPS
- -- For every (hidden_units, hidden_layers) combination: train `perceptron` with
- -- per-sample SGD for ITERATIONS epochs, then score it on the test set.
- -- NOTE(review): `perceptron` is constructed once before these loops, so the
- -- hidden_units / hidden_layers values picked here only change the log line;
- -- every pass keeps training the same network.
- require 'optim'
- local MCC_vect = {}
- -- getParameters() re-flattens the network storage, so it is called once, not per pass
- local params,gradParams = perceptron:getParameters()
- for a=1,#hiddenUnitVect do
-     for b=1,#hiddenLayerVect do
-         local hidden_units = hiddenUnitVect[a]
-         local hidden_layers = hiddenLayerVect[b]
-         print("hidden_units = "..hidden_units.."\t output_number = "..output_number.." hidden_layers = "..hidden_layers)
-         local criterion = nn.MSECriterion()
-         local lossSum = 0
-         local error_progress = 0
-         local optimState = {learningRate = LEARN_RATE}
-         -- FIX: the original branches were swapped, so momentum was applied
-         -- exactly when MOMENTUM was false
-         if MOMENTUM==true then optimState.momentum = MOMENTUM_ALPHA end
-         local total_runs = ITERATIONS*#train_table_row_profile
-         local loopIterations = 1
-         for epoch=1,ITERATIONS do
-             for k=1,#train_table_row_profile do
-                 -- closure handed to optim.sgd: returns the loss and the flattened gradient for sample k
-                 local function feval(params)
-                     gradParams:zero()
-                     local thisProfile = train_table_row_profile[k][1]
-                     local thisLabel = train_table_row_profile[k][2]
-                     local thisPrediction = perceptron:forward(thisProfile)
-                     local loss = criterion:forward(thisPrediction,thisLabel)
-                     -- print("thisPrediction = "..round(thisPrediction[1],2).." thisLabel = "..thisLabel[1])
-                     lossSum = lossSum + loss
-                     error_progress = lossSum*100 / (loopIterations*MAX_MSE)
-                     -- progress is logged only when the completion percentage is a whole number
-                     if ((loopIterations*100/total_runs)*10)%10==0 then
-                         io.write("completion: ",round((loopIterations*100/total_runs),2).."%" )
-                         io.write(" (epoch="..epoch..")(element="..k..") loss = "..round(loss,2).." ")
-                         io.write("\terror progress = "..round(error_progress,5).."%\n")
-                     end
-                     local dloss_doutput = criterion:backward(thisPrediction,thisLabel)
-                     perceptron:backward(thisProfile,dloss_doutput)
-                     return loss,gradParams
-                 end
-                 optim.sgd(feval,params,optimState)
-                 loopIterations = loopIterations+1
-             end
-         end
-         -- Evaluation on the held-out test set
-         local correctPredictions = 0
-         local predictionTestVect = {}
-         local truthVect = {}
-         for i=1,#test_table_row_profile do
-             local current_label = test_table_row_profile[i][2][1]
-             local prediction = perceptron:forward(test_table_row_profile[i][1])[1]
-             predictionTestVect[i] = prediction
-             truthVect[i] = current_label
-             local bothPositive = current_label >= THRESHOLD and prediction >= THRESHOLD
-             local bothNegative = current_label < THRESHOLD and prediction < THRESHOLD
-             if bothPositive or bothNegative then correctPredictions = correctPredictions + 1; end
-         end
-         -- FIX: report once, after the whole test set has been scored (the test
-         -- loop was never closed before these lines, leaving `for a` without an `end`)
-         print("\nCorrect predictions = "..round(correctPredictions*100/#test_table_row_profile,2).."%")
-         local printValues = false
-         -- FIX: truthVect was missing from the call, shifting every later argument
-         local output_confusion_matrix = confusion_matrix(predictionTestVect,truthVect,THRESHOLD,printValues)
-     end
- end
有没有人知道为什么我的脚本只预测零元素?
编辑:我已把原始数据集替换为脚本中实际使用的归一化版本。
解决方法
当我运行您的原始代码时,有时预测结果全为零,有时又能得到完美的结果。这表明您的原始模型对参数的初始值非常敏感。
如果我使用种子值torch.manualSeed(0)(所以我们总是有相同的初始化),我每次都会得到完美的表现.但这不是一般的解决方案.
为了获得更全面的改进,我做了以下更改:
>减少隐藏单元的数量。原始代码中单个隐藏层有 2000 个单元,但您只有 34 个输入和 1 个输出;通常隐藏单元的数量只需介于输入数与输出数之间。我把它减少到了 50。
>标签分布不平衡,只有 5/27(19%)的标签是 1,所以划分训练集/测试集时确实应该保持 1 与 0 的比例。目前我只是把测试集的比例增加到了 50%。
>我还将学习率提高到 0.01,开启了 MOMENTUM,并将 ITERATIONS 增加到 200。
当我把这个模型运行 20 次(不设置随机种子)时,有 19 次获得了优异的结果。要想进一步改进,您可以继续调整超参数;还应该使用单独的验证集来比较多次不同的初始化,从中选出“最佳”模型(尽管这会把本来就很小的数据集进一步细分)。
- -- NOTE(review): this listing is incomplete as shown. The lines flagged below join the
- -- start of one statement to the end of a different one, so the code between them is
- -- missing and the listing cannot be run as-is.
- -- add comma to separate thousands
- function comma_value(amount)
- local formatted = amount
- while true do
- -- NOTE(review): spliced line - starts as the gsub assignment of comma_value and ends as a `for ... in ipairs(fields) do` header
- formatted,v in ipairs(fields) do
- column_names[i] = v
- end
- j = j + 1
- end
- end
- OPTIM_PACKAGE = true
- local output_number = 1
- THRESHOLD = 0.5 -- ORIGINAL
- DROPOUT_FLAG = false
- MOMENTUM_ALPHA = 0.5
- MAX_MSE = 4
- -- CHANGE: increased learn_rate to 0.01,reduced hidden units to 50,turned momentum on,increased iterations to 200
- LEARN_RATE = 0.01
- local hidden_units = 50
- MOMENTUM = true
- ITERATIONS = 200
- -------------------------------------
- local hidden_layers = 1
- -- NOTE(review): spliced line - starts as the hiddenUnitVect table literal and ends as a `for j=1,#(profile_vett[1]) do` header
- local hiddenUnitVect = {2000,#(profile_vett[1]) do
- if j<#(profile_vett[1]) then
- profile_vett_data[i][j] = profile_vett[i][j]
- else
- label_vett[i] = profile_vett[i][j]
- end
- end
- end
- print("Number of value profiles (rows) = "..#profile_vett_data);
- print("Number features (columns) = "..#(profile_vett_data[1]));
- print("Number of targets (rows) = "..#label_vett);
- local table_row_outcome = label_vett
- local table_rows_vett = profile_vett
- -- ########################################################
- -- START
- -- Seed random number generator
- -- torch.manualSeed(0)
- local indexVect = {};
- -- NOTE(review): spliced line - starts as a `for i=1,...` header and ends with the tail of a call taking #indexVect
- for i=1,#indexVect);
- -- CHANGE: increase test_set to 50%
- TEST_SET_PERC = 50
- ---------------------------
- local test_set_size = round((TEST_SET_PERC*#table_rows_vett)/100)
- print("training_set_size = "..(#table_rows_vett-test_set_size).." elements");
- print("test_set_size = "..test_set_size.." elements\n");
- local train_table_row_profile = {}
- local test_table_row_profile = {}
- local original_test_indexes = {}
- -- NOTE(review): spliced line - starts as a `for i=1,...` header and ends with the tail of a call taking printValues; everything in between is missing
- for i=1,printValues)
- end
- end
- end
下面粘贴的是20次运行中的1次输出:
- Correct predictions = 100%
- TOTAL:
- FN = 0 / 4 (truth == 1) & (prediction < threshold)
- TP = 4 / 4 (truth == 1) & (prediction >= threshold)
- FP = 0 / 9 (truth == 0) & (prediction >= threshold)
- TN = 9 / 9 (truth == 0) & (prediction < threshold)
- signedMCC = 1
- :::: Matthews correlation coefficient = 1 ::::
- accuracy = 1 = (tp + tn) / (tp + tn +fn + fp) [worst = -1,best = +1]
- f1_score = 1 = (2*tp) / (2*tp+fp+fn) [worst = 0,best = 1]
- total rate = 3 in [-1,+3] that is 100% of possible correctness
- numberOfPredictedOnes = (TP + FP) = 4 = 30.77%
- Diagnosis: Excellent ! ! !