我实现了
Caffe C++ example 的修改版本。它工作得很好,但速度非常慢,因为它一次只能处理一张图像。理想情况下,我希望把一个包含 200 张图像的向量一次性传给 Caffe,并为每张图像返回最佳预测。我得到了一些
great help from Fanglin Wang,并采纳了他的一些建议,但仍然不清楚如何取回每张图像各自的最佳结果。
Classify 方法现在接收一个由 cv::Mat 对象组成的向量(变量 input_channels),其中每个元素都是一张灰度浮点图像。我去掉了代码中的预处理方法,因为这些图像不需要再转换为浮点数,也不需要减去均值图像。我还想去掉参数 N,因为我只需要返回每张图像概率最高的预测及其概率。
#include "Classifier.h"

#include <algorithm>
#include <fstream>
#include <string>
#include <utility>
#include <vector>

using namespace caffe;
using std::string;

/*
 * Build a classifier from a network definition (model_file), its trained
 * weights (trained_file) and a label file holding one label per line.
 * Aborts (via CHECK) when the label file cannot be opened or when the number
 * of labels differs from the number of network outputs.
 */
Classifier::Classifier(const string& model_file,
                       const string& trained_file,
                       const string& label_file) {
  /* Caffe spells both the macro and the enumerator in upper case. */
#ifdef CPU_ONLY
  Caffe::set_mode(Caffe::CPU);
#else
  Caffe::set_mode(Caffe::GPU);
#endif

  /* Load the network. */
  net_.reset(new Net<float>(model_file, TEST));
  net_->CopyTrainedLayersFrom(trained_file);

  Blob<float>* input_layer = net_->input_blobs()[0];
  num_channels_ = input_layer->channels();
  input_geometry_ = cv::Size(input_layer->width(), input_layer->height());

  /* Load labels. */
  std::ifstream labels(label_file.c_str());
  CHECK(labels) << "Unable to open labels file " << label_file;
  string line;
  while (std::getline(labels, line))
    labels_.push_back(string(line));

  Blob<float>* output_layer = net_->output_blobs()[0];
  CHECK_EQ(labels_.size(), output_layer->channels())
      << "Number of labels is different from the output layer dimension.";
}

/* Order (score, index) pairs by descending score. */
static bool PairCompare(const std::pair<float, int>& lhs,
                        const std::pair<float, int>& rhs) {
  return lhs.first > rhs.first;
}

/*
 * Return the indices of the top N values of vector v, best first.
 * N is clamped to [0, v.size()], so fewer than N indices are returned when v
 * is shorter than N.
 */
static std::vector<int> Argmax(const std::vector<float>& v, int N) {
  std::vector<std::pair<float, int> > pairs;
  pairs.reserve(v.size());
  for (size_t i = 0; i < v.size(); ++i)
    pairs.push_back(std::make_pair(v[i], static_cast<int>(i)));

  /* partial_sort requires its middle iterator to lie inside the range. */
  const int count = std::max(0, std::min(N, static_cast<int>(pairs.size())));
  std::partial_sort(pairs.begin(), pairs.begin() + count, pairs.end(),
                    PairCompare);

  std::vector<int> result;
  result.reserve(count);
  for (int i = 0; i < count; ++i)
    result.push_back(pairs[i].second);
  return result;
}

/*
 * Return the best (label, score) prediction for every image in the batch.
 *
 * input_channels holds the single-channel float planes of all images, image
 * after image (num_channels_ planes per image). The result has one entry per
 * image, in input order; an empty input yields an empty result.
 */
std::vector<Prediction> Classifier::Classify(
    const std::vector<cv::Mat>& input_channels) {
  std::vector<Prediction> predictions;
  if (input_channels.empty())
    return predictions;

  const size_t planes_per_image = static_cast<size_t>(num_channels_);
  CHECK(planes_per_image > 0 &&
        input_channels.size() % planes_per_image == 0)
      << "Number of input planes is not a multiple of the channel count.";
  const int num_images =
      static_cast<int>(input_channels.size() / planes_per_image);

  /* Predict returns the scores of all images back to back. */
  const std::vector<float> output = Predict(input_channels, num_images);
  const size_t num_classes = output.size() / num_images;
  CHECK_GT(num_classes, 0) << "Network produced no class scores.";

  predictions.reserve(num_images);
  for (int i = 0; i < num_images; ++i) {
    const std::vector<float> scores(output.begin() + i * num_classes,
                                    output.begin() + (i + 1) * num_classes);
    const int idx = Argmax(scores, 1)[0];
    predictions.push_back(std::make_pair(labels_[idx], scores[idx]));
  }
  return predictions;
}

/*
 * Run one forward pass over a batch of num_images images and return the raw
 * output scores, num_images * output_channels values laid out image by image.
 *
 * Every plane in input_channels must be CV_32FC1 and match the network input
 * geometry; otherwise cv::Mat::copyTo would reallocate the destination
 * instead of writing into the network's input blob.
 */
std::vector<float> Classifier::Predict(
    const std::vector<cv::Mat>& input_channels, int num_images) {
  Blob<float>* input_layer = net_->input_blobs()[0];
  input_layer->Reshape(num_images, num_channels_,
                       input_geometry_.height, input_geometry_.width);
  /* Forward dimension change to all layers. */
  net_->Reshape();

  /* Wrap the input blob, then copy the caller's planes into it. */
  std::vector<cv::Mat> wrapped;
  WrapInputLayer(&wrapped);
  CHECK_EQ(wrapped.size(), input_channels.size())
      << "Number of input planes does not match the reshaped input layer.";
  for (size_t i = 0; i < wrapped.size(); ++i) {
    const cv::Mat& src = input_channels[i];
    CHECK(src.type() == CV_32FC1 && src.size() == input_geometry_)
        << "Input planes must be CV_32FC1 with the network input size.";
    src.copyTo(wrapped[i]);
  }

  net_->ForwardPrefilled();

  /* Copy the output layer to a std::vector */
  Blob<float>* output_layer = net_->output_blobs()[0];
  const float* begin = output_layer->cpu_data();
  const float* end = begin + num_images * output_layer->channels();
  return std::vector<float>(begin, end);
}

/*
 * Wrap the input layer of the network in separate cv::Mat objects (one per
 * channel of every image in the batch). This way we save one memcpy operation
 * and we don't need to rely on cudaMemcpy2D: writing into these Mats writes
 * directly to the input layer.
 *
 * The batch size is read from the input blob itself, so the blob must already
 * have been reshaped for the current batch.
 */
void Classifier::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
  Blob<float>* input_layer = net_->input_blobs()[0];

  const int width = input_layer->width();
  const int height = input_layer->height();
  const int num_planes = input_layer->channels() * input_layer->num();
  float* input_data = input_layer->mutable_cpu_data();
  for (int i = 0; i < num_planes; ++i) {
    cv::Mat channel(height, width, CV_32FC1, input_data);
    input_channels->push_back(channel);
    input_data += width * height;
  }
}
UPDATE
非常感谢你的帮助 ai。我按照你的建议做了修改,但遇到了一些奇怪的编译错误,其中一部分我已经设法解决,剩下的仍然无法解决。
这些是我所做的改变:
头文件:
#ifndef __CLASSIFIER_H__ #define __CLASSIFIER_H__ #include <caffe/caffe.hpp> #include <opencv2/core/core.hpp> #include <opencv2/highgui/highgui.hpp> #include <opencv2/imgproc/imgproc.hpp> #include <algorithm> #include <iosfwd> #include <memory> #include <string> #include <utility> #include <vector> using namespace caffe; // NOLINT(build/namespaces) using std::string; /* Pair (label,confidence) representing a prediction. */ typedef std::pair<string,float> Prediction; class Classifier { public: Classifier(const string& model_file,const string& label_file); std::vector< std::pair<int,float> > Classify(const std::vector<cv::Mat>& img); private: std::vector< std::vector<float> > Predict(const std::vector<cv::Mat>& img,int nImages); void WrapInputLayer(std::vector<cv::Mat>* input_channels,int nImages); void Preprocess(const std::vector<cv::Mat>& img,std::vector<cv::Mat>* input_channels,int nImages); private: shared_ptr<Net<float> > net_; cv::Size input_geometry_; int num_channels_; std::vector<string> labels_; }; #endif /* __CLASSIFIER_H__ */
类文件:
#define cpu_ONLY #include "Classifier.h" using namespace caffe; // NOLINT(build/namespaces) using std::string; Classifier::Classifier(const string& model_file,TEST)); net_->CopyTrainedLayersFrom(trained_file); CHECK_EQ(net_->num_inputs(),1) << "Network should have exactly one input."; CHECK_EQ(net_->num_outputs(),1) << "Network should have exactly one output."; Blob<float>* input_layer = net_->input_blobs()[0]; num_channels_ = input_layer->channels(); CHECK(num_channels_ == 3 || num_channels_ == 1) << "Input layer should have 1 or 3 channels."; input_geometry_ = cv::Size(input_layer->width(),PairCompare); std::vector<int> result; for (int i = 0; i < N; ++i) result.push_back(pairs[i].second); return result; } std::vector< std::pair<int,float> > Classifier::Classify(const std::vector<cv::Mat>& img) { std::vector< std::vector<float> > output = Predict(img,img.size()); std::vector< std::pair<int,float> > predictions; for ( int i = 0 ; i < output.size(); i++ ) { std::vector<int> maxN = Argmax(output[i],output[idx])); } return predictions; } std::vector< std::vector<float> > Classifier::Predict(const std::vector<cv::Mat>& img,int nImages) { Blob<float>* input_layer = net_->input_blobs()[0]; input_layer->Reshape(nImages,input_geometry_.width); /* Forward dimension change to all layers. */ net_->Reshape(); std::vector<cv::Mat> input_channels; WrapInputLayer(&input_channels,nImages); Preprocess(img,&input_channels,nImages); net_->ForwardPrefilled(); /* Copy the output layer to a std::vector */ Blob<float>* output_layer = net_->output_blobs()[0]; std::vector <std::vector<float> > ret; for (int i = 0; i < nImages; i++) { const float* begin = output_layer->cpu_data() + i*output_layer->channels(); const float* end = begin + output_layer->channels(); ret.push_back( std::vector<float>(begin,end) ); } return ret; } /* Wrap the input layer of the network in separate cv::Mat objects * (one per channel). 
This way we save one memcpy operation and we * don't need to rely on cudaMemcpy2D. The last preprocessing * operation will write the separate channels directly to the input * layer. */ void Classifier::WrapInputLayer(std::vector<cv::Mat>* input_channels,int nImages) { Blob<float>* input_layer = net_->input_blobs()[0]; int width = input_layer->width(); int height = input_layer->height(); float* input_data = input_layer->mutable_cpu_data(); for (int i = 0; i < input_layer->channels()* nImages; ++i) { cv::Mat channel(height,input_data); input_channels->push_back(channel); input_data += width * height; } } void Classifier::Preprocess(const std::vector<cv::Mat>& img,int nImages) { for (int i = 0; i < nImages; i++) { vector<cv::Mat> channels; cv::split(img[i],channels); for (int j = 0; j < channels.size(); j++){ channels[j].copyTo((*input_channels)[i*num_channels_[0]+j]); } } }
解决方法
如果我对你的问题理解正确,你输入了 n 张图像,期望得到 n 个(标签,概率)对,但实际只得到了一个这样的对。
我相信下面这些修改应该能解决你的问题:
> Classifier::Predict 应该返回一个 vector< vector<float> >,即每张输入图像各对应一个概率向量。也就是说,它是一个大小为 n 的向量,其中每个元素又是一个大小为 output_layer->channels() 的向量:
std::vector< std::vecot<float> > Classifier::Predict(const std::vector<cv::Mat> &input_channels,int num_images) { // same code here... /* changes here: Copy the output layer to a std::vector */ Blob<float>* output_layer = net_->output_blobs()[0]; std::vector< std::vector<float> > ret; for ( int i = 0 ; i < num_images ; i++ ) { const float* begin = output_layer->cpu_data() + i*output_layer->channels(); const float* end = begin + output_layer->channels(); ret.push_back( std::vector<float>(begin,end) ); } return ret; }
> 在 Classifier::Classify 中,需要对每个 vector<float> 分别独立地调用 Argmax:
/*
 * Return the best (class index, score) pair for every image in the batch,
 * in input order. An empty batch yields an empty result.
 *
 * NOTE(review): the image count is derived as input_channels.size() /
 * num_channels_, i.e. it assumes num_channels_ planes per image stored image
 * after image -- confirm against the caller.
 */
std::vector< std::pair<int, float> > Classifier::Classify(
    const std::vector<cv::Mat>& input_channels) {
  std::vector< std::pair<int, float> > predictions;
  if (input_channels.empty())
    return predictions;

  /* Predict needs the batch size as its second argument. */
  const int num_images =
      static_cast<int>(input_channels.size()) / num_channels_;
  const std::vector< std::vector<float> > output =
      Predict(input_channels, num_images);

  predictions.reserve(output.size());
  for (size_t i = 0; i < output.size(); i++) {
    const std::vector<int> maxN = Argmax(output[i], 1);
    const int idx = maxN[0];
    /* Pair the class index with that image's own score for the class. */
    predictions.push_back(std::make_pair(idx, output[i][idx]));
  }
  return predictions;
}