1 #ifndef VIENNACL_GENERATOR_GENERATE_VECTOR_REDUCTION_HPP
2 #define VIENNACL_GENERATOR_GENERATE_VECTOR_REDUCTION_HPP
47 return m_*(k_+1)*scalartype_size;
56 return "Vec,M,K,NumGroups";
60 std::ostringstream oss;
64 <<
"," << num_groups_;
/** @brief Read-only accessor: returns the current value of m_ (presumably the 'm' argument given to the constructor -- TODO confirm). */
68 unsigned int m()
const {
return m_; }
/** @brief Read-only accessor: returns the current value of k_ (presumably the 'k' argument given to the constructor -- TODO confirm). */
70 unsigned int k()
const {
return k_; }
81 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
83 for(scheduler::statement::container_type::iterator iit = exprs.begin() ; iit != exprs.end() ; ++iit){
109 assert(
false &&
bool(
"unexpected expression tree"));
119 arguments_string += detail::generate_value_kernel_argument(
"unsigned int",
"M");
120 arguments_string += detail::generate_value_kernel_argument(
"unsigned int",
"N");
126 std::vector<detail::mapped_vector_reduction*> exprs;
127 for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it){
128 for(detail::mapping_type::const_iterator iit = it->begin() ; iit != it->end() ; ++iit){
132 p->bind_sizes(
"M",
"N");
138 std::string scalartype =
"float";
139 bool is_lhs_transposed =
false;
142 is_lhs_transposed =
true;
145 if(is_lhs_transposed)
148 for(std::vector<detail::mapped_vector_reduction*>::iterator it = exprs.begin() ; it != exprs.end() ; ++it){
149 stream <<
"__local " << (*it)->scalartype() <<
" buf" << std::distance(exprs.begin(), it) <<
'[' << lsize1*lsize2 <<
"];" << std::endl;
152 stream <<
"unsigned int lid0 = get_local_id(0);" << std::endl;
153 stream <<
"unsigned int lid1 = get_local_id(1);" << std::endl;
156 stream <<
"for(unsigned int r = get_global_id(0) ; r < " << size1 <<
" ; r += get_global_size(0)){" << std::endl;
160 stream << scalartype <<
" sum" <<
k <<
" = 0;" << std::endl;
162 stream <<
"for(unsigned int c = get_local_id(1) ; c < " <<
size2 <<
" ; c += get_local_size(1)){" << std::endl;
165 std::set<std::string> fetched;
167 for(std::vector<detail::mapped_vector_reduction*>::iterator it = exprs.begin() ; it != exprs.end() ; ++it){
170 if(is_lhs_transposed)
171 detail::fetch_all_lhs(fetched,statement,root_node, std::make_pair(
"c",
"r"),
vector_size_,stream,(*it)->mapping());
173 detail::fetch_all_lhs(fetched,statement,root_node, std::make_pair(
"r",
"c"),
vector_size_,stream,(*it)->mapping());
175 detail::fetch_all_rhs(fetched,statement,root_node, std::make_pair(
"c",
"0"),
vector_size_,stream,(*it)->mapping());
180 for(std::vector<detail::mapped_vector_reduction*>::iterator it = exprs.begin() ; it != exprs.end() ; ++it){
184 detail::generate_all_lhs(statement,root_node,std::make_pair(
"i",
"0"),-1,str,(*it)->mapping());
186 detail::generate_all_rhs(statement,root_node,std::make_pair(
"i",
"0"),-1,str,(*it)->mapping());
187 stream <<
" sum" << std::distance(exprs.begin(),it) <<
" += " << str <<
";" << std::endl;
192 stream <<
"}" << std::endl;
195 stream <<
"buf" <<
k <<
"[lid0*" << lsize2 <<
"+ lid1] = sum" <<
k <<
";" << std::endl;
199 stream <<
"barrier(CLK_LOCAL_MEM_FENCE); " << std::endl;
200 stream <<
"if(lid1 < " <<
stride <<
")" ;
201 stream <<
"{" << std::endl;
204 for(
vcl_size_t i = 0 ; i < exprs.size() ; ++i)
205 stream <<
"buf" << i <<
"[lid0*" << lsize2 <<
"+ lid1] += buf" << i <<
"[lid0*" << lsize2 <<
"+ lid1 + " << stride <<
"];" << std::endl;
208 stream <<
"}" << std::endl;
212 stream <<
"barrier(CLK_LOCAL_MEM_FENCE); " << std::endl;
213 stream <<
"if(lid1 == 0)" ;
214 stream <<
"{" << std::endl;
216 for(
vcl_size_t i = 0 ; i < exprs.size() ; ++i){
217 stream <<
"buf" << i <<
"[lid0*" << lsize2 <<
"] += buf" << i <<
"[lid0*" << lsize2 <<
"+ 1];" << std::endl;
221 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
223 detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair(
"r",
"0"), -1, str, mapping[i++]),
false);
224 stream << str <<
";" << std::endl;
227 stream <<
"}" << std::endl;
231 stream <<
"}" << std::endl;
238 unsigned int num_groups_;
A stream class where the kernel sources are streamed to. Takes care of indentation of the sources...
Definition: utils.hpp:233
void arg(unsigned int pos, cl_char val)
Sets a char argument at the provided position.
Definition: kernel.hpp:124
std::size_t vcl_size_t
Definition: forwards.h:58
Definition: forwards.h:83
vcl_size_t node_index
Definition: forwards.h:276
statement(container_type const &custom_array)
Definition: forwards.h:454
Internal utilities for dynamic OpenCL kernel generation.
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Base class for an operation profile.
Definition: profile_base.hpp:47
lhs_rhs_element lhs
Definition: forwards.h:422
Definition: forwards.h:176
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
unsigned int k() const
Definition: vector_reduction.hpp:70
Mapping of a matrix to a generator class.
Definition: mapped_objects.hpp:236
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:46
Functor for obtaining the internal number of columns of a ViennaCL matrix.
Definition: utils.hpp:188
std::list< std::pair< scheduler::statement, scheduler::statement_node > > statements_type
Definition: profile_base.hpp:49
vector_reduction(unsigned int vectorization, unsigned int m, unsigned int k, unsigned int num_groups)
The user constructor.
Definition: vector_reduction.hpp:52
lhs_rhs_element rhs
Definition: forwards.h:424
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:245
unsigned int m() const
Definition: vector_reduction.hpp:68
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
unsigned int num_groups() const
Definition: vector_reduction.hpp:72
several code generation helpers
void kernel_arguments(statements_type const &, std::string &arguments_string) const
Definition: vector_reduction.hpp:118
Base classes for the profiles.
Map ViennaCL objects to generator wrappers.
Functor for obtaining the internal number of rows of a ViennaCL matrix.
Definition: utils.hpp:181
void configure_local_sizes(viennacl::ocl::kernel &k, vcl_size_t) const
Definition: profile_base.hpp:59
void dec_tab()
Definition: utils.hpp:259
Provides the data structures for dealing with a single statement such as 'x = y + z;'.
std::vector< value_type > container_type
Definition: forwards.h:452
std::string csv_representation() const
csv representation of an operation
Definition: vector_reduction.hpp:59
OpenCL kernel template for reductions resulting in a vector. Example: Computing the row norms of a matrix...
Definition: vector_reduction.hpp:44
unsigned int vector_size_
Definition: profile_base.hpp:178
void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const &statements, viennacl::ocl::kernel &kernel, unsigned int &n_arg) const
Configures the range and enqueues the arguments associated with the profile.
Definition: vector_reduction.hpp:74
Definition: forwards.h:95
void inc_tab()
Definition: utils.hpp:257
std::string to_string(T const t)
Definition: utils.hpp:204
Mapping of a vector reduction (based on matrix-vector product)
Definition: mapped_objects.hpp:109
static std::string csv_format()
Definition: vector_reduction.hpp:55
statement_node_type_family type_family
Definition: forwards.h:269
viennacl::enable_if< viennacl::is_scalar< S1 >::value &&viennacl::is_scalar< S2 >::value >::type swap(S1 &s1, S2 &s2)
Swaps the contents of two scalars; the data is copied.
Definition: scalar_operations.hpp:366
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:447
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
Main data structure for a node in the statement tree.
Definition: forwards.h:420
Definition: forwards.h:167