1 #ifndef VIENNACL_GENERATOR_GENERATE_HPP
2 #define VIENNACL_GENERATOR_GENERATE_HPP
52 typedef std::pair<expression_descriptor, generator::profile_base::statements_type> representation_node_type;
53 typedef std::vector<representation_node_type> statements_type;
54 typedef std::map<forced_profile_key_type, tools::shared_ptr<profile_base> > forced_profiles_type;
67 res = res || is_lhs_flow_transposed(statement, expr[root_node.
lhs.
node_index]);
69 res = res || is_lhs_flow_transposed(statement, expr[root_node.
rhs.
node_index]);
78 return is_flow_transposed(statement, expr[root_node.
lhs.
node_index]);
87 return is_flow_transposed(statement, expr[root_node.
rhs.
node_index]);
106 fill_expression_descriptor_scalar(statement, expr[root_node.
lhs.
node_index],descriptor);
108 fill_expression_descriptor_scalar(statement, expr[root_node.
rhs.
node_index],descriptor);
123 if(is_lhs_flow_transposed(statement,root_node))
129 fill_expression_descriptor_vector(statement, expr[root_node.
lhs.
node_index],descriptor);
131 fill_expression_descriptor_vector(statement, expr[root_node.
rhs.
node_index],descriptor);
146 bool lhs_trans = is_lhs_flow_transposed(statement,root_node);
147 bool rhs_trans = is_rhs_flow_transposed(statement,root_node);
148 if(!lhs_trans && !rhs_trans)
150 else if(lhs_trans && !rhs_trans)
152 else if(!lhs_trans && rhs_trans)
154 else if(lhs_trans && rhs_trans)
159 fill_expression_descriptor_matrix(statement, expr[root_node.
lhs.
node_index],descriptor);
161 fill_expression_descriptor_matrix(statement, expr[root_node.
rhs.
node_index],descriptor);
167 descriptor.scalartype_size = utils::call_on_element(root_node.
lhs, utils::scalartype_size_fun());
171 fill_expression_descriptor_vector(statement,root_node,descriptor);
176 fill_expression_descriptor_matrix(statement,root_node,descriptor);
181 fill_expression_descriptor_scalar(statement,root_node,descriptor);
189 template<
class StatementsType>
190 void set_expression_arguments(profile_base
const & profile,
unsigned int device_offset, StatementsType
const & statements,
unsigned int & kernel_id,
viennacl::ocl::program & p, std::list<viennacl::ocl::kernel *> & kernels)
const {
191 for(
vcl_size_t i = 0 ; i < profile.num_kernels() ; ++i){
194 std::sprintf(str,
"kernel_%d_%d",device_offset,kernel_id);
196 kernels.push_back(&kernel);
197 unsigned int current_arg = 0;
199 profile.configure_range_enqueue_arguments(i, statements, kernel, current_arg);
200 std::set<void *> memory;
201 for(
typename StatementsType::const_iterator it = statements.begin() ; it != statements.end() ; ++it){
202 detail::traverse(it->first, it->second, detail::set_arguments_functor(memory,current_arg,kernel));
209 profile_base
const & get_profile(
viennacl::ocl::device const & device, expression_descriptor
const & descriptor)
const {
210 forced_profiles_type::const_iterator it = forced_profiles_.find(std::make_pair(descriptor.type, descriptor.scalartype_size));
211 if(it != forced_profiles_.end())
213 return *profiles::get(device,descriptor);
220 statements_.reserve(16);
234 fill_descriptor(statement, root_node, descriptor);
237 if(statements_.empty())
240 if(statements_.back().first == descriptor)
241 statements_.back().second.push_back(std::make_pair(statement, root_node));
249 unsigned int kernel_id = 0;
250 std::vector<viennacl::ocl::device>::const_iterator found = std::find(ctx_.
devices().begin(),ctx_.
devices().end(),ctx_.
current_device());
251 for(statements_type::const_iterator it = statements_.begin() ; it != statements_.end() ; ++it)
252 set_expression_arguments(get_profile(ctx_.
current_device(), it->first), static_cast<unsigned int>(std::distance(ctx_.
devices().begin(), found)), it->second, kernel_id, p, kernels);
257 unsigned int current_arg = 0;
258 void* memory[64] = {NULL};
259 for(statements_type::const_iterator it = statements_.begin() ; it != statements_.end() ; ++it){
260 for(profile_base::statements_type::const_iterator iit = it->second.begin() ; iit != it->second.end() ; ++iit){
272 stream <<
"#if defined(cl_khr_fp64)\n";
273 stream <<
"# pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
274 stream <<
"#elif defined(cl_amd_fp64)\n";
275 stream <<
"# pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
276 stream <<
"#endif\n";
280 for(std::vector<viennacl::ocl::device>::const_iterator it = ctx_.
devices().begin() ; it != ctx_.
devices().end() ; ++it)
281 for(statements_type::const_iterator iit = statements_.begin() ; iit != statements_.end() ; ++iit)
282 get_profile(*it,iit->first)(stream,device_offset++,iit->second);
295 for(std::vector<viennacl::ocl::device>::const_iterator it = ctx_.
devices().begin() ; it != ctx_.
devices().end() ; ++it)
296 for(statements_type::const_iterator iit = statements_.begin() ; iit != statements_.end() ; ++iit)
297 get_profile(*it,iit->first)(stream,device_offset++,iit->second);
298 std::string res = stream.
str();
340 statements_type statements_;
342 forced_profiles_type forced_profiles_;
352 char* program_name =
new char[256];
354 if(force_recompilation)
358 #ifdef VIENNACL_DEBUG_BUILD
359 std::cout <<
"Building " << program_name <<
"..." << std::endl;
360 std::cout << source_code << std::endl;
366 delete[] program_name;
373 std::list<viennacl::ocl::kernel*> kernels;
375 for(std::list<viennacl::ocl::kernel*>::iterator it = kernels.begin() ; it != kernels.end() ; ++it){
397 gen.
add(s,root_node);
statement_node_subtype subtype
Definition: forwards.h:270
Definition: forwards.h:61
A stream class where the kernel sources are streamed to. Takes care of indentation of the sources...
Definition: utils.hpp:233
Definition: forwards.h:60
std::size_t vcl_size_t
Definition: forwards.h:58
void delete_program(std::string const &name)
Delete the program with the provided name.
Definition: context.hpp:401
Definition: forwards.h:83
code_generator(viennacl::ocl::context const &ctx=viennacl::ocl::current_context())
The constructor.
Definition: generate.hpp:219
viennacl::ocl::program & get_configured_program(viennacl::generator::code_generator const &generator, std::list< viennacl::ocl::kernel * > &kernels, bool force_recompilation=false)
Creates the program associated with a generator object and fills the kernels. Checks the context for ...
Definition: generate.hpp:351
vcl_size_t node_index
Definition: forwards.h:276
void enqueue(viennacl::generator::code_generator const &generator, bool force_recompilation=false)
Set the arguments and enqueue a generator object.
Definition: generate.hpp:372
void force_profile(forced_profile_key_type key, T const &t)
Force the generator to use a specific profile for an operation.
Definition: generate.hpp:225
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
lhs_rhs_element lhs
Definition: forwards.h:422
Definition: forwards.h:176
std::string str()
Definition: utils.hpp:255
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
viennacl::ocl::program & add_program(cl_program p, std::string const &prog_name)
Adds a program to the context.
Definition: context.hpp:340
Definition: forwards.h:190
Functor to map the statements to the types defined in mapped_objects.hpp.
Definition: forwards.h:59
std::list< std::pair< scheduler::statement, scheduler::statement_node > > statements_type
Definition: profile_base.hpp:49
Vendor-specific parameters for the generated kernels.
Definition: forwards.h:54
Definition: forwards.h:58
A class for holding meta information such as the type or the underlying scalar type of an expression ...
Definition: forwards.h:84
Definition: forwards.h:56
Definition: forwards.h:57
Definition: forwards.h:47
lhs_rhs_element rhs
Definition: forwards.h:424
Helper class for the OpenCL kernel generator, representing a statement.
Definition: statement_representation_functor.hpp:52
void configure_program(viennacl::ocl::program &p, std::list< viennacl::ocl::kernel * > &kernels) const
Sets the arguments for a program previously generated by the generator and fills the kernels...
Definition: generate.hpp:248
viennacl::ocl::program & get_program(std::string const &name)
Returns the program with the provided name.
Definition: context.hpp:414
std::string get_cuda_device_code(viennacl::scheduler::statement const &s)
Convenience function to get the CUDA device code for a single statement.
Definition: generate.hpp:388
viennacl::ocl::device const & current_device() const
Returns the current device.
Definition: context.hpp:95
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
Definition: forwards.h:44
Definition: forwards.h:170
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
statement_node_type_family
Groups the type of a node in the statement tree. Used for faster dispatching.
Definition: forwards.h:162
Definition: forwards.h:52
void generate_enqueue_statement(viennacl::scheduler::statement const &s, scheduler::statement_node const &root_node)
Generate and enqueue a statement plus root_node into the current queue.
Definition: generate.hpp:395
Definition: forwards.h:62
Definition: forwards.h:55
std::string make_cuda_program_string() const
Creates the CUDA device code from the set of expressions in the object.
Definition: generate.hpp:291
Definition: forwards.h:43
viennacl::ocl::context & current_context()
Convenience function for returning the current context.
Definition: backend.hpp:192
Definition: forwards.h:173
Wrapper class for an OpenCL program.
Definition: program.hpp:40
Provides the data structures for dealing with a single statement such as 'x = y + z;'.
std::vector< value_type > container_type
Definition: forwards.h:452
std::string make_opencl_program_string() const
Creates the OpenCL program string from the set of expressions in the object.
Definition: generate.hpp:268
Definition: forwards.h:45
Definition: forwards.h:96
Definition: forwards.h:53
container_type const & array() const
Definition: forwards.h:473
viennacl::ocl::kernel & get_kernel(std::string const &name)
Returns the kernel with the provided name.
Definition: context.hpp:638
Definition: forwards.h:95
Class for handling code generation.
Definition: generate.hpp:47
bool add(scheduler::statement const &statement, scheduler::statement_node const &root_node)
Add a statement and the root node to the expression list.
Definition: generate.hpp:232
Definition: forwards.h:48
Definition: forwards.h:42
Functor to set the arguments of a statement into a kernel.
Definition: forwards.h:101
statement_node_type_family type_family
Definition: forwards.h:269
The main class for representing a statement such as x = inner_prod(y,z); at runtime.
Definition: forwards.h:447
std::vector< viennacl::ocl::device > const & devices() const
Returns a vector with all devices in this context.
Definition: context.hpp:89
std::string get_opencl_program_string(viennacl::scheduler::statement const &s)
Convenience function to get the OpenCL program string for a single statement.
Definition: generate.hpp:381
std::pair< expression_type, vcl_size_t > forced_profile_key_type
Typedef of the key used in the forced profiles. Contains the expression type and the size of the scalar type.
Definition: generate.hpp:50
Functor to generate the string id of a statement.
op_element op
Definition: forwards.h:423
void make_program_name(char *program_name) const
Creates an identifier string for the set of expressions in the object.
Definition: generate.hpp:256
expression_type_family type_family
Definition: forwards.h:90
Main data structure for a node in the statement tree.
Definition: forwards.h:420
operation_node_type type
Definition: forwards.h:416
Definition: forwards.h:191
Definition: forwards.h:46
Definition: forwards.h:167