StarPU Internal Handbook
|
#include <starpu.h>
#include <common/config.h>
#include <common/starpu_spinlock.h>
#include <common/rwlock.h>
#include <common/timing.h>
#include <common/fxt.h>
#include <common/list.h>
#include <datawizard/interfaces/data_interface.h>
#include <datawizard/datastats.h>
#include <datawizard/memstats.h>
#include <datawizard/data_request.h>
Go to the source code of this file.
Data Structures | |
struct | _starpu_data_replicate |
struct | _starpu_jobid_list |
struct | _starpu_task_wrapper_list |
struct | _starpu_task_wrapper_dlist |
struct | _starpu_data_state |
Macros | |
#define | STARPU_UNMAPPED |
Typedefs | |
typedef void(* | _starpu_data_handle_unregister_hook) (starpu_data_handle_t) |
Enumerations | |
enum | _starpu_cache_state { STARPU_OWNER , STARPU_SHARED , STARPU_INVALID } |
Functions | |
int | _starpu_fetch_data_on_node (starpu_data_handle_t handle, int node, struct _starpu_data_replicate *replicate, enum starpu_data_access_mode mode, unsigned detached, struct starpu_task *task, enum starpu_is_prefetch is_prefetch, unsigned async, void(*callback_func)(void *), void *callback_arg, int prio, const char *origin) |
void | _starpu_release_data_on_node (struct _starpu_data_state *state, uint32_t default_wt_mask, enum starpu_data_access_mode down_to_mode, struct _starpu_data_replicate *replicate) |
void | _starpu_update_data_state (starpu_data_handle_t handle, struct _starpu_data_replicate *requesting_replicate, enum starpu_data_access_mode mode) |
uint32_t | _starpu_get_data_refcnt (struct _starpu_data_state *state, unsigned node) |
size_t | _starpu_data_get_size (starpu_data_handle_t handle) |
size_t | _starpu_data_get_alloc_size (starpu_data_handle_t handle) |
starpu_ssize_t | _starpu_data_get_max_size (starpu_data_handle_t handle) |
uint32_t | _starpu_data_get_footprint (starpu_data_handle_t handle) |
void | __starpu_push_task_output (struct _starpu_job *j) |
void | _starpu_push_task_output (struct _starpu_job *j) |
STARPU_ATTRIBUTE_WARN_UNUSED_RESULT int | _starpu_fetch_task_input (struct starpu_task *task, struct _starpu_job *j, int async) |
void | _starpu_fetch_task_input_tail (struct starpu_task *task, struct _starpu_job *j, struct _starpu_worker *worker) |
void | _starpu_fetch_nowhere_task_input (struct _starpu_job *j) |
int | _starpu_select_src_node (struct _starpu_data_state *state, unsigned destination) |
int | _starpu_determine_request_path (starpu_data_handle_t handle, int src_node, int dst_node, enum starpu_data_access_mode mode, int max_len, unsigned *src_nodes, unsigned *dst_nodes, unsigned *handling_nodes, unsigned write_invalidation) |
struct _starpu_data_request * | _starpu_create_request_to_fetch_data (starpu_data_handle_t handle, struct _starpu_data_replicate *dst_replicate, enum starpu_data_access_mode mode, struct starpu_task *task, enum starpu_is_prefetch is_prefetch, unsigned async, void(*callback_func)(void *), void *callback_arg, int prio, const char *origin) |
void | _starpu_init_data_replicate (starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, int workerid) |
void | _starpu_data_start_reduction_mode (starpu_data_handle_t handle) |
void | _starpu_data_end_reduction_mode (starpu_data_handle_t handle, int priority) |
void | _starpu_data_end_reduction_mode_terminate (starpu_data_handle_t handle) |
void | _starpu_data_unmap (starpu_data_handle_t handle, unsigned node) |
void | _starpu_data_set_unregister_hook (starpu_data_handle_t handle, _starpu_data_handle_unregister_hook func) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT |
Variables | |
int | _starpu_has_not_important_data |
struct _starpu_data_replicate |
this should contain the information relative to a given data replicate
Data Fields | ||
---|---|---|
starpu_data_handle_t | handle | |
void * | data_interface |
describe the actual data layout, as manipulated by data interfaces in *_interface.c |
int | refcnt |
How many requests or tasks are currently working with this replicate |
char | memory_node | |
enum _starpu_cache_state | state: 2 |
describes the state of the local data in terms of coherency |
unsigned | relaxed_coherency:2 |
A buffer that is used for SCRATCH or reduction cannot be used with filters. |
unsigned | initialized:1 |
We may need to initialize the replicate with some value before using it. |
unsigned | allocated:1 |
is the data locally allocated ? |
unsigned | automatically_allocated:1 |
Was it automatically allocated? (Otherwise it is the application-provided buffer: never try to free it!) Perhaps the allocation was performed higher in the hierarchy; for now this is just translated into !automatically_allocated |
unsigned | map_write:1 |
is the write side enabled on the mapping? This is important for drivers which may actually make a copy instead of a map. Only meaningful when mapped != STARPU_UNMAPPED |
int | mapped |
>= 0 when the data is just a mapping of a replicate from that memory node, otherwise STARPU_UNMAPPED |
uint32_t | requested |
To help the scheduling policies make decisions, we may keep track of the tasks that are likely to request this data on the current node. It is the responsibility of the scheduling policy to set that flag when it assigns a task to a queue; policies which do not use this hint can simply ignore it. |
struct _starpu_data_request * | request[STARPU_MAXNODES] |
This tracks the list of requests to provide the value |
struct _starpu_data_request * | last_request[STARPU_MAXNODES] |
This points to the last entry of request, to easily append to the list |
struct _starpu_data_request * | load_request | |
unsigned | nb_tasks_prefetch |
The number of prefetches that we made for this replicate for various tasks. This is also the number of tasks that we will wait to see use the mc before we attempt to evict it. |
struct _starpu_mem_chunk * | mc |
Pointer to memchunk for LRU strategy |
struct _starpu_jobid_list |
Data Fields | ||
---|---|---|
unsigned long | id | |
struct _starpu_jobid_list * | next |
struct _starpu_task_wrapper_list |
This structure describes a singly-linked list of tasks
Data Fields | ||
---|---|---|
struct starpu_task * | task | |
struct _starpu_task_wrapper_list * | next |
struct _starpu_task_wrapper_dlist |
This structure describes a doubly-linked list of tasks
Data Fields | ||
---|---|---|
struct starpu_task * | task | |
struct _starpu_task_wrapper_dlist * | next | |
struct _starpu_task_wrapper_dlist * | prev |
struct _starpu_data_state |
This is initialized in both _starpu_register_new_data and _starpu_data_partition
Data Fields | ||
---|---|---|
int | magic | |
struct _starpu_data_requester_prio_list | req_list | |
unsigned | refcnt |
the number of requests currently in the scheduling engine (not in the req_list anymore), i.e. the number of holders of the current_mode rwlock |
unsigned | unlocking_reqs |
whether we are already unlocking data requests |
enum starpu_data_access_mode | current_mode |
Current access mode. Is always either STARPU_R, STARPU_W, STARPU_SCRATCH or STARPU_REDUX, but never a combination such as STARPU_RW. |
struct _starpu_spinlock | header_lock |
protect meta data |
unsigned | busy_count |
Condition to make the application wait for all transfers before freeing the handle. busy_count is the sum of handle->refcnt, handle->per_node[*]->refcnt, the number of starpu_data_requesters, and the number of tasks that have released it but are still registered on the implicit data dependency lists. Core code which releases busy_count has to call _starpu_data_check_not_busy to let starpu_data_unregister proceed. |
unsigned | busy_waiting |
Is starpu_data_unregister waiting for busy_count? |
starpu_pthread_mutex_t | busy_mutex | |
starpu_pthread_cond_t | busy_cond | |
struct _starpu_data_state * | root_handle |
In case we use filters, the handle may describe a sub-data |
struct _starpu_data_state * | father_handle |
root of the tree |
starpu_data_handle_t * | active_children |
father of the node, NULL if the current node is the root |
unsigned | active_nchildren |
The currently active set of read-write children |
starpu_data_handle_t ** | active_readonly_children | |
unsigned * | active_readonly_nchildren |
The currently active set of read-only children |
unsigned | nactive_readonly_children |
Size of active_readonly_children[i] array |
unsigned | nsiblings |
Size of active_readonly_children and active_readonly_nchildren arrays. Actual use is given by 'partitioned' Our siblings in the father partitioning |
starpu_data_handle_t * | siblings |
How many siblings |
unsigned | sibling_index | |
unsigned | depth |
indicate which child this node is from the father's perspective (if any) |
starpu_pthread_mutex_t | unpartition_mutex |
what's the depth of the tree ? |
starpu_data_handle_t | children |
Synchronous partitioning |
unsigned | nchildren | |
unsigned | nplans |
How many partition plans this handle has |
struct starpu_codelet * | switch_cl |
Switch codelet for asynchronous partitioning |
unsigned | switch_cl_nparts |
size of dyn_nodes recorded in switch_cl |
unsigned | partitioned |
Whether a partition plan is currently submitted and the corresponding unpartition has not been submitted yet, or the number of partition plans currently submitted in readonly mode. |
unsigned | part_readonly:1 |
Whether a partition plan is currently submitted in readonly mode |
unsigned | active:1 |
Whether our father is currently partitioned into ourself |
unsigned | active_ro:1 | |
struct _starpu_data_replicate | per_node[STARPU_MAXNODES] |
describes the state of the data in terms of coherency. This is execution-time state. |
struct _starpu_data_replicate * | per_worker | |
struct starpu_data_interface_ops * | ops | |
uint32_t | footprint |
Footprint which identifies data layout |
unsigned | is_not_important:1 |
in some cases, the application may explicitly tell StarPU that a piece of data is not likely to be used soon again |
unsigned | ooc:1 |
Can the data be pushed to the disk? |
unsigned | sequential_consistency:1 |
Does StarPU have to enforce some implicit data-dependencies ? |
unsigned | readonly:1 |
Whether we shall not ever write to this handle, thus allowing various optimizations |
int | home_node |
where is the data home, i.e. which node it was registered from ? -1 if none yet |
uint32_t | wt_mask |
what is the default write-through mask for that data ? |
unsigned | aliases |
for a readonly handle, the number of times that we have returned again the same handle and thus the number of times we have to ignore unregistration requests |
starpu_data_handle_t | readonly_dup |
for a non-readonly handle, a readonly-only duplicate, that we can return from starpu_data_dup_ro |
starpu_data_handle_t | readonly_dup_of |
for a readonly handle, the non-readonly handle that is referencing it in its readonly_dup field. |
unsigned | initialized:1 |
Is the data initialized, or has a task already been submitted to initialize it? This is submission-time initialization state. |
unsigned | removed_from_context_hash:1 | |
unsigned char | lazy_unregister |
Whether lazy unregistration was requested through starpu_data_unregister_submit |
starpu_pthread_mutex_t | sequential_consistency_mutex |
This lock should protect any operation to enforce sequential_consistency |
enum starpu_data_access_mode | last_submitted_mode |
The last submitted task (or application data request) that declared it would modify the piece of data ? Any task accessing the data in a read-only mode should depend on that task implicitly if the sequential_consistency flag is enabled. |
struct starpu_task * | last_sync_task | |
struct _starpu_task_wrapper_dlist | last_submitted_accessors | |
unsigned | last_submitted_ghost_sync_id_is_valid |
If FxT is enabled, we keep track of "ghost dependencies": that is to say the dependencies that are not needed anymore, but that should appear in the post-mortem DAG. For instance if we have the sequence f(Aw) g(Aw), and that g is submitted after the termination of f, we want to have f->g appear in the DAG even if StarPU does not need to enforce this dependency anymore. |
unsigned long | last_submitted_ghost_sync_id | |
struct _starpu_jobid_list * | last_submitted_ghost_accessors_id | |
struct _starpu_task_wrapper_list * | post_sync_tasks |
protected by sequential_consistency_mutex |
unsigned | post_sync_tasks_cnt | |
struct starpu_codelet * | redux_cl |
During reduction we need some specific methods: redux_func performs the reduction of an interface into another one (eg. "+="), and init_func initializes the data interface to a default value that is stable by reduction (eg. 0 for +=). |
struct starpu_codelet * | init_cl | |
void * | redux_cl_arg | |
void * | init_cl_arg | |
unsigned | reduction_refcnt |
Are we currently performing a reduction on that handle ? If so the reduction_refcnt should be non null until there are pending tasks that are performing the reduction. |
struct _starpu_data_requester_prio_list | reduction_req_list |
List of requesters that are specific to the pending reduction. This list is used when the requests in the req_list list are frozen until the end of the reduction. |
starpu_data_handle_t * | reduction_tmp_handles | |
struct _starpu_data_request * | write_invalidation_req |
Final request for write invalidation |
void * | mpi_data |
Used for MPI |
_starpu_memory_stats_t | memory_stats | |
unsigned int | mf_node | |
_starpu_data_handle_unregister_hook | unregister_hook |
hook to be called when unregistering the data |
struct starpu_arbiter * | arbiter | |
struct _starpu_data_requester_prio_list | arbitered_req_list |
This is protected by the arbiter mutex |
int | last_locality |
Data maintained by schedulers themselves. Last worker that took this data in locality mode, or -1 if nobody took it yet |
unsigned | dimensions |
Application-provided coordinates. The maximum dimension (5) is relatively arbitrary. |
int | coordinates[5] | |
void * | user_data |
A generic pointer to data in the user land (could be anything and this is not managed by StarPU) |
void * | sched_data |
A generic pointer to data in the scheduler (could be anything and this is managed by the scheduler) |
int _starpu_fetch_data_on_node | ( | starpu_data_handle_t | handle, |
int | node, | ||
struct _starpu_data_replicate * | replicate, | ||
enum starpu_data_access_mode | mode, | ||
unsigned | detached, | ||
struct starpu_task * | task, | ||
enum starpu_is_prefetch | is_prefetch, | ||
unsigned | async, | ||
void(*)(void *) | callback_func, | ||
void * | callback_arg, | ||
int | prio, | ||
const char * | origin | ||
) |
This does not take a reference on the handle; the caller has to do it, e.g. through _starpu_attempt_to_submit_data_request_from_apps(). detached means that the core is allowed to drop the request; the caller should thus not take a reference, since it cannot know whether the request will complete. async means that _starpu_fetch_data_on_node will wait for completion of the request.
void _starpu_release_data_on_node | ( | struct _starpu_data_state * | state, |
uint32_t | default_wt_mask, | ||
enum starpu_data_access_mode | down_to_mode, | ||
struct _starpu_data_replicate * | replicate | ||
) |
This releases a reference on the handle
void _starpu_push_task_output | ( | struct _starpu_job * | j | ) |
Version with driver trace
STARPU_ATTRIBUTE_WARN_UNUSED_RESULT int _starpu_fetch_task_input | ( | struct starpu_task * | task, |
struct _starpu_job * | j, | ||
int | async | ||
) |
Fetch the data parameters for task task. Setting async to 1 allows only starting the fetches, and calling _starpu_fetch_task_input_tail later when the transfers are finished.
struct _starpu_data_request * _starpu_create_request_to_fetch_data | ( | starpu_data_handle_t | handle, |
struct _starpu_data_replicate * | dst_replicate, | ||
enum starpu_data_access_mode | mode, | ||
struct starpu_task * | task, | ||
enum starpu_is_prefetch | is_prefetch, | ||
unsigned | async, | ||
void(*)(void *) | callback_func, | ||
void * | callback_arg, | ||
int | prio, | ||
const char * | origin | ||
) |
is_prefetch is whether the DSM may drop the request (when there is not enough memory, for instance). async is whether the caller wants a reference on the last request, to be able to wait for it (which will release that reference).