/* * Copyright (C) 2007 Fabio Checconi , * Paolo Valente * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include "wfq.h" /* * Shift for timestamp calculations. This actually limits the maximum * service allowed in one go (small shift values increase it,) and the * maximum total weight of a queue (big shift values increase it,) and * the period of virtual time wraparounds. */ #define WFQ_SERVICE_SHIFT 16 /** * wfq_gt - compare two timestamps * @a: first ts. * @b: second ts. * * Return @a > @b, dealing with wrapping correctly. */ static inline int wfq_gt(wfq_timestamp_t a, wfq_timestamp_t b) { return (s64)(a - b) > 0; } /** * wfq_max - max between two timestamps * @a: first ts. * @b: second ts. * * Return the max between two timestamps, handling their wraparound. */ static inline wfq_timestamp_t wfq_max(wfq_timestamp_t a, wfq_timestamp_t b) { return wfq_gt(a, b) ? a : b; } /** * wfq_delta - map service into the virtual time domain * @service: amount of service. * @weight: scale factor. */ static inline wfq_timestamp_t wfq_delta(wfq_service_t service, wfq_weight_t weight) { wfq_timestamp_t d = (wfq_timestamp_t)service << WFQ_SERVICE_SHIFT; do_div(d, weight); return d; } /** * wfq_update_finish - resync the finish time with the service received * @entity: the entity to update. * * The entity may have received less service than allocated, decrease its * finish time. This is called only for the entity under service. */ static inline void wfq_update_finish(struct wfq_entity *entity) { entity->finish -= wfq_delta(entity->size, entity->weight); entity->size = 0; } /** * wfq_calc_finish - assign the finish time to an entity * @entity: the entity to act upon. * @service: the amount of service requested. */ static inline void wfq_calc_finish(struct wfq_entity *entity, wfq_service_t service) { BUG_ON(entity->size != 0); entity->finish = entity->start + wfq_delta(service, entity->weight); entity->size = service; } /** * wfq_extract - remove an entity from a tree * @root: the tree root. * @entity: the entity to remove. */ static inline void wfq_extract(struct rb_root *root, struct wfq_entity *entity) { BUG_ON(entity->tree != root); entity->tree = NULL; rb_erase(&entity->node, root); } /** * wfq_entity_of - get an entity from a node * @node: the node field of the entity. * * Convert a node pointer to the relative entity. This is used only * to simplify the logic of some functions and not as the generic * conversion mechanism because, e.g., in the tree walking functions, * the check for a %NULL value would be redundant. */ static inline struct wfq_entity *wfq_entity_of(struct rb_node *node) { struct wfq_entity *entity = NULL; if (node != NULL) entity = rb_entry(node, struct wfq_entity, node); return entity; } /** * wfq_idle_extract - extract an entity from the idle tree * @queue: the queue containig the tree. * @entity: the entity to extract. */ static void wfq_idle_extract(struct wfq_queue *queue, struct wfq_entity *entity) { struct rb_node *next; BUG_ON(entity->tree != &queue->idle); if (entity == queue->first_idle) { next = rb_next(&entity->node); queue->first_idle = wfq_entity_of(next); } if (entity == queue->last_idle) { next = rb_prev(&entity->node); queue->last_idle = wfq_entity_of(next); } wfq_extract(&queue->idle, entity); } /** * wfq_forget_entity - remove an entity from a queue * @queue: the queue. * @entity: the entity being removed. * * Update the queue status and forget everything about @entity, putting * the queue reference to it. */ static void wfq_forget_entity(struct wfq_queue *queue, struct wfq_entity *entity) { queue->nr_entities--; queue->weight_sum -= entity->weight; wfq_entity_put(queue, entity); } /** * wfq_forget_idle - update the idle tree if necessary * @queue: the queue to act upon. * * To preserve the global O(log N) complexity we only remove one entry here; * as the idle tree will not grow indefinitely this can be done safely. */ static void wfq_forget_idle(struct wfq_queue *queue) { struct wfq_entity *first_idle = queue->first_idle; struct wfq_entity *last_idle = queue->last_idle; if (RB_EMPTY_ROOT(&queue->active) && last_idle != NULL) { /* * Forget the whole idle tree, increasing the vtime past * the last finish time of idle entities. */ queue->vtime = last_idle->finish; } if (first_idle != NULL && !wfq_gt(first_idle->finish, queue->vtime)) { wfq_idle_extract(queue, first_idle); wfq_forget_entity(queue, first_idle); } } /** * wfq_insert - generic tree insertion * @root: tree root. * @entity: entity to insert. * * This is used for the idle and the active tree, since they are both * ordered by finish time. */ static void wfq_insert(struct rb_root *root, struct wfq_entity *entity) { struct wfq_entity *entry; struct rb_node **node = &root->rb_node; struct rb_node *parent = NULL; while (*node != NULL) { parent = *node; entry = rb_entry(parent, struct wfq_entity, node); if (wfq_gt(entity->finish, entry->finish)) node = &parent->rb_right; else node = &parent->rb_left; } rb_link_node(&entity->node, parent, node); rb_insert_color(&entity->node, root); entity->tree = root; } /** * wfq_idle_insert - insert an entity into the idle tree * @queue: the queue containing the tree. * @entity: the entity to insert. */ static void wfq_idle_insert(struct wfq_queue *queue, struct wfq_entity *entity) { struct wfq_entity *first_idle = queue->first_idle; struct wfq_entity *last_idle = queue->last_idle; if (first_idle == NULL || wfq_gt(first_idle->finish, entity->finish)) queue->first_idle = entity; if (last_idle == NULL || wfq_gt(entity->finish, last_idle->finish)) queue->last_idle = entity; wfq_insert(&queue->idle, entity); } /** * wfq_update_min - update the min_start field of an entity * @entity: the entity to update. * @node: one of its children. * * This function is called when @entity may store an invalid value for * min_start due to updates to the active tree. It assumes that the subtree * rooted at @node (that may be its left or its right child) has a valid * min_start value. */ static inline void wfq_update_min(struct wfq_entity *entity, struct rb_node *node) { struct wfq_entity *child; if (node != NULL) { child = rb_entry(node, struct wfq_entity, node); if (wfq_gt(entity->min_start, child->min_start)) entity->min_start = child->min_start; } } /** * wfq_update_active_node - recalculate min_start * @node: the node to update. * * @node may have changed position or one of its children can have moved, * this function updates its min_start value. The left and right subtrees * are assumed to hold a correct min_start value. */ static inline void wfq_update_active_node(struct rb_node *node) { struct wfq_entity *entity = rb_entry(node, struct wfq_entity, node); entity->min_start = entity->start; wfq_update_min(entity, node->rb_right); wfq_update_min(entity, node->rb_left); } /** * wfq_update_active_tree - update min_start for the whole active tree * @node: the starting node. * * @node must be the deepest modified node after an update. This function * updates its min_start using the values held by its children, assuming * that they did not change, and then updates all the nodes that may have * changed in the path to the root. The only nodes that may have changed * are those in the path or their siblings. */ static void wfq_update_active_tree(struct rb_node *node) { struct rb_node *parent; up: wfq_update_active_node(node); parent = rb_parent(node); if (parent == NULL) return; if (node == parent->rb_left && parent->rb_right != NULL) wfq_update_active_node(parent->rb_right); else if (parent->rb_left != NULL) wfq_update_active_node(parent->rb_left); node = parent; goto up; } /** * wfq_active_insert - insert an entity in the active tree * @queue: the queue containing the tree. * @entity: the entity being inserted. * * The active tree is ordered by finish time, but an extra key is kept * per each node, containing the minimum value for the start times of * its children (and the node itself,) so it's possible to search for * the eligible node with the lowest finish time. */ static void wfq_active_insert(struct wfq_queue *queue, struct wfq_entity *entity) { struct rb_node *node = &entity->node; wfq_insert(&queue->active, entity); if (node->rb_left != NULL) node = node->rb_left; else if (node->rb_right != NULL) node = node->rb_right; wfq_update_active_tree(node); } /** * wfq_find_deepest - find the deepest node that an extraction can modify * @node: the node being removed. * * Do the first step of an extraction in an rb tree, looking for the * node that will replace @node, and returning the deepest node that * the following modifications to the tree can touch. If @node is the * last node in the tree return %NULL. */ static struct rb_node *wfq_find_deepest(struct rb_node *node) { struct rb_node *deepest; if (node->rb_right == NULL && node->rb_left == NULL) deepest = rb_parent(node); else if (node->rb_right == NULL) deepest = node->rb_left; else if (node->rb_left == NULL) deepest = node->rb_right; else { deepest = rb_next(node); if (deepest->rb_right != NULL) deepest = deepest->rb_right; else if (rb_parent(deepest) != node) deepest = rb_parent(deepest); } return deepest; } /** * wfq_active_extract - remove an entity from the active tree * @queue: the queue containing the tree. * @entity: the entity being removed. */ static void wfq_active_extract(struct wfq_queue *queue, struct wfq_entity *entity) { struct rb_node *node; node = wfq_find_deepest(&entity->node); wfq_extract(&queue->active, entity); if (node != NULL) wfq_update_active_tree(node); } void wfq_enqueue(struct wfq_queue *queue, struct wfq_entity *entity, wfq_service_t service) { if (entity == queue->active_entity) { BUG_ON(entity->tree != NULL); /* * If we are requeueing the current entity we have * to take care of not charging to it service it has * not received. */ wfq_update_finish(entity); entity->start = entity->finish; queue->active_entity = NULL; } else if (entity->tree != NULL) { /* * Must be on the idle tree, wfq_idle_extract() will * check for that. */ wfq_idle_extract(queue, entity); entity->start = wfq_max(entity->finish, queue->vtime); } else { /* * The finish time of the entity can be invalid, and * it is in the past for sure, otherwise the entity * would have been on the idle tree. */ entity->start = queue->vtime; queue->nr_entities++; queue->weight_sum += entity->weight; wfq_entity_get(queue, entity); } wfq_calc_finish(entity, service); wfq_active_insert(queue, entity); } void wfq_dequeue(struct wfq_queue *queue, struct wfq_entity *entity) { if (entity == queue->active_entity) { BUG_ON(entity->tree != NULL); wfq_update_finish(entity); queue->active_entity = NULL; } else wfq_active_extract(queue, entity); if (wfq_gt(entity->finish, queue->vtime)) wfq_idle_insert(queue, entity); else wfq_forget_entity(queue, entity); } /** * wfq_first_active - find the eligible entity with the smallest finish time * @queue: the queue to select from. * * This function searches the first schedulable entity, starting from the * root of the tree and going on the left every time on this side there is * a subtree with at least one eligible (start >= vtime) entity. The path * on the right is followed only if a) the left subtree contains no eligible * entity and b) no eligible entity has been found yet. */ static struct wfq_entity *wfq_first_active(struct wfq_queue *queue) { struct wfq_entity *entry, *first = NULL; struct rb_node *node = queue->active.rb_node; while (node != NULL) { entry = rb_entry(node, struct wfq_entity, node); left: if (!wfq_gt(entry->start, queue->vtime)) first = entry; BUG_ON(wfq_gt(entry->min_start, queue->vtime)); if (node->rb_left != NULL) { entry = rb_entry(node->rb_left, struct wfq_entity, node); if (!wfq_gt(entry->min_start, queue->vtime)) { node = node->rb_left; goto left; } } if (first != NULL) break; node = node->rb_right; } return first; } /** * wfq_update_vtime - update vtime if necessary * @queue: the queue to act upon. * * If necessary update the queue vtime to have at least one eligible * entity, skipping to its start time. Assumes that the active tree * of the queue is not empty. */ static void wfq_update_vtime(struct wfq_queue *queue) { struct wfq_entity *entry; struct rb_node *node = queue->active.rb_node; entry = rb_entry(node, struct wfq_entity, node); if (wfq_gt(entry->min_start, queue->vtime)) { queue->vtime = entry->min_start; wfq_forget_idle(queue); } } struct wfq_entity *wfq_next(struct wfq_queue *queue) { struct wfq_entity *entity; BUG_ON(queue->active_entity != NULL); if (RB_EMPTY_ROOT(&queue->active)) return NULL; wfq_update_vtime(queue); entity = wfq_first_active(queue); wfq_active_extract(queue, entity); queue->active_entity = entity; BUG_ON(wfq_gt(entity->start, queue->vtime)); return entity; } void wfq_update_size(struct wfq_queue *queue, struct wfq_entity *entity, wfq_service_t service) { BUG_ON(entity == queue->active_entity); BUG_ON(entity->tree != &queue->active); BUG_ON(entity->size > service); wfq_active_extract(queue, entity); wfq_calc_finish(entity, service); wfq_active_insert(queue, entity); } /** * wfq_served - update the scheduler status after service * @queue: the queue. * @served: the amount of service. */ void wfq_served(struct wfq_queue *queue, wfq_service_t served) { struct wfq_entity *active_entity = queue->active_entity; BUG_ON(active_entity == NULL); WARN_ON_ONCE(active_entity->size < served); active_entity->size -= served; queue->vtime += wfq_delta(served, queue->weight_sum); wfq_forget_idle(queue); } /* * XXX ugly, for userspace testing. */ #include "wfq-dbg.c" int wfq_init(void) { return 0; } void wfq_exit(void) { } #if 0 module_init(wfq_init); module_exit(wfq_exit); MODULE_AUTHOR("fabio,pv"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("WFQ test"); #endif