cpufreq: interactive: Changes to interactive governor

Changes include:
* May scale up to intermediate speeds after scaling down, rather than
scaling to max speed and then only scaling down until max speed is
needed again.

* Tweaked the threshold at which max speed is requested (previously the
CPU must have been 100% busy since the idle exit timer started; now it
goes to max if at least 85% busy) and raised the default minimum sample
time to 80ms. Tuning is based on UI tests and still in progress.

* SMP fixes.

* Fixed attempted multiple delete of sysfs group on governor stop.

* Set a just-in-case-CPU-goes-busy-again timer even if nr_running == 0
at timer function run time, but cancel it if that CPU goes idle (and
don't re-arm the timer if that CPU is currently idle).

* Re-evaluate speed if a CPU goes idle while above min speed (and no
timer is currently set), in case the platform requires all CPUs to run
at the same speed.

* Realtime workqueues disappeared upstream; convert the speed-up
workqueue to a realtime task. Average scheduling latency measured
significantly less than with WQ_HIGHPRI.

* Timers are not deferrable and must wake the CPU from idle, since we
now re-evaluate speed for idle CPUs.

* CPU load is computed as the higher of the short-term load since idle
exit and the long-term load since the last frequency change, to avoid
dropping speed during temporary dips in load on a long-term-busy CPU
(see the sketch after this list).

* Avoid one CPU starting a new idle exit load evaluation interval in a
race with the timer running on another CPU.

* New fugly debugging printfs should be reworked or go away eventually.

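Illustrative sketch (not the kernel code): the dual-window load pick
described in the list above, framed as a standalone C program. Names
are hypothetical; the governor derives the same deltas from
get_cpu_idle_time_us(), as the diff below shows.

#include <stdio.h>

/*
 * Return the higher of two loads (percent): the short-term window
 * since idle exit and the long-term window since the last frequency
 * change. Inputs are idle/wall microsecond deltas over each window.
 */
static unsigned int pick_cpu_load(unsigned int idle_short,
				  unsigned int wall_short,
				  unsigned int idle_long,
				  unsigned int wall_long)
{
	unsigned int load_short, load_long;

	/* No division by zero: we only divide when idle < wall. */
	load_short = idle_short >= wall_short ? 0 :
		100 * (wall_short - idle_short) / wall_short;
	load_long = idle_long >= wall_long ? 0 :
		100 * (wall_long - idle_long) / wall_long;

	return load_long > load_short ? load_long : load_short;
}

int main(void)
{
	/* 40% busy since idle exit but 90% busy since the last speed
	 * change: report 90 so a brief dip doesn't drop the speed. */
	printf("%u\n", pick_cpu_load(60, 100, 10, 100));
	return 0;
}
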
Change-Id: I606b5c1850637c35a7814309df12362d5c044825
via: https://review.source.android.com//#change,15809
Todd Poynor 2010-11-06 19:22:42 -04:00 committed by Jon Benson
parent c8cec4c0c1
commit 7304611fc3
3 changed files with 489 additions and 162 deletions

Documentation/cpu-freq/governors.txt

@@ -187,23 +187,32 @@ default value of '20' it means that if the CPU usage needs to be below
 
 2.6 Interactive
 ---------------
 
-The CPUfreq governor "interactive" is designed for low latency,
+The CPUfreq governor "interactive" is designed for latency-sensitive,
 interactive workloads. This governor sets the CPU speed depending on
-usage, similar to "ondemand" and "conservative" governors. However
-there is no polling, or 'sample_rate' required to scale the CPU up.
-
-Sampling CPU load every X ms can lead to under powering the CPU
-for X ms, leading to dropped framerate, stuttering UI etc..
-
-Scaling the CPU up is done when coming out of idle, and like "ondemand"
-scaling up will always go to MAX, then step down based off of cpu load.
+usage, similar to "ondemand" and "conservative" governors. However,
+the governor is more aggressive about scaling the CPU speed up in
+response to CPU-intensive activity.
+
+Sampling the CPU load every X ms can lead to under-powering the CPU
+for X ms, leading to dropped frames, stuttering UI, etc. Instead of
+sampling the cpu at a specified rate, the interactive governor will
+check whether to scale the cpu frequency up soon after coming out of
+idle. When the cpu comes out of idle, a timer is configured to fire
+within 1-2 ticks. If the cpu is very busy between exiting idle and
+when the timer fires then we assume the cpu is underpowered and ramp
+to MAX speed.
+
+If the cpu was not sufficiently busy to immediately ramp to MAX speed,
+then the governor evaluates the cpu load since the last speed adjustment,
+choosing the highest value between that longer-term load or the
+short-term load since idle exit to determine the cpu speed to ramp to.
 
 There is only one tuneable value for this governor:
 
-min_sample_time: The ammount of time the CPU must spend (in uS)
-at the current frequency before scaling DOWN. This is done to
-more accurately determine the cpu workload and the best speed for that
-workload. The default is 50ms.
+min_sample_time: The minimum amount of time to spend at the current
+frequency before ramping down. This is to ensure that the governor has
+seen enough historic cpu load data to determine the appropriate
+workload. Default is 80000 uS.
 
 3. The Governor Interface in the CPUfreq Core
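A worked example of the speed choice described above (a sketch of the
scaling rule used by cpufreq_interactive.c later in this commit; the
85% jump-to-max threshold matches LOAD_SCALE_MAX in the diff, and the
raw result is then rounded down to a real table frequency with
CPUFREQ_RELATION_H):

#include <stdio.h>

/* Map a 0-100 load to a raw target frequency in kHz. */
static unsigned int choose_raw_freq(unsigned int max_freq,
				    unsigned int load)
{
	if (load >= 85)			/* LOAD_SCALE_MAX */
		return max_freq;
	return max_freq * load / 100;
}

int main(void)
{
	printf("%u\n", choose_raw_freq(1000000, 60));	/* 600000 */
	printf("%u\n", choose_raw_freq(1000000, 85));	/* 1000000 */
	return 0;
}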

drivers/cpufreq/Kconfig

@@ -122,9 +122,10 @@ config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
 	bool "interactive"
 	select CPU_FREQ_GOV_INTERACTIVE
 	help
-	  Use the 'interactive' governor as default. This gets full cpu frequency
-	  scaling for workloads that are latency sensitive, typically interactive
-	  workloads.
+	  Use the CPUFreq governor 'interactive' as default. This allows
+	  you to get a full dynamic cpu frequency capable system by simply
+	  loading your cpufreq low-level hardware driver, using the
+	  'interactive' governor for latency-sensitive workloads.
 
 endchoice
config CPU_FREQ_GOV_PERFORMANCE
@@ -185,9 +186,8 @@ config CPU_FREQ_GOV_ONDEMAND
 config CPU_FREQ_GOV_INTERACTIVE
 	tristate "'interactive' cpufreq governor"
 	help
-	  'interactive' - This driver adds a dynamic cpufreq policy governor.
-	  Designed for low latency burst workloads. Sclaing is done when
-	  coming out idle instead of polling.
+	  'interactive' - This driver adds a dynamic cpufreq policy governor
+	  designed for latency-sensitive workloads
 
 config CPU_FREQ_GOV_CONSERVATIVE
 	tristate "'conservative' cpufreq governor"

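For context, a sketch of enabling this governor in a kernel config and
driving it at runtime through the stock cpufreq sysfs interface. The
min_sample_time path assumes the governor's global attribute group is
named "interactive" (consistent with the cpufreq_global_kobject usage
in the diff below); adjust for your tree.

# defconfig fragment (hypothetical)
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_INTERACTIVE=y
CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE=y

# runtime: select the governor and raise the down-ramp hold time (uS)
echo interactive > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
echo 80000 > /sys/devices/system/cpu/cpufreq/interactive/min_sample_time
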
drivers/cpufreq/cpufreq_interactive.c

@@ -24,37 +24,132 @@
#include <linux/tick.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <asm/cputime.h>
static void (*pm_idle_old)(void);
static atomic_t active_count = ATOMIC_INIT(0);
static DEFINE_PER_CPU(struct timer_list, cpu_timer);
struct cpufreq_interactive_cpuinfo {
struct timer_list cpu_timer;
int timer_idlecancel;
u64 time_in_idle;
u64 idle_exit_time;
u64 timer_run_time;
int idling;
u64 freq_change_time;
u64 freq_change_time_in_idle;
struct cpufreq_policy *policy;
struct cpufreq_frequency_table *freq_table;
unsigned int target_freq;
int governor_enabled;
};
static DEFINE_PER_CPU(u64, time_in_idle);
static DEFINE_PER_CPU(u64, idle_exit_time);
static struct cpufreq_policy *policy;
static unsigned int target_freq;
static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);
/* Workqueues handle frequency scaling */
static struct workqueue_struct *up_wq;
static struct task_struct *up_task;
static struct workqueue_struct *down_wq;
static struct work_struct freq_scale_work;
static u64 freq_change_time;
static u64 freq_change_time_in_idle;
static cpumask_t work_cpumask;
static struct work_struct freq_scale_down_work;
static cpumask_t up_cpumask;
static cpumask_t down_cpumask;
/*
* The minimum ammount of time to spend at a frequency before we can ramp down,
* default is 50ms.
* The minimum amount of time to spend at a frequency before we can ramp down.
*/
#define DEFAULT_MIN_SAMPLE_TIME 50000;
#define DEFAULT_MIN_SAMPLE_TIME 80000;
static unsigned long min_sample_time;
#define LOAD_SCALE_MAX 85
#define DEBUG 0
#define BUFSZ 128
#if DEBUG
#include <linux/proc_fs.h>
struct dbgln {
int cpu;
unsigned long jiffy;
unsigned long run;
char buf[BUFSZ];
};
#define NDBGLNS 256
static struct dbgln dbgbuf[NDBGLNS];
static int dbgbufs;
static int dbgbufe;
static struct proc_dir_entry *dbg_proc;
static spinlock_t dbgpr_lock;
static u64 up_request_time;
static unsigned int up_max_latency;
static void dbgpr(char *fmt, ...)
{
va_list args;
int n;
unsigned long flags;
spin_lock_irqsave(&dbgpr_lock, flags);
n = dbgbufe;
va_start(args, fmt);
vsnprintf(dbgbuf[n].buf, BUFSZ, fmt, args);
va_end(args);
dbgbuf[n].cpu = smp_processor_id();
dbgbuf[n].run = nr_running();
dbgbuf[n].jiffy = jiffies;
if (++dbgbufe >= NDBGLNS)
dbgbufe = 0;
if (dbgbufe == dbgbufs)
if (++dbgbufs >= NDBGLNS)
dbgbufs = 0;
spin_unlock_irqrestore(&dbgpr_lock, flags);
}
static void dbgdump(void)
{
int i, j;
unsigned long flags;
static struct dbgln prbuf[NDBGLNS];
spin_lock_irqsave(&dbgpr_lock, flags);
i = dbgbufs;
j = dbgbufe;
memcpy(prbuf, dbgbuf, sizeof(dbgbuf));
dbgbufs = 0;
dbgbufe = 0;
spin_unlock_irqrestore(&dbgpr_lock, flags);
while (i != j)
{
printk("%lu %d %lu %s",
prbuf[i].jiffy, prbuf[i].cpu, prbuf[i].run,
prbuf[i].buf);
if (++i == NDBGLNS)
i = 0;
}
}
static int dbg_proc_read(char *buffer, char **start, off_t offset,
int count, int *peof, void *dat)
{
printk("max up_task latency=%uus\n", up_max_latency);
dbgdump();
*peof = 1;
return 0;
}
#else
#define dbgpr(...) do {} while (0)
#endif
static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
unsigned int event);
@@ -70,142 +165,329 @@ struct cpufreq_governor cpufreq_gov_interactive = {
static void cpufreq_interactive_timer(unsigned long data)
{
u64 delta_idle;
u64 update_time;
u64 *cpu_time_in_idle;
u64 *cpu_idle_exit_time;
struct timer_list *t;
u64 now_idle = get_cpu_idle_time_us(data,
&update_time);
cpu_time_in_idle = &per_cpu(time_in_idle, data);
cpu_idle_exit_time = &per_cpu(idle_exit_time, data);
if (update_time == *cpu_idle_exit_time)
return;
delta_idle = cputime64_sub(now_idle, *cpu_time_in_idle);
/* Scale up if there were no idle cycles since coming out of idle */
if (delta_idle == 0) {
if (policy->cur == policy->max)
return;
if (nr_running() < 1)
return;
target_freq = policy->max;
cpumask_set_cpu(data, &work_cpumask);
queue_work(up_wq, &freq_scale_work);
return;
}
unsigned int delta_idle;
unsigned int delta_time;
int cpu_load;
int load_since_change;
u64 time_in_idle;
u64 idle_exit_time;
struct cpufreq_interactive_cpuinfo *pcpu =
&per_cpu(cpuinfo, data);
u64 now_idle;
unsigned int new_freq;
unsigned int index;
/*
* There is a window where if the cpu utlization can go from low to high
* between the timer expiring, delta_idle will be > 0 and the cpu will
* be 100% busy, preventing idle from running, and this timer from
* firing. So setup another timer to fire to check cpu utlization.
* Do not setup the timer if there is no scheduled work.
* Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
* this lets idle exit know the current idle time sample has
* been processed, and idle exit can generate a new sample and
* re-arm the timer. This prevents a concurrent idle
* exit on that CPU from writing a new set of info at the same time
* the timer function runs (the timer function can't use that info
* until more time passes).
*/
t = &per_cpu(cpu_timer, data);
if (!timer_pending(t) && nr_running() > 0) {
*cpu_time_in_idle = get_cpu_idle_time_us(
data, cpu_idle_exit_time);
mod_timer(t, jiffies + 2);
time_in_idle = pcpu->time_in_idle;
idle_exit_time = pcpu->idle_exit_time;
now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
smp_wmb();
/* If we raced with cancelling a timer, skip. */
if (!idle_exit_time) {
dbgpr("timer %d: no valid idle exit sample\n", (int) data);
goto exit;
}
if (policy->cur == policy->min)
return;
#if DEBUG
if ((int) jiffies - (int) pcpu->cpu_timer.expires >= 10)
dbgpr("timer %d: late by %d ticks\n",
(int) data, jiffies - pcpu->cpu_timer.expires);
#endif
delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle);
delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
idle_exit_time);
/*
* If timer ran less than 1ms after short-term sample started, retry.
*/
if (delta_time < 1000) {
dbgpr("timer %d: time delta %u too short exit=%llu now=%llu\n", (int) data,
delta_time, idle_exit_time, pcpu->timer_run_time);
goto rearm;
}
if (delta_idle > delta_time)
cpu_load = 0;
else
cpu_load = 100 * (delta_time - delta_idle) / delta_time;
delta_idle = (unsigned int) cputime64_sub(now_idle,
pcpu->freq_change_time_in_idle);
delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
pcpu->freq_change_time);
if (delta_idle > delta_time)
load_since_change = 0;
else
load_since_change =
100 * (delta_time - delta_idle) / delta_time;
/*
* Choose greater of short-term load (since last idle timer
* started or timer function re-armed itself) or long-term load
* (since last frequency change).
*/
if (load_since_change > cpu_load)
cpu_load = load_since_change;
if (cpu_load >= LOAD_SCALE_MAX)
new_freq = pcpu->policy->max;
else
new_freq = pcpu->policy->max * cpu_load / 100;
if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
new_freq, CPUFREQ_RELATION_H,
&index)) {
dbgpr("timer %d: cpufreq_frequency_table_target error\n", (int) data);
goto rearm;
}
new_freq = pcpu->freq_table[index].frequency;
if (pcpu->target_freq == new_freq)
{
dbgpr("timer %d: load=%d, already at %d\n", (int) data, cpu_load, new_freq);
goto rearm_if_notmax;
}
/*
* Do not scale down unless we have been at this frequency for the
* minimum sample time.
*/
if (cputime64_sub(update_time, freq_change_time) < min_sample_time)
return;
target_freq = policy->min;
cpumask_set_cpu(data, &work_cpumask);
queue_work(down_wq, &freq_scale_work);
}
static void cpufreq_idle(void)
{
struct timer_list *t;
u64 *cpu_time_in_idle;
u64 *cpu_idle_exit_time;
pm_idle_old();
if (!cpumask_test_cpu(smp_processor_id(), policy->cpus))
return;
/* Timer to fire in 1-2 ticks, jiffie aligned. */
t = &per_cpu(cpu_timer, smp_processor_id());
cpu_idle_exit_time = &per_cpu(idle_exit_time, smp_processor_id());
cpu_time_in_idle = &per_cpu(time_in_idle, smp_processor_id());
if (timer_pending(t) == 0) {
*cpu_time_in_idle = get_cpu_idle_time_us(
smp_processor_id(), cpu_idle_exit_time);
mod_timer(t, jiffies + 2);
if (new_freq < pcpu->target_freq) {
if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time) <
min_sample_time) {
dbgpr("timer %d: load=%d cur=%d tgt=%d not yet\n", (int) data, cpu_load, pcpu->target_freq, new_freq);
goto rearm;
}
}
}
/*
* Choose the cpu frequency based off the load. For now choose the minimum
* frequency that will satisfy the load, which is not always the lower power.
*/
static unsigned int cpufreq_interactive_calc_freq(unsigned int cpu)
{
unsigned int delta_time;
unsigned int idle_time;
unsigned int cpu_load;
u64 current_wall_time;
u64 current_idle_time;;
dbgpr("timer %d: load=%d cur=%d tgt=%d queue\n", (int) data, cpu_load, pcpu->target_freq, new_freq);
current_idle_time = get_cpu_idle_time_us(cpu, &current_wall_time);
if (new_freq < pcpu->target_freq) {
pcpu->target_freq = new_freq;
cpumask_set_cpu(data, &down_cpumask);
queue_work(down_wq, &freq_scale_down_work);
} else {
pcpu->target_freq = new_freq;
#if DEBUG
up_request_time = ktime_to_us(ktime_get());
#endif
cpumask_set_cpu(data, &up_cpumask);
wake_up_process(up_task);
}
idle_time = (unsigned int) current_idle_time - freq_change_time_in_idle;
delta_time = (unsigned int) current_wall_time - freq_change_time;
rearm_if_notmax:
/*
* Already set max speed and don't see a need to change that,
* wait until next idle to re-evaluate, don't need timer.
*/
if (pcpu->target_freq == pcpu->policy->max)
goto exit;
if (delta_time == 0)
return policy->cur;
rearm:
if (!timer_pending(&pcpu->cpu_timer)) {
/*
* If already at min: if that CPU is idle, don't set timer.
* Else cancel the timer if that CPU goes idle. We don't
* need to re-evaluate speed until the next idle exit.
*/
if (pcpu->target_freq == pcpu->policy->min) {
smp_rmb();
cpu_load = 100 * (delta_time - idle_time) / delta_time;
return policy->cur * cpu_load / 100;
}
/* We use the same work function to sale up and down */
static void cpufreq_interactive_freq_change_time_work(struct work_struct *work)
{
unsigned int cpu;
cpumask_t *tmp_mask = &work_cpumask;
for_each_cpu(cpu, tmp_mask) {
if (target_freq == policy->max) {
if (nr_running() == 1) {
cpumask_clear_cpu(cpu, &work_cpumask);
return;
if (pcpu->idling) {
dbgpr("timer %d: cpu idle, don't re-arm\n", (int) data);
goto exit;
}
__cpufreq_driver_target(policy, target_freq,
CPUFREQ_RELATION_H);
} else {
target_freq = cpufreq_interactive_calc_freq(cpu);
__cpufreq_driver_target(policy, target_freq,
CPUFREQ_RELATION_L);
pcpu->timer_idlecancel = 1;
}
freq_change_time_in_idle = get_cpu_idle_time_us(cpu,
&freq_change_time);
cpumask_clear_cpu(cpu, &work_cpumask);
pcpu->time_in_idle = get_cpu_idle_time_us(
data, &pcpu->idle_exit_time);
mod_timer(&pcpu->cpu_timer, jiffies + 2);
dbgpr("timer %d: set timer for %lu exit=%llu\n", (int) data, pcpu->cpu_timer.expires, pcpu->idle_exit_time);
}
exit:
return;
}
static void cpufreq_interactive_idle(void)
{
struct cpufreq_interactive_cpuinfo *pcpu =
&per_cpu(cpuinfo, smp_processor_id());
int pending;
if (!pcpu->governor_enabled) {
pm_idle_old();
return;
}
pcpu->idling = 1;
smp_wmb();
pending = timer_pending(&pcpu->cpu_timer);
if (pcpu->target_freq != pcpu->policy->min) {
#ifdef CONFIG_SMP
/*
* Entering idle while not at lowest speed. On some
* platforms this can hold the other CPU(s) at that speed
* even though the CPU is idle. Set a timer to re-evaluate
* speed so this idle CPU doesn't hold the other CPUs above
* min indefinitely. This should probably be a quirk of
* the CPUFreq driver.
*/
if (!pending) {
pcpu->time_in_idle = get_cpu_idle_time_us(
smp_processor_id(), &pcpu->idle_exit_time);
pcpu->timer_idlecancel = 0;
mod_timer(&pcpu->cpu_timer, jiffies + 2);
dbgpr("idle: enter at %d, set timer for %lu exit=%llu\n",
pcpu->target_freq, pcpu->cpu_timer.expires,
pcpu->idle_exit_time);
}
#endif
} else {
/*
* If at min speed and entering idle after load has
* already been evaluated, and a timer has been set just in
* case the CPU suddenly goes busy, cancel that timer. The
* CPU didn't go busy; we'll recheck things upon idle exit.
*/
if (pending && pcpu->timer_idlecancel) {
dbgpr("idle: cancel timer for %lu\n", pcpu->cpu_timer.expires);
del_timer(&pcpu->cpu_timer);
/*
* Ensure last timer run time is after current idle
* sample start time, so next idle exit will always
* start a new idle sampling period.
*/
pcpu->idle_exit_time = 0;
pcpu->timer_idlecancel = 0;
}
}
pm_idle_old();
pcpu->idling = 0;
smp_wmb();
/*
* Arm the timer for 1-2 ticks later if not already, and if the timer
* function has already processed the previous load sampling
* interval. (If the timer is not pending but has not processed
* the previous interval, it is probably racing with us on another
* CPU. Let it compute load based on the previous sample and then
* re-arm the timer for another interval when it's done, rather
* than updating the interval start time to be "now", which doesn't
* give the timer function enough time to make a decision on this
* run.)
*/
if (timer_pending(&pcpu->cpu_timer) == 0 &&
pcpu->timer_run_time >= pcpu->idle_exit_time) {
pcpu->time_in_idle =
get_cpu_idle_time_us(smp_processor_id(),
&pcpu->idle_exit_time);
pcpu->timer_idlecancel = 0;
mod_timer(&pcpu->cpu_timer, jiffies + 2);
dbgpr("idle: exit, set timer for %lu exit=%llu\n", pcpu->cpu_timer.expires, pcpu->idle_exit_time);
#if DEBUG
} else if (timer_pending(&pcpu->cpu_timer) == 0 &&
pcpu->timer_run_time < pcpu->idle_exit_time) {
dbgpr("idle: timer not run yet: exit=%llu tmrrun=%llu\n",
pcpu->idle_exit_time, pcpu->timer_run_time);
#endif
}
}
static int cpufreq_interactive_up_task(void *data)
{
unsigned int cpu;
cpumask_t tmp_mask;
struct cpufreq_interactive_cpuinfo *pcpu;
#if DEBUG
u64 now;
u64 then;
unsigned int lat;
#endif
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
if (cpumask_empty(&up_cpumask))
schedule();
set_current_state(TASK_RUNNING);
if (kthread_should_stop())
break;
#if DEBUG
then = up_request_time;
now = ktime_to_us(ktime_get());
if (now > then) {
lat = ktime_to_us(ktime_get()) - then;
if (lat > up_max_latency)
up_max_latency = lat;
}
#endif
tmp_mask = up_cpumask;
for_each_cpu(cpu, &tmp_mask) {
cpumask_clear_cpu(cpu, &up_cpumask);
pcpu = &per_cpu(cpuinfo, cpu);
if (nr_running() == 1) {
dbgpr("up %d: tgt=%d nothing else running\n", cpu,
pcpu->target_freq);
}
__cpufreq_driver_target(pcpu->policy,
pcpu->target_freq,
CPUFREQ_RELATION_H);
pcpu->freq_change_time_in_idle =
get_cpu_idle_time_us(cpu,
&pcpu->freq_change_time);
dbgpr("up %d: set tgt=%d (actual=%d)\n", cpu, pcpu->target_freq, pcpu->policy->cur);
}
}
return 0;
}
static void cpufreq_interactive_freq_down(struct work_struct *work)
{
unsigned int cpu;
cpumask_t tmp_mask = down_cpumask;
struct cpufreq_interactive_cpuinfo *pcpu;
for_each_cpu(cpu, &tmp_mask) {
cpumask_clear_cpu(cpu, &down_cpumask);
pcpu = &per_cpu(cpuinfo, cpu);
__cpufreq_driver_target(pcpu->policy,
pcpu->target_freq,
CPUFREQ_RELATION_H);
pcpu->freq_change_time_in_idle =
get_cpu_idle_time_us(cpu,
&pcpu->freq_change_time);
dbgpr("down %d: set tgt=%d (actual=%d)\n", cpu, pcpu->target_freq, pcpu->policy->cur);
}
}
static ssize_t show_min_sample_time(struct kobject *kobj,
@@ -237,11 +519,21 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *new_policy,
unsigned int event)
{
int rc;
struct cpufreq_interactive_cpuinfo *pcpu =
&per_cpu(cpuinfo, new_policy->cpu);
switch (event) {
case CPUFREQ_GOV_START:
if (!cpu_online(new_policy->cpu))
return -EINVAL;
pcpu->policy = new_policy;
pcpu->freq_table = cpufreq_frequency_get_table(new_policy->cpu);
pcpu->target_freq = new_policy->cur;
pcpu->freq_change_time_in_idle =
get_cpu_idle_time_us(new_policy->cpu,
&pcpu->freq_change_time);
pcpu->governor_enabled = 1;
/*
* Do not register the idle hook and create sysfs
* entries if we have already done so.
@@ -255,20 +547,21 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *new_policy,
return rc;
pm_idle_old = pm_idle;
pm_idle = cpufreq_idle;
policy = new_policy;
pm_idle = cpufreq_interactive_idle;
break;
case CPUFREQ_GOV_STOP:
if (atomic_dec_return(&active_count) > 1)
pcpu->governor_enabled = 0;
if (atomic_dec_return(&active_count) > 0)
return 0;
sysfs_remove_group(cpufreq_global_kobject,
&interactive_attr_group);
pm_idle = pm_idle_old;
del_timer(&per_cpu(cpu_timer, new_policy->cpu));
break;
del_timer(&pcpu->cpu_timer);
break;
case CPUFREQ_GOV_LIMITS:
if (new_policy->max < new_policy->cur)
@@ -285,28 +578,52 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *new_policy,
static int __init cpufreq_interactive_init(void)
{
unsigned int i;
struct timer_list *t;
struct cpufreq_interactive_cpuinfo *pcpu;
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
/* Initalize per-cpu timers */
for_each_possible_cpu(i) {
t = &per_cpu(cpu_timer, i);
init_timer_deferrable(t);
t->function = cpufreq_interactive_timer;
t->data = i;
pcpu = &per_cpu(cpuinfo, i);
init_timer(&pcpu->cpu_timer);
pcpu->cpu_timer.function = cpufreq_interactive_timer;
pcpu->cpu_timer.data = i;
}
/* Scale up is high priority */
up_wq = create_rt_workqueue("kinteractive_up");
up_task = kthread_create(cpufreq_interactive_up_task, NULL,
"kinteractiveup");
if (IS_ERR(up_task))
return PTR_ERR(up_task);
sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
get_task_struct(up_task);
/* No rescuer thread, bind to CPU queuing the work for possibly
warm cache (probably doesn't matter much). */
down_wq = create_workqueue("knteractive_down");
INIT_WORK(&freq_scale_work, cpufreq_interactive_freq_change_time_work);
if (! down_wq)
goto err_freeuptask;
INIT_WORK(&freq_scale_down_work,
cpufreq_interactive_freq_down);
#if DEBUG
spin_lock_init(&dbgpr_lock);
dbg_proc = create_proc_entry("igov", S_IWUSR | S_IRUGO, NULL);
dbg_proc->read_proc = dbg_proc_read;
#endif
return cpufreq_register_governor(&cpufreq_gov_interactive);
err_freeuptask:
put_task_struct(up_task);
return -ENOMEM;
}
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
pure_initcall(cpufreq_interactive_init);
fs_initcall(cpufreq_interactive_init);
#else
module_init(cpufreq_interactive_init);
#endif
@@ -314,7 +631,8 @@ module_init(cpufreq_interactive_init);
static void __exit cpufreq_interactive_exit(void)
{
cpufreq_unregister_governor(&cpufreq_gov_interactive);
destroy_workqueue(up_wq);
kthread_stop(up_task);
put_task_struct(up_task);
destroy_workqueue(down_wq);
}