cpufreq: interactive: Changes to interactive governor
Changes include: * May scale up to intermediate speeds after scaling down, rather than scale to max speed and then only scale down until max speed needed. * Tweaked thresholds at which max speed requested (previously CPU must have been 100% busy since idle exit timer started, now will go max if at least 85% busy) and default minimum sample time raised to 80ms. Tweaking based on UI tests, still in progress. * SMP fixes. * Fixed attempted multiple delete of sysfs group on governor stop. Set a just-in-case-CPU-goes-busy-again timer even if nr_running == 0 at timer function run time, but cancel if that CPU goes idle (and don't re-arm timer if that CPU is currently idle). * Re-evaluate speed if a CPU goes idle while above min speed (and no timer currently set) in case the platform requires all CPUs to be at the same speed. * Realtime workqueues disappeared upstream, convert speed up workqueue to a realtime task. Average scheduling latency measured significantly less than WQ_HIGHPRI. * Timers are not deferrable, must wake CPU from idle, since we now re-evaluate speed for idle CPUs. * CPU load is computed from higher of short-term load since idle exit vs. long-term load since last frequency change, to avoid dropping speed during temporary dips in load on long-term-busy CPU. * Avoid 1 CPU starting new idle exit load eval interval in a race with timer running on another CPU. * New fugly debugging printfs should be reworked or go away eventually. Change-Id: I606b5c1850637c35a7814309df12362d5c044825 via: https://review.source.android.com//#change,15809
This commit is contained in:
parent
c8cec4c0c1
commit
7304611fc3
@ -187,23 +187,32 @@ default value of '20' it means that if the CPU usage needs to be below
|
||||
2.6 Interactive
|
||||
---------------
|
||||
|
||||
The CPUfreq governor "interactive" is designed for low latency,
|
||||
The CPUfreq governor "interactive" is designed for latency-sensitive,
|
||||
interactive workloads. This governor sets the CPU speed depending on
|
||||
usage, similar to "ondemand" and "conservative" governors. However
|
||||
there is no polling, or 'sample_rate' required to scale the CPU up.
|
||||
usage, similar to "ondemand" and "conservative" governors. However,
|
||||
the governor is more aggressive about scaling the CPU speed up in
|
||||
response to CPU-intensive activity.
|
||||
|
||||
Sampling CPU load every X ms can lead to under powering the CPU
|
||||
for X ms, leading to dropped framerate, stuttering UI etc..
|
||||
|
||||
Scaling the CPU up is done when coming out of idle, and like "ondemand"
|
||||
scaling up will always go to MAX, then step down based off of cpu load.
|
||||
Sampling the CPU load every X ms can lead to under-powering the CPU
|
||||
for X ms, leading to dropped frames, stuttering UI, etc. Instead of
|
||||
sampling the cpu at a specified rate, the interactive governor will
|
||||
check whether to scale the cpu frequency up soon after coming out of
|
||||
idle. When the cpu comes out of idle, a timer is configured to fire
|
||||
within 1-2 ticks. If the cpu is very busy between exiting idle and
|
||||
when the timer fires then we assume the cpu is underpowered and ramp
|
||||
to MAX speed.
|
||||
|
||||
If the cpu was not sufficiently busy to immediately ramp to MAX speed,
|
||||
then the governor evaluates the cpu load since the last speed adjustment,
|
||||
choosing the highest value between that longer-term load and the
|
||||
short-term load since idle exit to determine the cpu speed to ramp to.
|
||||
|
||||
There is only one tuneable value for this governor:
|
||||
|
||||
min_sample_time: The amount of time the CPU must spend (in uS)
|
||||
at the current frequency before scaling DOWN. This is done to
|
||||
more accurately determine the cpu workload and the best speed for that
|
||||
workload. The default is 50ms.
|
||||
min_sample_time: The minimum amount of time to spend at the current
|
||||
frequency before ramping down. This is to ensure that the governor has
|
||||
seen enough historic cpu load data to determine the appropriate
|
||||
workload. Default is 80000 uS.
|
||||
|
||||
|
||||
3. The Governor Interface in the CPUfreq Core
|
||||
|
@ -122,9 +122,10 @@ config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
|
||||
bool "interactive"
|
||||
select CPU_FREQ_GOV_INTERACTIVE
|
||||
help
|
||||
Use the 'interactive' governor as default. This gets full cpu frequency
|
||||
scaling for workloads that are latency sensitive, typically interactive
|
||||
workloads.
|
||||
Use the CPUFreq governor 'interactive' as default. This allows
|
||||
you to get a full dynamic cpu frequency capable system by simply
|
||||
loading your cpufreq low-level hardware driver, using the
|
||||
'interactive' governor for latency-sensitive workloads.
|
||||
endchoice
|
||||
|
||||
config CPU_FREQ_GOV_PERFORMANCE
|
||||
@ -185,9 +186,8 @@ config CPU_FREQ_GOV_ONDEMAND
|
||||
config CPU_FREQ_GOV_INTERACTIVE
|
||||
tristate "'interactive' cpufreq governor"
|
||||
help
|
||||
'interactive' - This driver adds a dynamic cpufreq policy governor.
|
||||
Designed for low latency burst workloads. Scaling is done when
|
||||
coming out of idle instead of polling.
|
||||
'interactive' - This driver adds a dynamic cpufreq policy governor
|
||||
designed for latency-sensitive workloads
|
||||
|
||||
config CPU_FREQ_GOV_CONSERVATIVE
|
||||
tristate "'conservative' cpufreq governor"
|
||||
|
@ -24,37 +24,132 @@
|
||||
#include <linux/tick.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/kthread.h>
|
||||
|
||||
#include <asm/cputime.h>
|
||||
|
||||
static void (*pm_idle_old)(void);
|
||||
static atomic_t active_count = ATOMIC_INIT(0);
|
||||
|
||||
static DEFINE_PER_CPU(struct timer_list, cpu_timer);
|
||||
struct cpufreq_interactive_cpuinfo {
|
||||
struct timer_list cpu_timer;
|
||||
int timer_idlecancel;
|
||||
u64 time_in_idle;
|
||||
u64 idle_exit_time;
|
||||
u64 timer_run_time;
|
||||
int idling;
|
||||
u64 freq_change_time;
|
||||
u64 freq_change_time_in_idle;
|
||||
struct cpufreq_policy *policy;
|
||||
struct cpufreq_frequency_table *freq_table;
|
||||
unsigned int target_freq;
|
||||
int governor_enabled;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(u64, time_in_idle);
|
||||
static DEFINE_PER_CPU(u64, idle_exit_time);
|
||||
|
||||
static struct cpufreq_policy *policy;
|
||||
static unsigned int target_freq;
|
||||
static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);
|
||||
|
||||
/* Workqueues handle frequency scaling */
|
||||
static struct workqueue_struct *up_wq;
|
||||
static struct task_struct *up_task;
|
||||
static struct workqueue_struct *down_wq;
|
||||
static struct work_struct freq_scale_work;
|
||||
|
||||
static u64 freq_change_time;
|
||||
static u64 freq_change_time_in_idle;
|
||||
|
||||
static cpumask_t work_cpumask;
|
||||
static struct work_struct freq_scale_down_work;
|
||||
static cpumask_t up_cpumask;
|
||||
static cpumask_t down_cpumask;
|
||||
|
||||
/*
|
||||
* The minimum ammount of time to spend at a frequency before we can ramp down,
|
||||
* default is 50ms.
|
||||
* The minimum amount of time to spend at a frequency before we can ramp down.
|
||||
*/
|
||||
#define DEFAULT_MIN_SAMPLE_TIME 50000;
|
||||
#define DEFAULT_MIN_SAMPLE_TIME 80000;
|
||||
static unsigned long min_sample_time;
|
||||
|
||||
#define LOAD_SCALE_MAX 85
|
||||
|
||||
#define DEBUG 0
|
||||
#define BUFSZ 128
|
||||
|
||||
#if DEBUG
|
||||
#include <linux/proc_fs.h>
|
||||
|
||||
struct dbgln {
|
||||
int cpu;
|
||||
unsigned long jiffy;
|
||||
unsigned long run;
|
||||
char buf[BUFSZ];
|
||||
};
|
||||
|
||||
#define NDBGLNS 256
|
||||
|
||||
static struct dbgln dbgbuf[NDBGLNS];
|
||||
static int dbgbufs;
|
||||
static int dbgbufe;
|
||||
static struct proc_dir_entry *dbg_proc;
|
||||
static spinlock_t dbgpr_lock;
|
||||
|
||||
static u64 up_request_time;
|
||||
static unsigned int up_max_latency;
|
||||
|
||||
static void dbgpr(char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
int n;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dbgpr_lock, flags);
|
||||
n = dbgbufe;
|
||||
va_start(args, fmt);
|
||||
vsnprintf(dbgbuf[n].buf, BUFSZ, fmt, args);
|
||||
va_end(args);
|
||||
dbgbuf[n].cpu = smp_processor_id();
|
||||
dbgbuf[n].run = nr_running();
|
||||
dbgbuf[n].jiffy = jiffies;
|
||||
|
||||
if (++dbgbufe >= NDBGLNS)
|
||||
dbgbufe = 0;
|
||||
|
||||
if (dbgbufe == dbgbufs)
|
||||
if (++dbgbufs >= NDBGLNS)
|
||||
dbgbufs = 0;
|
||||
|
||||
spin_unlock_irqrestore(&dbgpr_lock, flags);
|
||||
}
|
||||
|
||||
static void dbgdump(void)
|
||||
{
|
||||
int i, j;
|
||||
unsigned long flags;
|
||||
static struct dbgln prbuf[NDBGLNS];
|
||||
|
||||
spin_lock_irqsave(&dbgpr_lock, flags);
|
||||
i = dbgbufs;
|
||||
j = dbgbufe;
|
||||
memcpy(prbuf, dbgbuf, sizeof(dbgbuf));
|
||||
dbgbufs = 0;
|
||||
dbgbufe = 0;
|
||||
spin_unlock_irqrestore(&dbgpr_lock, flags);
|
||||
|
||||
while (i != j)
|
||||
{
|
||||
printk("%lu %d %lu %s",
|
||||
prbuf[i].jiffy, prbuf[i].cpu, prbuf[i].run,
|
||||
prbuf[i].buf);
|
||||
if (++i == NDBGLNS)
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int dbg_proc_read(char *buffer, char **start, off_t offset,
|
||||
int count, int *peof, void *dat)
|
||||
{
|
||||
printk("max up_task latency=%uus\n", up_max_latency);
|
||||
dbgdump();
|
||||
*peof = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
#define dbgpr(...) do {} while (0)
|
||||
#endif
|
||||
|
||||
static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
|
||||
unsigned int event);
|
||||
|
||||
@ -70,142 +165,329 @@ struct cpufreq_governor cpufreq_gov_interactive = {
|
||||
|
||||
static void cpufreq_interactive_timer(unsigned long data)
|
||||
{
|
||||
u64 delta_idle;
|
||||
u64 update_time;
|
||||
u64 *cpu_time_in_idle;
|
||||
u64 *cpu_idle_exit_time;
|
||||
struct timer_list *t;
|
||||
|
||||
u64 now_idle = get_cpu_idle_time_us(data,
|
||||
&update_time);
|
||||
|
||||
|
||||
cpu_time_in_idle = &per_cpu(time_in_idle, data);
|
||||
cpu_idle_exit_time = &per_cpu(idle_exit_time, data);
|
||||
|
||||
if (update_time == *cpu_idle_exit_time)
|
||||
return;
|
||||
|
||||
delta_idle = cputime64_sub(now_idle, *cpu_time_in_idle);
|
||||
|
||||
/* Scale up if there were no idle cycles since coming out of idle */
|
||||
if (delta_idle == 0) {
|
||||
if (policy->cur == policy->max)
|
||||
return;
|
||||
|
||||
if (nr_running() < 1)
|
||||
return;
|
||||
|
||||
target_freq = policy->max;
|
||||
cpumask_set_cpu(data, &work_cpumask);
|
||||
queue_work(up_wq, &freq_scale_work);
|
||||
return;
|
||||
}
|
||||
unsigned int delta_idle;
|
||||
unsigned int delta_time;
|
||||
int cpu_load;
|
||||
int load_since_change;
|
||||
u64 time_in_idle;
|
||||
u64 idle_exit_time;
|
||||
struct cpufreq_interactive_cpuinfo *pcpu =
|
||||
&per_cpu(cpuinfo, data);
|
||||
u64 now_idle;
|
||||
unsigned int new_freq;
|
||||
unsigned int index;
|
||||
|
||||
/*
|
||||
* There is a window where if the cpu utlization can go from low to high
|
||||
* between the timer expiring, delta_idle will be > 0 and the cpu will
|
||||
* be 100% busy, preventing idle from running, and this timer from
|
||||
* firing. So setup another timer to fire to check cpu utlization.
|
||||
* Do not setup the timer if there is no scheduled work.
|
||||
* Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
|
||||
* this lets idle exit know the current idle time sample has
|
||||
* been processed, and idle exit can generate a new sample and
|
||||
* re-arm the timer. This prevents a concurrent idle
|
||||
* exit on that CPU from writing a new set of info at the same time
|
||||
* the timer function runs (the timer function can't use that info
|
||||
* until more time passes).
|
||||
*/
|
||||
t = &per_cpu(cpu_timer, data);
|
||||
if (!timer_pending(t) && nr_running() > 0) {
|
||||
*cpu_time_in_idle = get_cpu_idle_time_us(
|
||||
data, cpu_idle_exit_time);
|
||||
mod_timer(t, jiffies + 2);
|
||||
time_in_idle = pcpu->time_in_idle;
|
||||
idle_exit_time = pcpu->idle_exit_time;
|
||||
now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
|
||||
smp_wmb();
|
||||
|
||||
/* If we raced with cancelling a timer, skip. */
|
||||
if (!idle_exit_time) {
|
||||
dbgpr("timer %d: no valid idle exit sample\n", (int) data);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (policy->cur == policy->min)
|
||||
return;
|
||||
#if DEBUG
|
||||
if ((int) jiffies - (int) pcpu->cpu_timer.expires >= 10)
|
||||
dbgpr("timer %d: late by %d ticks\n",
|
||||
(int) data, jiffies - pcpu->cpu_timer.expires);
|
||||
#endif
|
||||
|
||||
delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle);
|
||||
delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
|
||||
idle_exit_time);
|
||||
|
||||
/*
|
||||
* If timer ran less than 1ms after short-term sample started, retry.
|
||||
*/
|
||||
if (delta_time < 1000) {
|
||||
dbgpr("timer %d: time delta %u too short exit=%llu now=%llu\n", (int) data,
|
||||
delta_time, idle_exit_time, pcpu->timer_run_time);
|
||||
goto rearm;
|
||||
}
|
||||
|
||||
if (delta_idle > delta_time)
|
||||
cpu_load = 0;
|
||||
else
|
||||
cpu_load = 100 * (delta_time - delta_idle) / delta_time;
|
||||
|
||||
delta_idle = (unsigned int) cputime64_sub(now_idle,
|
||||
pcpu->freq_change_time_in_idle);
|
||||
delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
|
||||
pcpu->freq_change_time);
|
||||
|
||||
if (delta_idle > delta_time)
|
||||
load_since_change = 0;
|
||||
else
|
||||
load_since_change =
|
||||
100 * (delta_time - delta_idle) / delta_time;
|
||||
|
||||
/*
|
||||
* Choose greater of short-term load (since last idle timer
|
||||
* started or timer function re-armed itself) or long-term load
|
||||
* (since last frequency change).
|
||||
*/
|
||||
if (load_since_change > cpu_load)
|
||||
cpu_load = load_since_change;
|
||||
|
||||
if (cpu_load >= LOAD_SCALE_MAX)
|
||||
new_freq = pcpu->policy->max;
|
||||
else
|
||||
new_freq = pcpu->policy->max * cpu_load / 100;
|
||||
|
||||
if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
|
||||
new_freq, CPUFREQ_RELATION_H,
|
||||
&index)) {
|
||||
dbgpr("timer %d: cpufreq_frequency_table_target error\n", (int) data);
|
||||
goto rearm;
|
||||
}
|
||||
|
||||
new_freq = pcpu->freq_table[index].frequency;
|
||||
|
||||
if (pcpu->target_freq == new_freq)
|
||||
{
|
||||
dbgpr("timer %d: load=%d, already at %d\n", (int) data, cpu_load, new_freq);
|
||||
goto rearm_if_notmax;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do not scale down unless we have been at this frequency for the
|
||||
* minimum sample time.
|
||||
*/
|
||||
if (cputime64_sub(update_time, freq_change_time) < min_sample_time)
|
||||
return;
|
||||
|
||||
target_freq = policy->min;
|
||||
cpumask_set_cpu(data, &work_cpumask);
|
||||
queue_work(down_wq, &freq_scale_work);
|
||||
}
|
||||
|
||||
static void cpufreq_idle(void)
|
||||
{
|
||||
struct timer_list *t;
|
||||
u64 *cpu_time_in_idle;
|
||||
u64 *cpu_idle_exit_time;
|
||||
|
||||
pm_idle_old();
|
||||
|
||||
if (!cpumask_test_cpu(smp_processor_id(), policy->cpus))
|
||||
return;
|
||||
|
||||
/* Timer to fire in 1-2 ticks, jiffie aligned. */
|
||||
t = &per_cpu(cpu_timer, smp_processor_id());
|
||||
cpu_idle_exit_time = &per_cpu(idle_exit_time, smp_processor_id());
|
||||
cpu_time_in_idle = &per_cpu(time_in_idle, smp_processor_id());
|
||||
|
||||
if (timer_pending(t) == 0) {
|
||||
*cpu_time_in_idle = get_cpu_idle_time_us(
|
||||
smp_processor_id(), cpu_idle_exit_time);
|
||||
mod_timer(t, jiffies + 2);
|
||||
if (new_freq < pcpu->target_freq) {
|
||||
if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time) <
|
||||
min_sample_time) {
|
||||
dbgpr("timer %d: load=%d cur=%d tgt=%d not yet\n", (int) data, cpu_load, pcpu->target_freq, new_freq);
|
||||
goto rearm;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Choose the cpu frequency based off the load. For now choose the minimum
|
||||
* frequency that will satisfy the load, which is not always the lower power.
|
||||
*/
|
||||
static unsigned int cpufreq_interactive_calc_freq(unsigned int cpu)
|
||||
{
|
||||
unsigned int delta_time;
|
||||
unsigned int idle_time;
|
||||
unsigned int cpu_load;
|
||||
u64 current_wall_time;
|
||||
u64 current_idle_time;;
|
||||
dbgpr("timer %d: load=%d cur=%d tgt=%d queue\n", (int) data, cpu_load, pcpu->target_freq, new_freq);
|
||||
|
||||
current_idle_time = get_cpu_idle_time_us(cpu, ¤t_wall_time);
|
||||
if (new_freq < pcpu->target_freq) {
|
||||
pcpu->target_freq = new_freq;
|
||||
cpumask_set_cpu(data, &down_cpumask);
|
||||
queue_work(down_wq, &freq_scale_down_work);
|
||||
} else {
|
||||
pcpu->target_freq = new_freq;
|
||||
#if DEBUG
|
||||
up_request_time = ktime_to_us(ktime_get());
|
||||
#endif
|
||||
cpumask_set_cpu(data, &up_cpumask);
|
||||
wake_up_process(up_task);
|
||||
}
|
||||
|
||||
idle_time = (unsigned int) current_idle_time - freq_change_time_in_idle;
|
||||
delta_time = (unsigned int) current_wall_time - freq_change_time;
|
||||
rearm_if_notmax:
|
||||
/*
|
||||
* Already set max speed and don't see a need to change that,
|
||||
* wait until next idle to re-evaluate, don't need timer.
|
||||
*/
|
||||
if (pcpu->target_freq == pcpu->policy->max)
|
||||
goto exit;
|
||||
|
||||
if (delta_time == 0)
|
||||
return policy->cur;
|
||||
rearm:
|
||||
if (!timer_pending(&pcpu->cpu_timer)) {
|
||||
/*
|
||||
* If already at min: if that CPU is idle, don't set timer.
|
||||
* Else cancel the timer if that CPU goes idle. We don't
|
||||
* need to re-evaluate speed until the next idle exit.
|
||||
*/
|
||||
if (pcpu->target_freq == pcpu->policy->min) {
|
||||
smp_rmb();
|
||||
|
||||
cpu_load = 100 * (delta_time - idle_time) / delta_time;
|
||||
|
||||
return policy->cur * cpu_load / 100;
|
||||
}
|
||||
|
||||
|
||||
/* We use the same work function to sale up and down */
|
||||
static void cpufreq_interactive_freq_change_time_work(struct work_struct *work)
|
||||
{
|
||||
unsigned int cpu;
|
||||
cpumask_t *tmp_mask = &work_cpumask;
|
||||
for_each_cpu(cpu, tmp_mask) {
|
||||
if (target_freq == policy->max) {
|
||||
if (nr_running() == 1) {
|
||||
cpumask_clear_cpu(cpu, &work_cpumask);
|
||||
return;
|
||||
if (pcpu->idling) {
|
||||
dbgpr("timer %d: cpu idle, don't re-arm\n", (int) data);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
__cpufreq_driver_target(policy, target_freq,
|
||||
CPUFREQ_RELATION_H);
|
||||
} else {
|
||||
target_freq = cpufreq_interactive_calc_freq(cpu);
|
||||
__cpufreq_driver_target(policy, target_freq,
|
||||
CPUFREQ_RELATION_L);
|
||||
pcpu->timer_idlecancel = 1;
|
||||
}
|
||||
freq_change_time_in_idle = get_cpu_idle_time_us(cpu,
|
||||
&freq_change_time);
|
||||
|
||||
cpumask_clear_cpu(cpu, &work_cpumask);
|
||||
pcpu->time_in_idle = get_cpu_idle_time_us(
|
||||
data, &pcpu->idle_exit_time);
|
||||
mod_timer(&pcpu->cpu_timer, jiffies + 2);
|
||||
dbgpr("timer %d: set timer for %lu exit=%llu\n", (int) data, pcpu->cpu_timer.expires, pcpu->idle_exit_time);
|
||||
}
|
||||
|
||||
exit:
|
||||
return;
|
||||
}
|
||||
|
||||
static void cpufreq_interactive_idle(void)
|
||||
{
|
||||
struct cpufreq_interactive_cpuinfo *pcpu =
|
||||
&per_cpu(cpuinfo, smp_processor_id());
|
||||
int pending;
|
||||
|
||||
if (!pcpu->governor_enabled) {
|
||||
pm_idle_old();
|
||||
return;
|
||||
}
|
||||
|
||||
pcpu->idling = 1;
|
||||
smp_wmb();
|
||||
pending = timer_pending(&pcpu->cpu_timer);
|
||||
|
||||
if (pcpu->target_freq != pcpu->policy->min) {
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Entering idle while not at lowest speed. On some
|
||||
* platforms this can hold the other CPU(s) at that speed
|
||||
* even though the CPU is idle. Set a timer to re-evaluate
|
||||
* speed so this idle CPU doesn't hold the other CPUs above
|
||||
* min indefinitely. This should probably be a quirk of
|
||||
* the CPUFreq driver.
|
||||
*/
|
||||
if (!pending) {
|
||||
pcpu->time_in_idle = get_cpu_idle_time_us(
|
||||
smp_processor_id(), &pcpu->idle_exit_time);
|
||||
pcpu->timer_idlecancel = 0;
|
||||
mod_timer(&pcpu->cpu_timer, jiffies + 2);
|
||||
dbgpr("idle: enter at %d, set timer for %lu exit=%llu\n",
|
||||
pcpu->target_freq, pcpu->cpu_timer.expires,
|
||||
pcpu->idle_exit_time);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
/*
|
||||
* If at min speed and entering idle after load has
|
||||
* already been evaluated, and a timer has been set just in
|
||||
* case the CPU suddenly goes busy, cancel that timer. The
|
||||
* CPU didn't go busy; we'll recheck things upon idle exit.
|
||||
*/
|
||||
if (pending && pcpu->timer_idlecancel) {
|
||||
dbgpr("idle: cancel timer for %lu\n", pcpu->cpu_timer.expires);
|
||||
del_timer(&pcpu->cpu_timer);
|
||||
/*
|
||||
* Ensure last timer run time is after current idle
|
||||
* sample start time, so next idle exit will always
|
||||
* start a new idle sampling period.
|
||||
*/
|
||||
pcpu->idle_exit_time = 0;
|
||||
pcpu->timer_idlecancel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
pm_idle_old();
|
||||
pcpu->idling = 0;
|
||||
smp_wmb();
|
||||
|
||||
/*
|
||||
* Arm the timer for 1-2 ticks later if not already, and if the timer
|
||||
* function has already processed the previous load sampling
|
||||
* interval. (If the timer is not pending but has not processed
|
||||
* the previous interval, it is probably racing with us on another
|
||||
* CPU. Let it compute load based on the previous sample and then
|
||||
* re-arm the timer for another interval when it's done, rather
|
||||
* than updating the interval start time to be "now", which doesn't
|
||||
* give the timer function enough time to make a decision on this
|
||||
* run.)
|
||||
*/
|
||||
if (timer_pending(&pcpu->cpu_timer) == 0 &&
|
||||
pcpu->timer_run_time >= pcpu->idle_exit_time) {
|
||||
pcpu->time_in_idle =
|
||||
get_cpu_idle_time_us(smp_processor_id(),
|
||||
&pcpu->idle_exit_time);
|
||||
pcpu->timer_idlecancel = 0;
|
||||
mod_timer(&pcpu->cpu_timer, jiffies + 2);
|
||||
dbgpr("idle: exit, set timer for %lu exit=%llu\n", pcpu->cpu_timer.expires, pcpu->idle_exit_time);
|
||||
#if DEBUG
|
||||
} else if (timer_pending(&pcpu->cpu_timer) == 0 &&
|
||||
pcpu->timer_run_time < pcpu->idle_exit_time) {
|
||||
dbgpr("idle: timer not run yet: exit=%llu tmrrun=%llu\n",
|
||||
pcpu->idle_exit_time, pcpu->timer_run_time);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static int cpufreq_interactive_up_task(void *data)
|
||||
{
|
||||
unsigned int cpu;
|
||||
cpumask_t tmp_mask;
|
||||
struct cpufreq_interactive_cpuinfo *pcpu;
|
||||
|
||||
#if DEBUG
|
||||
u64 now;
|
||||
u64 then;
|
||||
unsigned int lat;
|
||||
#endif
|
||||
|
||||
while (1) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
if (cpumask_empty(&up_cpumask))
|
||||
schedule();
|
||||
|
||||
set_current_state(TASK_RUNNING);
|
||||
|
||||
if (kthread_should_stop())
|
||||
break;
|
||||
#if DEBUG
|
||||
then = up_request_time;
|
||||
now = ktime_to_us(ktime_get());
|
||||
|
||||
if (now > then) {
|
||||
lat = ktime_to_us(ktime_get()) - then;
|
||||
|
||||
if (lat > up_max_latency)
|
||||
up_max_latency = lat;
|
||||
}
|
||||
#endif
|
||||
|
||||
tmp_mask = up_cpumask;
|
||||
|
||||
for_each_cpu(cpu, &tmp_mask) {
|
||||
cpumask_clear_cpu(cpu, &up_cpumask);
|
||||
pcpu = &per_cpu(cpuinfo, cpu);
|
||||
|
||||
if (nr_running() == 1) {
|
||||
dbgpr("up %d: tgt=%d nothing else running\n", cpu,
|
||||
pcpu->target_freq);
|
||||
}
|
||||
|
||||
__cpufreq_driver_target(pcpu->policy,
|
||||
pcpu->target_freq,
|
||||
CPUFREQ_RELATION_H);
|
||||
pcpu->freq_change_time_in_idle =
|
||||
get_cpu_idle_time_us(cpu,
|
||||
&pcpu->freq_change_time);
|
||||
dbgpr("up %d: set tgt=%d (actual=%d)\n", cpu, pcpu->target_freq, pcpu->policy->cur);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cpufreq_interactive_freq_down(struct work_struct *work)
|
||||
{
|
||||
unsigned int cpu;
|
||||
cpumask_t tmp_mask = down_cpumask;
|
||||
struct cpufreq_interactive_cpuinfo *pcpu;
|
||||
|
||||
for_each_cpu(cpu, &tmp_mask) {
|
||||
cpumask_clear_cpu(cpu, &down_cpumask);
|
||||
pcpu = &per_cpu(cpuinfo, cpu);
|
||||
|
||||
__cpufreq_driver_target(pcpu->policy,
|
||||
pcpu->target_freq,
|
||||
CPUFREQ_RELATION_H);
|
||||
|
||||
pcpu->freq_change_time_in_idle =
|
||||
get_cpu_idle_time_us(cpu,
|
||||
&pcpu->freq_change_time);
|
||||
dbgpr("down %d: set tgt=%d (actual=%d)\n", cpu, pcpu->target_freq, pcpu->policy->cur);
|
||||
}
|
||||
}
|
||||
|
||||
static ssize_t show_min_sample_time(struct kobject *kobj,
|
||||
@ -237,11 +519,21 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *new_policy,
|
||||
unsigned int event)
|
||||
{
|
||||
int rc;
|
||||
struct cpufreq_interactive_cpuinfo *pcpu =
|
||||
&per_cpu(cpuinfo, new_policy->cpu);
|
||||
|
||||
switch (event) {
|
||||
case CPUFREQ_GOV_START:
|
||||
if (!cpu_online(new_policy->cpu))
|
||||
return -EINVAL;
|
||||
|
||||
pcpu->policy = new_policy;
|
||||
pcpu->freq_table = cpufreq_frequency_get_table(new_policy->cpu);
|
||||
pcpu->target_freq = new_policy->cur;
|
||||
pcpu->freq_change_time_in_idle =
|
||||
get_cpu_idle_time_us(new_policy->cpu,
|
||||
&pcpu->freq_change_time);
|
||||
pcpu->governor_enabled = 1;
|
||||
/*
|
||||
* Do not register the idle hook and create sysfs
|
||||
* entries if we have already done so.
|
||||
@ -255,20 +547,21 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *new_policy,
|
||||
return rc;
|
||||
|
||||
pm_idle_old = pm_idle;
|
||||
pm_idle = cpufreq_idle;
|
||||
policy = new_policy;
|
||||
pm_idle = cpufreq_interactive_idle;
|
||||
break;
|
||||
|
||||
case CPUFREQ_GOV_STOP:
|
||||
if (atomic_dec_return(&active_count) > 1)
|
||||
pcpu->governor_enabled = 0;
|
||||
|
||||
if (atomic_dec_return(&active_count) > 0)
|
||||
return 0;
|
||||
|
||||
sysfs_remove_group(cpufreq_global_kobject,
|
||||
&interactive_attr_group);
|
||||
|
||||
pm_idle = pm_idle_old;
|
||||
del_timer(&per_cpu(cpu_timer, new_policy->cpu));
|
||||
break;
|
||||
del_timer(&pcpu->cpu_timer);
|
||||
break;
|
||||
|
||||
case CPUFREQ_GOV_LIMITS:
|
||||
if (new_policy->max < new_policy->cur)
|
||||
@ -285,28 +578,52 @@ static int cpufreq_governor_interactive(struct cpufreq_policy *new_policy,
|
||||
static int __init cpufreq_interactive_init(void)
|
||||
{
|
||||
unsigned int i;
|
||||
struct timer_list *t;
|
||||
struct cpufreq_interactive_cpuinfo *pcpu;
|
||||
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
|
||||
|
||||
min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
|
||||
|
||||
/* Initalize per-cpu timers */
|
||||
for_each_possible_cpu(i) {
|
||||
t = &per_cpu(cpu_timer, i);
|
||||
init_timer_deferrable(t);
|
||||
t->function = cpufreq_interactive_timer;
|
||||
t->data = i;
|
||||
pcpu = &per_cpu(cpuinfo, i);
|
||||
init_timer(&pcpu->cpu_timer);
|
||||
pcpu->cpu_timer.function = cpufreq_interactive_timer;
|
||||
pcpu->cpu_timer.data = i;
|
||||
}
|
||||
|
||||
/* Scale up is high priority */
|
||||
up_wq = create_rt_workqueue("kinteractive_up");
|
||||
up_task = kthread_create(cpufreq_interactive_up_task, NULL,
|
||||
"kinteractiveup");
|
||||
if (IS_ERR(up_task))
|
||||
return PTR_ERR(up_task);
|
||||
|
||||
sched_setscheduler_nocheck(up_task, SCHED_FIFO, ¶m);
|
||||
get_task_struct(up_task);
|
||||
|
||||
/* No rescuer thread, bind to CPU queuing the work for possibly
|
||||
warm cache (probably doesn't matter much). */
|
||||
down_wq = create_workqueue("knteractive_down");
|
||||
|
||||
INIT_WORK(&freq_scale_work, cpufreq_interactive_freq_change_time_work);
|
||||
if (! down_wq)
|
||||
goto err_freeuptask;
|
||||
|
||||
INIT_WORK(&freq_scale_down_work,
|
||||
cpufreq_interactive_freq_down);
|
||||
|
||||
#if DEBUG
|
||||
spin_lock_init(&dbgpr_lock);
|
||||
dbg_proc = create_proc_entry("igov", S_IWUSR | S_IRUGO, NULL);
|
||||
dbg_proc->read_proc = dbg_proc_read;
|
||||
#endif
|
||||
|
||||
return cpufreq_register_governor(&cpufreq_gov_interactive);
|
||||
|
||||
err_freeuptask:
|
||||
put_task_struct(up_task);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
|
||||
pure_initcall(cpufreq_interactive_init);
|
||||
fs_initcall(cpufreq_interactive_init);
|
||||
#else
|
||||
module_init(cpufreq_interactive_init);
|
||||
#endif
|
||||
@ -314,7 +631,8 @@ module_init(cpufreq_interactive_init);
|
||||
static void __exit cpufreq_interactive_exit(void)
|
||||
{
|
||||
cpufreq_unregister_governor(&cpufreq_gov_interactive);
|
||||
destroy_workqueue(up_wq);
|
||||
kthread_stop(up_task);
|
||||
put_task_struct(up_task);
|
||||
destroy_workqueue(down_wq);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user