diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index aed082f4..d155c054 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -28,6 +28,7 @@ Contents:
 2.3 Userspace
 2.4 Ondemand
 2.5 Conservative
+2.6 Interactive
 
 3. The Governor Interface in the CPUfreq Core
 
@@ -182,6 +183,29 @@ governor but for the opposite direction. For example when set to its
 default value of '20' it means that the CPU usage needs to be below
 20% between samples for the frequency to be decreased.
 
+
+2.6 Interactive
+---------------
+
+The CPUfreq governor "interactive" is designed for low-latency,
+interactive workloads. This governor sets the CPU speed depending on
+usage, similar to the "ondemand" and "conservative" governors. However,
+there is no polling (no 'sample_rate') required to scale the CPU up.
+
+Sampling the CPU load every X ms can lead to under-powering the CPU
+for X ms, leading to dropped frame rates, a stuttering UI, etc.
+
+Scaling the CPU up is done when coming out of idle, and, like
+"ondemand", scaling up always goes to MAX first, then steps down based
+on CPU load.
+
+There is only one tuneable value for this governor:
+
+min_sample_time: The amount of time, in uS, that the CPU must spend
+at the current frequency before scaling DOWN. This is done to more
+accurately determine the CPU workload and the best speed for that
+workload. The default is 50 ms (50000 uS).
+
+
 3. The Governor Interface in the CPUfreq Core
 =============================================
diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig
index 85503ae6..7593df05 100644
--- a/arch/arm/configs/htcleo_defconfig
+++ b/arch/arm/configs/htcleo_defconfig
@@ -418,10 +418,12 @@ CONFIG_CPU_FREQ_STAT_DETAILS=y
 # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 # CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE is not set
 CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_INTERACTIVE=y
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
 # CONFIG_CPU_IDLE is not set
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index a8c8d9c1..ca7b0887 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -110,6 +110,14 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
 	  Be aware that not all cpufreq drivers support the conservative
 	  governor. If unsure have a look at the help section of the
 	  driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+	bool "interactive"
+	select CPU_FREQ_GOV_INTERACTIVE
+	help
+	  Use the 'interactive' governor as default. This gives full CPU
+	  frequency scaling for workloads that are latency sensitive,
+	  typically interactive workloads.
 endchoice
 
 config CPU_FREQ_GOV_PERFORMANCE
@@ -167,6 +175,13 @@ config CPU_FREQ_GOV_ONDEMAND
 
 	  If in doubt, say N.
 
+config CPU_FREQ_GOV_INTERACTIVE
+	tristate "'interactive' cpufreq governor"
+	help
+	  'interactive' - This driver adds a dynamic cpufreq policy
+	  governor designed for low-latency, bursty workloads. Scaling is
+	  done when coming out of idle instead of by polling.
+
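For readers, the behavior described in section 2.6 above and in this help
text reduces to a small decision rule. Below is a minimal sketch in plain C;
the names and the three-way action split are invented for illustration and
are not part of the patch:

/* Sketch only: invented names, not driver code. */
enum interactive_action { STAY, GO_MAX, STEP_DOWN };

static enum interactive_action
interactive_policy(unsigned long long delta_idle_us,
		   unsigned long long time_at_cur_us,
		   unsigned long min_sample_us)
{
	if (delta_idle_us == 0)
		return GO_MAX;		/* fully busy since idle exit */
	if (time_at_cur_us < min_sample_us)
		return STAY;		/* keep sampling the current speed */
	return STEP_DOWN;		/* scale toward the measured load */
}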
 config CPU_FREQ_GOV_CONSERVATIVE
 	tristate "'conservative' cpufreq governor"
 	depends on CPU_FREQ
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 71fc3b41..30629f7d 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE)	+= cpufreq_powersave.o
 obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE)	+= cpufreq_userspace.o
 obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND)	+= cpufreq_ondemand.o
 obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
+obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE)	+= cpufreq_interactive.o
 
 # CPUfreq cross-arch helpers
 obj-$(CONFIG_CPU_FREQ_TABLE)		+= freq_table.o
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
new file mode 100644
index 00000000..36859a78
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -0,0 +1,324 @@
+/*
+ * drivers/cpufreq/cpufreq_interactive.c
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Mike Chan (mike@android.com)
+ *
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+
+#include <asm/cputime.h>
+
+static void (*pm_idle_old)(void);
+static atomic_t active_count = ATOMIC_INIT(0);
+
+static DEFINE_PER_CPU(struct timer_list, cpu_timer);
+
+static DEFINE_PER_CPU(u64, time_in_idle);
+static DEFINE_PER_CPU(u64, idle_exit_time);
+
+static struct cpufreq_policy *policy;
+static unsigned int target_freq;
+
+/* Workqueues handle frequency scaling */
+static struct workqueue_struct *up_wq;
+static struct workqueue_struct *down_wq;
+static struct work_struct freq_scale_work;
+
+static u64 freq_change_time;
+static u64 freq_change_time_in_idle;
+
+static cpumask_t work_cpumask;
+
+/*
+ * The minimum amount of time to spend at a frequency before we can ramp
+ * down, default is 50 ms.
+ */
+#define DEFAULT_MIN_SAMPLE_TIME 50000
+static unsigned long min_sample_time;
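The pm_idle_old pointer above is there to chain the kernel's existing idle
handler, which the governor wraps at GOV_START (see
cpufreq_governor_interactive() below). A generic sketch of that
hook-chaining idiom, with invented names and independent of any kernel API:

/* Save the installed handler, put a wrapper in its place, and always
 * call through to the original. */
static void (*saved_hook)(void);

static void wrapper_hook(void)
{
	saved_hook();	/* preserve the original idle behavior */
	/* ...then do the extra per-idle work, e.g. arm a timer */
}

static void install_hook(void (**hook)(void))
{
	saved_hook = *hook;
	*hook = wrapper_hook;
}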
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
+		unsigned int event);
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_interactive = {
+	.name = "interactive",
+	.governor = cpufreq_governor_interactive,
+	.max_transition_latency = 10000000,
+	.owner = THIS_MODULE,
+};
+
+static void cpufreq_interactive_timer(unsigned long data)
+{
+	u64 delta_idle;
+	u64 update_time;
+	u64 *cpu_time_in_idle;
+	u64 *cpu_idle_exit_time;
+	struct timer_list *t;
+
+	u64 now_idle = get_cpu_idle_time_us(data, &update_time);
+
+	cpu_time_in_idle = &per_cpu(time_in_idle, data);
+	cpu_idle_exit_time = &per_cpu(idle_exit_time, data);
+
+	if (update_time == *cpu_idle_exit_time)
+		return;
+
+	delta_idle = cputime64_sub(now_idle, *cpu_time_in_idle);
+
+	/* Scale up if there were no idle cycles since coming out of idle */
+	if (delta_idle == 0) {
+		if (policy->cur == policy->max)
+			return;
+
+		if (nr_running() < 1)
+			return;
+
+		target_freq = policy->max;
+		cpumask_set_cpu(data, &work_cpumask);
+		queue_work(up_wq, &freq_scale_work);
+		return;
+	}
+
+	/*
+	 * There is a window where the CPU utilization can go from low to
+	 * high between timer firings: delta_idle will be > 0 while the CPU
+	 * is 100% busy, preventing idle from running, and hence this timer
+	 * from firing. So set up another timer to re-check the CPU
+	 * utilization. Do not set up the timer if there is no scheduled
+	 * work.
+	 */
+	t = &per_cpu(cpu_timer, data);
+	if (!timer_pending(t) && nr_running() > 0) {
+		*cpu_time_in_idle = get_cpu_idle_time_us(
+				data, cpu_idle_exit_time);
+		mod_timer(t, jiffies + 2);
+	}
+
+	if (policy->cur == policy->min)
+		return;
+
+	/*
+	 * Do not scale down unless we have been at this frequency for the
+	 * minimum sample time.
+	 */
+	if (cputime64_sub(update_time, freq_change_time) < min_sample_time)
+		return;
+
+	target_freq = policy->min;
+	cpumask_set_cpu(data, &work_cpumask);
+	queue_work(down_wq, &freq_scale_work);
+}
+
+static void cpufreq_idle(void)
+{
+	struct timer_list *t;
+	u64 *cpu_time_in_idle;
+	u64 *cpu_idle_exit_time;
+
+	pm_idle_old();
+
+	if (!cpumask_test_cpu(smp_processor_id(), policy->cpus))
+		return;
+
+	/* Timer to fire in 1-2 ticks, jiffy aligned. */
+	t = &per_cpu(cpu_timer, smp_processor_id());
+	cpu_idle_exit_time = &per_cpu(idle_exit_time, smp_processor_id());
+	cpu_time_in_idle = &per_cpu(time_in_idle, smp_processor_id());
+
+	if (timer_pending(t) == 0) {
+		*cpu_time_in_idle = get_cpu_idle_time_us(
+				smp_processor_id(), cpu_idle_exit_time);
+		mod_timer(t, jiffies + 2);
+	}
+}
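To make the timer's scale-up test concrete: get_cpu_idle_time_us() reports
cumulative idle time in uS, so if the counter has not moved since the
idle-exit snapshot, the CPU has been 100% busy for the whole window. A
standalone userspace model of that test, with invented numbers:

#include <stdio.h>

/* Models the delta_idle == 0 test in cpufreq_interactive_timer(). */
static int should_jump_to_max(unsigned long long idle_at_exit_us,
			      unsigned long long idle_now_us)
{
	return (idle_now_us - idle_at_exit_us) == 0;
}

int main(void)
{
	/* Idle counter unchanged two ticks after idle exit: go to max. */
	printf("%d\n", should_jump_to_max(1000000ULL, 1000000ULL)); /* 1 */
	/* 300 uS of idle accumulated: evaluate the load instead. */
	printf("%d\n", should_jump_to_max(1000000ULL, 1000300ULL)); /* 0 */
	return 0;
}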
+
+/*
+ * Choose the CPU frequency based off the load. For now choose the minimum
+ * frequency that will satisfy the load, which is not always the lowest
+ * power.
+ */
+static unsigned int cpufreq_interactive_calc_freq(unsigned int cpu)
+{
+	unsigned int delta_time;
+	unsigned int idle_time;
+	unsigned int cpu_load;
+	u64 current_wall_time;
+	u64 current_idle_time;
+
+	current_idle_time = get_cpu_idle_time_us(cpu, &current_wall_time);
+
+	idle_time = (unsigned int) current_idle_time - freq_change_time_in_idle;
+	delta_time = (unsigned int) current_wall_time - freq_change_time;
+
+	cpu_load = 100 * (delta_time - idle_time) / delta_time;
+
+	return policy->cur * cpu_load / 100;
+}
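A worked example of the calculation above, with invented sample values:
80,000 uS of wall time since the last frequency change, 20,000 uS of it
idle, and a current frequency of 998,400 kHz (a hypothetical OPP). The load
comes out at 75% and the target at 748,800 kHz; CPUFREQ_RELATION_L then
rounds up to the nearest supported frequency.

#include <stdio.h>

/* Standalone restatement of cpufreq_interactive_calc_freq(). */
int main(void)
{
	unsigned int delta_time = 80000;	/* uS of wall time */
	unsigned int idle_time = 20000;		/* uS spent idle */
	unsigned int cur = 998400;		/* current freq, kHz */

	unsigned int cpu_load = 100 * (delta_time - idle_time) / delta_time;
	unsigned int target = cur * cpu_load / 100;

	printf("load=%u%% target=%u kHz\n", cpu_load, target);
	/* Prints: load=75% target=748800 kHz */
	return 0;
}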
+
+/* We use the same work function to scale up and down. */
+static void cpufreq_interactive_freq_change_time_work(struct work_struct *work)
+{
+	unsigned int cpu;
+	cpumask_t *tmp_mask = &work_cpumask;
+
+	for_each_cpu(cpu, tmp_mask) {
+		if (target_freq == policy->max) {
+			if (nr_running() == 1) {
+				cpumask_clear_cpu(cpu, &work_cpumask);
+				return;
+			}
+
+			__cpufreq_driver_target(policy, target_freq,
+					CPUFREQ_RELATION_H);
+		} else {
+			target_freq = cpufreq_interactive_calc_freq(cpu);
+			__cpufreq_driver_target(policy, target_freq,
+					CPUFREQ_RELATION_L);
+		}
+		freq_change_time_in_idle = get_cpu_idle_time_us(cpu,
+				&freq_change_time);
+
+		cpumask_clear_cpu(cpu, &work_cpumask);
+	}
+}
+
+static ssize_t show_min_sample_time(struct kobject *kobj,
+		struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", min_sample_time);
+}
+
+static ssize_t store_min_sample_time(struct kobject *kobj,
+		struct attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+
+	ret = strict_strtoul(buf, 0, &min_sample_time);
+	if (ret < 0)
+		return ret;
+	return count;
+}
+
+static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
+		show_min_sample_time, store_min_sample_time);
+
+static struct attribute *interactive_attributes[] = {
+	&min_sample_time_attr.attr,
+	NULL,
+};
+
+static struct attribute_group interactive_attr_group = {
+	.attrs = interactive_attributes,
+	.name = "interactive",
+};
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *new_policy,
+		unsigned int event)
+{
+	int rc;
+
+	switch (event) {
+	case CPUFREQ_GOV_START:
+		if (!cpu_online(new_policy->cpu))
+			return -EINVAL;
+
+		/*
+		 * Do not register the idle hook and create sysfs
+		 * entries if we have already done so.
+		 */
+		if (atomic_inc_return(&active_count) > 1)
+			return 0;
+
+		rc = sysfs_create_group(cpufreq_global_kobject,
+				&interactive_attr_group);
+		if (rc)
+			return rc;
+
+		pm_idle_old = pm_idle;
+		pm_idle = cpufreq_idle;
+		policy = new_policy;
+		break;
+
+	case CPUFREQ_GOV_STOP:
+		if (atomic_dec_return(&active_count) > 1)
+			return 0;
+
+		sysfs_remove_group(cpufreq_global_kobject,
+				&interactive_attr_group);
+
+		pm_idle = pm_idle_old;
+		del_timer(&per_cpu(cpu_timer, new_policy->cpu));
+		break;
+
+	case CPUFREQ_GOV_LIMITS:
+		if (new_policy->max < new_policy->cur)
+			__cpufreq_driver_target(new_policy,
+					new_policy->max, CPUFREQ_RELATION_H);
+		else if (new_policy->min > new_policy->cur)
+			__cpufreq_driver_target(new_policy,
+					new_policy->min, CPUFREQ_RELATION_L);
+		break;
+	}
+	return 0;
+}
+
+static int __init cpufreq_interactive_init(void)
+{
+	unsigned int i;
+	struct timer_list *t;
+
+	min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
+
+	/* Initialize per-cpu timers */
+	for_each_possible_cpu(i) {
+		t = &per_cpu(cpu_timer, i);
+		init_timer_deferrable(t);
+		t->function = cpufreq_interactive_timer;
+		t->data = i;
+	}
+
+	/* Scale up is high priority */
+	up_wq = create_rt_workqueue("kinteractive_up");
+	down_wq = create_workqueue("kinteractive_down");
+
+	INIT_WORK(&freq_scale_work, cpufreq_interactive_freq_change_time_work);
+
+	return cpufreq_register_governor(&cpufreq_gov_interactive);
+}
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+pure_initcall(cpufreq_interactive_init);
+#else
+module_init(cpufreq_interactive_init);
+#endif
+
+static void __exit cpufreq_interactive_exit(void)
+{
+	cpufreq_unregister_governor(&cpufreq_gov_interactive);
+	destroy_workqueue(up_wq);
+	destroy_workqueue(down_wq);
+}
+
+module_exit(cpufreq_interactive_exit);
+
+MODULE_AUTHOR("Mike Chan <mike@android.com>");
+MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
+	"latency sensitive workloads");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 79a2340d..61dca28f 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -338,6 +338,9 @@ extern struct cpufreq_governor cpufreq_gov_ondemand;
 #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE)
 extern struct cpufreq_governor cpufreq_gov_conservative;
 #define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_conservative)
+#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE)
+extern struct cpufreq_governor cpufreq_gov_interactive;
+#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_interactive)
 #endif
diff --git a/kernel/sched.c b/kernel/sched.c
index f03aff63..a7ee0eb5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2910,6 +2910,7 @@ unsigned long nr_running(void)
 
 	return sum;
 }
+EXPORT_SYMBOL_GPL(nr_running);
 
 unsigned long nr_uninterruptible(void)
 {
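Finally, a hypothetical userspace example of driving the governor's single
tunable. The sysfs path follows from the attribute_group named "interactive"
that the patch registers on cpufreq_global_kobject; the value is in uS, so
this raises min_sample_time from the 50 ms default to 100 ms:

#include <stdio.h>

int main(void)
{
	/* Path assumed from the sysfs group registered in the patch. */
	const char *path =
		"/sys/devices/system/cpu/cpufreq/interactive/min_sample_time";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("min_sample_time");
		return 1;
	}
	fprintf(f, "%lu\n", 100000UL);	/* 100 ms, expressed in uS */
	fclose(f);
	return 0;
}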