
Commit bc2791f

htejun authored and torvalds committed
slab: link memcg kmem_caches on their associated memory cgroup
With kmem cgroup support enabled, kmem_caches can be created and destroyed frequently and a great number of near empty kmem_caches can accumulate if there are a lot of transient cgroups and the system is not under memory pressure. When memory reclaim starts under such conditions, it can lead to consecutive deactivation and destruction of many kmem_caches, easily hundreds of thousands on moderately large systems, exposing scalability issues in the current slab management code. This is one of the patches to address the issue.

While a memcg kmem_cache is listed on its root cache's ->children list, there is no direct way to iterate all kmem_caches which are associated with a memory cgroup. The only way to iterate them is walking all caches while filtering out caches which don't match, which would be most of them.

This makes memcg destruction operations O(N^2) where N is the total number of slab caches, which can be huge. This, combined with the synchronous RCU operations, can tie up a CPU and affect the whole machine for many hours when memory reclaim triggers offlining and destruction of the stale memcgs.

This patch adds the mem_cgroup->kmem_caches list, which goes through memcg_cache_params->kmem_caches_node of all kmem_caches which are associated with the memcg. All memcg specific iterations, including stat file access, are updated to use the new list instead.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Tejun Heo <[email protected]>
Reported-by: Jay Vana <[email protected]>
Acked-by: Vladimir Davydov <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Pekka Enberg <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 9eeadc8 commit bc2791f
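To illustrate why the per-memcg list helps, here is a minimal userspace C sketch. It is not kernel code: the toy_cache/toy_memcg types, the push() helper, and the simplified singly-linked list are invented for illustration (the kernel uses struct list_head and list_for_each_entry). It contrasts the old pattern of scanning the global cache list and filtering by owner with the new pattern of walking a list owned by the memcg itself, which is what turns the per-memcg iteration from O(total caches) into O(caches belonging to that memcg).

/* Toy model only -- names and list implementation are illustrative. */
#include <stdio.h>
#include <stddef.h>

struct list_node { struct list_node *next; };

struct toy_memcg {
	const char *name;
	struct list_node kmem_caches;		/* like mem_cgroup->kmem_caches */
};

struct toy_cache {
	const char *name;
	struct toy_memcg *memcg;
	struct list_node global_node;		/* like kmem_cache->list (global slab_caches) */
	struct list_node kmem_caches_node;	/* like memcg_params.kmem_caches_node */
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void push(struct list_node *head, struct list_node *n)
{
	n->next = head->next;
	head->next = n;
}

int main(void)
{
	struct toy_memcg a = { "memcg-A", { NULL } };
	struct toy_cache c1 = { "dentry(A)", &a, { NULL }, { NULL } };
	struct toy_cache c2 = { "inode(A)",  &a, { NULL }, { NULL } };
	struct list_node all_caches = { NULL };

	/* Old scheme: every cache hangs off one global list. */
	push(&all_caches, &c1.global_node);
	push(&all_caches, &c2.global_node);

	/* New scheme: each cache is also linked on its owning memcg. */
	push(&a.kmem_caches, &c1.kmem_caches_node);
	push(&a.kmem_caches, &c2.kmem_caches_node);

	/* Old iteration: O(total caches) per memcg -- scan everything and filter. */
	for (struct list_node *p = all_caches.next; p; p = p->next) {
		struct toy_cache *s = container_of(p, struct toy_cache, global_node);
		if (s->memcg != &a)
			continue;
		printf("old walk: %s\n", s->name);
	}

	/* New iteration: O(caches of this memcg) -- walk the memcg's own list. */
	for (struct list_node *p = a.kmem_caches.next; p; p = p->next) {
		struct toy_cache *s = container_of(p, struct toy_cache, kmem_caches_node);
		printf("new walk: %s\n", s->name);
	}
	return 0;
}

With many transient memcgs, the old scheme repeats the full filtering scan once per memcg being destroyed, which is where the O(N^2) behaviour described above comes from.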

5 files changed, +40 -10 lines


include/linux/memcontrol.h

Lines changed: 1 addition & 0 deletions
@@ -253,6 +253,7 @@ struct mem_cgroup {
 	/* Index in the kmem_cache->memcg_params.memcg_caches array */
 	int kmemcg_id;
 	enum memcg_kmem_state kmem_state;
+	struct list_head kmem_caches;
 #endif
 
 	int last_scanned_node;

include/linux/slab.h

Lines changed: 3 additions & 0 deletions
@@ -565,6 +565,8 @@ struct memcg_cache_array {
  * @memcg: Pointer to the memcg this cache belongs to.
  *
  * @children_node: List node for @root_cache->children list.
+ *
+ * @kmem_caches_node: List node for @memcg->kmem_caches list.
  */
 struct memcg_cache_params {
 	struct kmem_cache *root_cache;
@@ -576,6 +578,7 @@ struct memcg_cache_params {
 		struct {
 			struct mem_cgroup *memcg;
 			struct list_head children_node;
+			struct list_head kmem_caches_node;
 		};
 	};
 };

mm/memcontrol.c

Lines changed: 4 additions & 3 deletions
@@ -2837,6 +2837,7 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
 	 */
 	memcg->kmemcg_id = memcg_id;
 	memcg->kmem_state = KMEM_ONLINE;
+	INIT_LIST_HEAD(&memcg->kmem_caches);
 
 	return 0;
 }
@@ -4002,9 +4003,9 @@ static struct cftype mem_cgroup_legacy_files[] = {
 #ifdef CONFIG_SLABINFO
 	{
 		.name = "kmem.slabinfo",
-		.seq_start = slab_start,
-		.seq_next = slab_next,
-		.seq_stop = slab_stop,
+		.seq_start = memcg_slab_start,
+		.seq_next = memcg_slab_next,
+		.seq_stop = memcg_slab_stop,
 		.seq_show = memcg_slab_show,
 	},
 #endif

mm/slab.h

Lines changed: 3 additions & 0 deletions
@@ -494,6 +494,9 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 void *slab_start(struct seq_file *m, loff_t *pos);
 void *slab_next(struct seq_file *m, void *p, loff_t *pos);
 void slab_stop(struct seq_file *m, void *p);
+void *memcg_slab_start(struct seq_file *m, loff_t *pos);
+void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos);
+void memcg_slab_stop(struct seq_file *m, void *p);
 int memcg_slab_show(struct seq_file *m, void *p);
 
 void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);

mm/slab_common.c

Lines changed: 29 additions & 7 deletions
@@ -154,6 +154,7 @@ static int init_memcg_params(struct kmem_cache *s,
 	s->memcg_params.root_cache = root_cache;
 	s->memcg_params.memcg = memcg;
 	INIT_LIST_HEAD(&s->memcg_params.children_node);
+	INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
 	return 0;
 }
 
@@ -224,6 +225,7 @@ int memcg_update_all_caches(int num_memcgs)
 static void unlink_memcg_cache(struct kmem_cache *s)
 {
 	list_del(&s->memcg_params.children_node);
+	list_del(&s->memcg_params.kmem_caches_node);
 }
 #else
 static inline int init_memcg_params(struct kmem_cache *s,
@@ -596,6 +598,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
 
 	list_add(&s->memcg_params.children_node,
 		 &root_cache->memcg_params.children);
+	list_add(&s->memcg_params.kmem_caches_node, &memcg->kmem_caches);
 
 	/*
 	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
@@ -651,9 +654,8 @@ void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
 	get_online_mems();
 
 	mutex_lock(&slab_mutex);
-	list_for_each_entry_safe(s, s2, &slab_caches, list) {
-		if (is_root_cache(s) || s->memcg_params.memcg != memcg)
-			continue;
+	list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
+				 memcg_params.kmem_caches_node) {
 		/*
 		 * The cgroup is about to be freed and therefore has no charges
 		 * left. Hence, all its caches must be empty by now.
@@ -1201,15 +1203,35 @@ static int slab_show(struct seq_file *m, void *p)
 }
 
 #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+void *memcg_slab_start(struct seq_file *m, loff_t *pos)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+	mutex_lock(&slab_mutex);
+	return seq_list_start(&memcg->kmem_caches, *pos);
+}
+
+void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+	return seq_list_next(p, &memcg->kmem_caches, pos);
+}
+
+void memcg_slab_stop(struct seq_file *m, void *p)
+{
+	mutex_unlock(&slab_mutex);
+}
+
 int memcg_slab_show(struct seq_file *m, void *p)
 {
-	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
+	struct kmem_cache *s = list_entry(p, struct kmem_cache,
+					  memcg_params.kmem_caches_node);
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
 
-	if (p == slab_caches.next)
+	if (p == memcg->kmem_caches.next)
 		print_slabinfo_header(m);
-	if (!is_root_cache(s) && s->memcg_params.memcg == memcg)
-		cache_show(s, m);
+	cache_show(s, m);
 	return 0;
 }
 #endif
