Skip to content

Commit 510ded3

Browse files
htejun authored and torvalds committed
slab: implement slab_root_caches list
With kmem cgroup support enabled, kmem_caches can be created and destroyed frequently and a great number of near empty kmem_caches can accumulate if there are a lot of transient cgroups and the system is not under memory pressure. When memory reclaim starts under such conditions, it can lead to consecutive deactivation and destruction of many kmem_caches, easily hundreds of thousands on moderately large systems, exposing scalability issues in the current slab management code. This is one of the patches to address the issue. slab_caches currently lists all caches including root and memcg ones. This is the only data structure which lists the root caches and iterating root caches can only be done by walking the list while skipping over memcg caches. As there can be a huge number of memcg caches, this can become very expensive. This also can make /proc/slabinfo behave very badly. seq_file processes reads in 4k chunks and seeks to the previous Nth position on slab_caches list to resume after each chunk. With a lot of memcg cache churns on the list, reading /proc/slabinfo can become very slow and its content often ends up with duplicate and/or missing entries. This patch adds a new list slab_root_caches which lists only the root caches. When memcg is not enabled, it becomes just an alias of slab_caches. memcg specific list operations are collected into memcg_[un]link_cache(). Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Tejun Heo <[email protected]> Reported-by: Jay Vana <[email protected]> Acked-by: Vladimir Davydov <[email protected]> Cc: Christoph Lameter <[email protected]> Cc: Pekka Enberg <[email protected]> Cc: David Rientjes <[email protected]> Cc: Joonsoo Kim <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent bc2791f commit 510ded3

File tree

4 files changed

+53
-25
lines changed

4 files changed

+53
-25
lines changed

include/linux/slab.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,8 @@ struct memcg_cache_array {
556556
* used to index child cachces during allocation and cleared
557557
* early during shutdown.
558558
*
559+
* @root_caches_node: List node for slab_root_caches list.
560+
*
559561
* @children: List of all child caches. While the child caches are also
560562
* reachable through @memcg_caches, a child cache remains on
561563
* this list until it is actually destroyed.
@@ -573,6 +575,7 @@ struct memcg_cache_params {
573575
union {
574576
struct {
575577
struct memcg_cache_array __rcu *memcg_caches;
578+
struct list_head __root_caches_node;
576579
struct list_head children;
577580
};
578581
struct {

mm/slab.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,11 @@ void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
201201
int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
202202

203203
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
204+
205+
/* List of all root caches. */
206+
extern struct list_head slab_root_caches;
207+
#define root_caches_node memcg_params.__root_caches_node
208+
204209
/*
205210
* Iterate over all memcg caches of the given root cache. The caller must hold
206211
* slab_mutex.
@@ -300,9 +305,14 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
300305
}
301306

302307
extern void slab_init_memcg_params(struct kmem_cache *);
308+
extern void memcg_link_cache(struct kmem_cache *s);
303309

304310
#else /* CONFIG_MEMCG && !CONFIG_SLOB */
305311

312+
/* If !memcg, all caches are root. */
313+
#define slab_root_caches slab_caches
314+
#define root_caches_node list
315+
306316
#define for_each_memcg_cache(iter, root) \
307317
for ((void)(iter), (void)(root); 0; )
308318

@@ -347,6 +357,11 @@ static inline void memcg_uncharge_slab(struct page *page, int order,
347357
static inline void slab_init_memcg_params(struct kmem_cache *s)
348358
{
349359
}
360+
361+
static inline void memcg_link_cache(struct kmem_cache *s)
362+
{
363+
}
364+
350365
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
351366

352367
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)

mm/slab_common.c

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,9 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
138138
}
139139

140140
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
141+
142+
LIST_HEAD(slab_root_caches);
143+
141144
void slab_init_memcg_params(struct kmem_cache *s)
142145
{
143146
s->memcg_params.root_cache = NULL;
@@ -183,9 +186,6 @@ static int update_memcg_params(struct kmem_cache *s, int new_array_size)
183186
{
184187
struct memcg_cache_array *old, *new;
185188

186-
if (!is_root_cache(s))
187-
return 0;
188-
189189
new = kzalloc(sizeof(struct memcg_cache_array) +
190190
new_array_size * sizeof(void *), GFP_KERNEL);
191191
if (!new)
@@ -209,7 +209,7 @@ int memcg_update_all_caches(int num_memcgs)
209209
int ret = 0;
210210

211211
mutex_lock(&slab_mutex);
212-
list_for_each_entry(s, &slab_caches, list) {
212+
list_for_each_entry(s, &slab_root_caches, root_caches_node) {
213213
ret = update_memcg_params(s, num_memcgs);
214214
/*
215215
* Instead of freeing the memory, we'll just leave the caches
@@ -222,10 +222,26 @@ int memcg_update_all_caches(int num_memcgs)
222222
return ret;
223223
}
224224

225-
static void unlink_memcg_cache(struct kmem_cache *s)
225+
void memcg_link_cache(struct kmem_cache *s)
226+
{
227+
if (is_root_cache(s)) {
228+
list_add(&s->root_caches_node, &slab_root_caches);
229+
} else {
230+
list_add(&s->memcg_params.children_node,
231+
&s->memcg_params.root_cache->memcg_params.children);
232+
list_add(&s->memcg_params.kmem_caches_node,
233+
&s->memcg_params.memcg->kmem_caches);
234+
}
235+
}
236+
237+
static void memcg_unlink_cache(struct kmem_cache *s)
226238
{
227-
list_del(&s->memcg_params.children_node);
228-
list_del(&s->memcg_params.kmem_caches_node);
239+
if (is_root_cache(s)) {
240+
list_del(&s->root_caches_node);
241+
} else {
242+
list_del(&s->memcg_params.children_node);
243+
list_del(&s->memcg_params.kmem_caches_node);
244+
}
229245
}
230246
#else
231247
static inline int init_memcg_params(struct kmem_cache *s,
@@ -238,7 +254,7 @@ static inline void destroy_memcg_params(struct kmem_cache *s)
238254
{
239255
}
240256

241-
static inline void unlink_memcg_cache(struct kmem_cache *s)
257+
static inline void memcg_unlink_cache(struct kmem_cache *s)
242258
{
243259
}
244260
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
@@ -285,7 +301,7 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
285301
if (flags & SLAB_NEVER_MERGE)
286302
return NULL;
287303

288-
list_for_each_entry_reverse(s, &slab_caches, list) {
304+
list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
289305
if (slab_unmergeable(s))
290306
continue;
291307

@@ -369,6 +385,7 @@ static struct kmem_cache *create_cache(const char *name,
369385

370386
s->refcount = 1;
371387
list_add(&s->list, &slab_caches);
388+
memcg_link_cache(s);
372389
out:
373390
if (err)
374391
return ERR_PTR(err);
@@ -514,9 +531,8 @@ static int shutdown_cache(struct kmem_cache *s)
514531
if (__kmem_cache_shutdown(s) != 0)
515532
return -EBUSY;
516533

534+
memcg_unlink_cache(s);
517535
list_del(&s->list);
518-
if (!is_root_cache(s))
519-
unlink_memcg_cache(s);
520536

521537
if (s->flags & SLAB_DESTROY_BY_RCU) {
522538
list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
@@ -596,10 +612,6 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
596612
goto out_unlock;
597613
}
598614

599-
list_add(&s->memcg_params.children_node,
600-
&root_cache->memcg_params.children);
601-
list_add(&s->memcg_params.kmem_caches_node, &memcg->kmem_caches);
602-
603615
/*
604616
* Since readers won't lock (see cache_from_memcg_idx()), we need a
605617
* barrier here to ensure nobody will see the kmem_cache partially
@@ -627,10 +639,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
627639
get_online_mems();
628640

629641
mutex_lock(&slab_mutex);
630-
list_for_each_entry(s, &slab_caches, list) {
631-
if (!is_root_cache(s))
632-
continue;
633-
642+
list_for_each_entry(s, &slab_root_caches, root_caches_node) {
634643
arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
635644
lockdep_is_held(&slab_mutex));
636645
c = arr->entries[idx];
@@ -829,6 +838,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
829838

830839
create_boot_cache(s, name, size, flags);
831840
list_add(&s->list, &slab_caches);
841+
memcg_link_cache(s);
832842
s->refcount = 1;
833843
return s;
834844
}
@@ -1136,12 +1146,12 @@ static void print_slabinfo_header(struct seq_file *m)
11361146
void *slab_start(struct seq_file *m, loff_t *pos)
11371147
{
11381148
mutex_lock(&slab_mutex);
1139-
return seq_list_start(&slab_caches, *pos);
1149+
return seq_list_start(&slab_root_caches, *pos);
11401150
}
11411151

11421152
void *slab_next(struct seq_file *m, void *p, loff_t *pos)
11431153
{
1144-
return seq_list_next(p, &slab_caches, pos);
1154+
return seq_list_next(p, &slab_root_caches, pos);
11451155
}
11461156

11471157
void slab_stop(struct seq_file *m, void *p)
@@ -1193,12 +1203,11 @@ static void cache_show(struct kmem_cache *s, struct seq_file *m)
11931203

11941204
static int slab_show(struct seq_file *m, void *p)
11951205
{
1196-
struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
1206+
struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);
11971207

1198-
if (p == slab_caches.next)
1208+
if (p == slab_root_caches.next)
11991209
print_slabinfo_header(m);
1200-
if (is_root_cache(s))
1201-
cache_show(s, m);
1210+
cache_show(s, m);
12021211
return 0;
12031212
}
12041213

mm/slub.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4127,6 +4127,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
41274127
}
41284128
slab_init_memcg_params(s);
41294129
list_add(&s->list, &slab_caches);
4130+
memcg_link_cache(s);
41304131
return s;
41314132
}
41324133

0 commit comments

Comments
 (0)