 	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
 	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)
 
+#define BATCH_OPS(_name)			\
+	.map_lookup_batch =			\
+	_name##_map_lookup_batch,		\
+	.map_lookup_and_delete_batch =		\
+	_name##_map_lookup_and_delete_batch,	\
+	.map_update_batch =			\
+	generic_map_update_batch,		\
+	.map_delete_batch =			\
+	generic_map_delete_batch
+
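For readers skimming the diff, BATCH_OPS() is a plain token-pasting macro: it stamps the four batch callbacks into a map's bpf_map_ops, and only the two lookup-style callbacks are flavor-specific (updates and deletes reuse the generic helpers generic_map_update_batch()/generic_map_delete_batch(), which live outside this file). As an editorial illustration, not part of the patch, BATCH_OPS(htab) expands to:

	/* expansion of BATCH_OPS(htab) inside the htab_map_ops initializer below */
	.map_lookup_batch = htab_map_lookup_batch,
	.map_lookup_and_delete_batch = htab_map_lookup_and_delete_batch,
	.map_update_batch = generic_map_update_batch,
	.map_delete_batch = generic_map_delete_batch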
 struct bucket {
 	struct hlist_nulls_head head;
 	raw_spinlock_t lock;
@@ -1232,6 +1242,256 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
 	rcu_read_unlock();
 }
 
+static int
+__htab_map_lookup_and_delete_batch(struct bpf_map *map,
+				   const union bpf_attr *attr,
+				   union bpf_attr __user *uattr,
+				   bool do_delete, bool is_lru_map,
+				   bool is_percpu)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
+	void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
+	void __user *uvalues = u64_to_user_ptr(attr->batch.values);
+	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
+	void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
+	u32 batch, max_count, size, bucket_size;
+	u64 elem_map_flags, map_flags;
+	struct hlist_nulls_head *head;
+	struct hlist_nulls_node *n;
+	unsigned long flags;
+	struct htab_elem *l;
+	struct bucket *b;
+	int ret = 0;
+
+	elem_map_flags = attr->batch.elem_flags;
+	if ((elem_map_flags & ~BPF_F_LOCK) ||
+	    ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
+		return -EINVAL;
+
+	map_flags = attr->batch.flags;
+	if (map_flags)
+		return -EINVAL;
+
+	max_count = attr->batch.count;
+	if (!max_count)
+		return 0;
+
+	if (put_user(0, &uattr->batch.count))
+		return -EFAULT;
+
+	batch = 0;
+	if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
+		return -EFAULT;
+
+	if (batch >= htab->n_buckets)
+		return -ENOENT;
+
+	key_size = htab->map.key_size;
+	roundup_key_size = round_up(htab->map.key_size, 8);
+	value_size = htab->map.value_size;
+	size = round_up(value_size, 8);
+	if (is_percpu)
+		value_size = size * num_possible_cpus();
+	total = 0;
+	/* While experimenting with hash tables with sizes ranging from 10 to
+	 * 1000, it was observed that a bucket can have up to 5 entries.
+	 */
+	bucket_size = 5;
+
+alloc:
+	/* We cannot do copy_from_user or copy_to_user inside
+	 * the rcu_read_lock. Allocate enough space here.
+	 */
+	keys = kvmalloc(key_size * bucket_size, GFP_USER | __GFP_NOWARN);
+	values = kvmalloc(value_size * bucket_size, GFP_USER | __GFP_NOWARN);
+	if (!keys || !values) {
+		ret = -ENOMEM;
+		goto after_loop;
+	}
+
+again:
+	preempt_disable();
+	this_cpu_inc(bpf_prog_active);
+	rcu_read_lock();
+again_nocopy:
+	dst_key = keys;
+	dst_val = values;
+	b = &htab->buckets[batch];
+	head = &b->head;
+	raw_spin_lock_irqsave(&b->lock, flags);
+
+	bucket_cnt = 0;
+	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+		bucket_cnt++;
+
+	if (bucket_cnt > (max_count - total)) {
+		if (total == 0)
+			ret = -ENOSPC;
+		raw_spin_unlock_irqrestore(&b->lock, flags);
+		rcu_read_unlock();
+		this_cpu_dec(bpf_prog_active);
+		preempt_enable();
+		goto after_loop;
+	}
+
+	if (bucket_cnt > bucket_size) {
+		bucket_size = bucket_cnt;
+		raw_spin_unlock_irqrestore(&b->lock, flags);
+		rcu_read_unlock();
+		this_cpu_dec(bpf_prog_active);
+		preempt_enable();
+		kvfree(keys);
+		kvfree(values);
+		goto alloc;
+	}
+
+	hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+		memcpy(dst_key, l->key, key_size);
+
+		if (is_percpu) {
+			int off = 0, cpu;
+			void __percpu *pptr;
+
+			pptr = htab_elem_get_ptr(l, map->key_size);
+			for_each_possible_cpu(cpu) {
+				bpf_long_memcpy(dst_val + off,
+						per_cpu_ptr(pptr, cpu), size);
+				off += size;
+			}
+		} else {
+			value = l->key + roundup_key_size;
+			if (elem_map_flags & BPF_F_LOCK)
+				copy_map_value_locked(map, dst_val, value,
+						      true);
+			else
+				copy_map_value(map, dst_val, value);
+			check_and_init_map_lock(map, dst_val);
+		}
+		if (do_delete) {
+			hlist_nulls_del_rcu(&l->hash_node);
+			if (is_lru_map)
+				bpf_lru_push_free(&htab->lru, &l->lru_node);
+			else
+				free_htab_elem(htab, l);
+		}
+		dst_key += key_size;
+		dst_val += value_size;
+	}
+
+	raw_spin_unlock_irqrestore(&b->lock, flags);
+	/* If we are not copying data, we can go to next bucket and avoid
+	 * unlocking the rcu.
+	 */
+	if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
+		batch++;
+		goto again_nocopy;
+	}
+
+	rcu_read_unlock();
+	this_cpu_dec(bpf_prog_active);
+	preempt_enable();
+	if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
+	    key_size * bucket_cnt) ||
+	    copy_to_user(uvalues + total * value_size, values,
+	    value_size * bucket_cnt))) {
+		ret = -EFAULT;
+		goto after_loop;
+	}
+
+	total += bucket_cnt;
+	batch++;
+	if (batch >= htab->n_buckets) {
+		ret = -ENOENT;
+		goto after_loop;
+	}
+	goto again;
+
+after_loop:
+	if (ret == -EFAULT)
+		goto out;
+
+	/* copy # of entries and next batch */
+	ubatch = u64_to_user_ptr(attr->batch.out_batch);
+	if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
+	    put_user(total, &uattr->batch.count))
+		ret = -EFAULT;
+
+out:
+	kvfree(keys);
+	kvfree(values);
+	return ret;
+}
+
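To make the resume protocol above concrete, here is a hedged userspace sketch of how a caller drives the batch cursor. It assumes the libbpf wrappers bpf_map_lookup_and_delete_batch() and struct bpf_map_batch_opts that ship alongside this kernel change, a map with 4-byte keys and values, and an illustrative buffer capacity of 128 entries; it is not code from this patch.

	#include <errno.h>
	#include <stdio.h>
	#include <bpf/bpf.h>

	static int drain_hash_map(int map_fd)
	{
		__u32 in_batch = 0, out_batch = 0;	/* opaque bucket cursor managed by the kernel */
		__u32 keys[128], vals[128];		/* assumes 4-byte keys and values */
		void *in = NULL;			/* NULL means "start from the first bucket" */
		__u32 count;
		int err;

		DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
			.elem_flags = 0,		/* or BPF_F_LOCK for spin-locked values */
			.flags = 0,
		);

		for (;;) {
			count = 128;			/* in: buffer capacity, out: entries copied */
			err = bpf_map_lookup_and_delete_batch(map_fd, in, &out_batch,
							      keys, vals, &count, &opts);
			if (err && errno != ENOENT)
				return -errno;		/* e.g. ENOSPC: one bucket exceeds the
							 * buffers; real code would grow and retry */

			for (__u32 i = 0; i < count; i++)
				printf("key %u -> value %u\n", keys[i], vals[i]);

			if (err)			/* errno == ENOENT: every bucket visited;
							 * note the final call may still return entries */
				break;

			in_batch = out_batch;		/* resume where the kernel stopped */
			in = &in_batch;
		}
		return 0;
	}

The same loop shape works for the non-deleting bpf_map_lookup_batch(); only the wrapper name changes.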
+static int
+htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+			     union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  false, true);
+}
+
+static int
+htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
+					const union bpf_attr *attr,
+					union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  false, true);
+}
+
+static int
+htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+		      union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  false, false);
+}
+
+static int
+htab_map_lookup_and_delete_batch(struct bpf_map *map,
+				 const union bpf_attr *attr,
+				 union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  false, false);
+}
+
+static int
+htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
+				 const union bpf_attr *attr,
+				 union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  true, true);
+}
+
+static int
+htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
+					    const union bpf_attr *attr,
+					    union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  true, true);
+}
+
+static int
+htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+			  union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  true, false);
+}
+
+static int
+htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
+				     const union bpf_attr *attr,
+				     union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  true, false);
+}
+
 const struct bpf_map_ops htab_map_ops = {
 	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
@@ -1242,6 +1502,7 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_delete_elem = htab_map_delete_elem,
 	.map_gen_lookup = htab_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
+	BATCH_OPS(htab),
 };
 
 const struct bpf_map_ops htab_lru_map_ops = {
@@ -1255,6 +1516,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_gen_lookup = htab_lru_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
+	BATCH_OPS(htab_lru),
 };
 
 /* Called from eBPF program */
@@ -1368,6 +1630,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
 	.map_update_elem = htab_percpu_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
+	BATCH_OPS(htab_percpu),
 };
 
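One practical consequence of the is_percpu path in __htab_map_lookup_and_delete_batch() (value_size becomes round_up(value_size, 8) * num_possible_cpus()): for the per-CPU flavors, the user-supplied value buffer must hold one 8-byte-aligned value per possible CPU for every requested entry. A minimal sizing sketch, assuming libbpf's libbpf_num_possible_cpus() helper and a hypothetical capacity parameter, neither of which is part of this patch:

	#include <stdlib.h>
	#include <linux/types.h>
	#include <bpf/libbpf.h>

	/* Returns a zeroed buffer suitable as the "values" argument of a batch
	 * lookup on a per-CPU hash map, or NULL on failure.
	 */
	static void *alloc_percpu_batch_values(__u32 value_size, __u32 capacity)
	{
		int ncpus = libbpf_num_possible_cpus();
		size_t per_elem;

		if (ncpus < 0)
			return NULL;

		/* mirror the kernel: round_up(value_size, 8) bytes per possible CPU */
		per_elem = (((size_t)value_size + 7) & ~(size_t)7) * ncpus;
		return calloc(capacity, per_elem);
	}

Within that buffer, the value for entry i on CPU c then sits at offset i * per_elem + c * round_up(value_size, 8), matching the per-CPU copy loop in the kernel code above.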
 const struct bpf_map_ops htab_lru_percpu_map_ops = {
@@ -1379,6 +1642,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
 	.map_update_elem = htab_lru_percpu_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
+	BATCH_OPS(htab_lru_percpu),
 };
 
 static int fd_htab_map_alloc_check(union bpf_attr *attr)