Skip to content

Commit 2e5f564

Browse files
authored
feat: Added API for discovering Feature Views by popular tags (#5558)
Signed-off-by: ntkathole <[email protected]>
1 parent 8318f64 commit 2e5f564

File tree

3 files changed

+362
-5
lines changed

3 files changed

+362
-5
lines changed

docs/reference/feature-servers/registry-server.md

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1123,7 +1123,72 @@ Please refer the [page](./../../../docs/getting-started/concepts/permission.md)
11231123

11241124
**Note**: Recent visits are automatically logged when users access registry objects via the REST API. The logging behavior can be configured through the `feature_server.recent_visit_logging` section in `feature_store.yaml` (see configuration section below).
11251125

1126-
---
1126+
#### Get Popular Tags
1127+
- **Endpoint**: `GET /api/v1/metrics/popular_tags`
1128+
- **Description**: Discover Feature Views by popular tags. Returns the most popular tags (the tag key/value pairs assigned to the largest number of feature views) together with their associated feature views. If no project is specified, popular tags are computed across all projects.
1129+
- **Parameters**:
1130+
- `project` (optional): Name of the project to search for popular tags (all projects are searched if not specified)
1131+
- `limit` (optional, default: 4): Maximum number of popular tags to return
1132+
- `allow_cache` (optional, default: true): Whether to allow cached responses
1133+
- **Examples**:
1134+
```bash
1135+
# Basic usage (all projects)
1136+
curl -H "Authorization: Bearer <token>" \
1137+
"http://localhost:6572/api/v1/metrics/popular_tags"
1138+
1139+
# Specific project
1140+
curl -H "Authorization: Bearer <token>" \
1141+
"http://localhost:6572/api/v1/metrics/popular_tags?project=my_project"
1142+
1143+
# Custom limit
1144+
curl -H "Authorization: Bearer <token>" \
1145+
"http://localhost:6572/api/v1/metrics/popular_tags?project=my_project&limit=3"
1146+
```
1147+
- **Response Model**: `PopularTagsResponse`
1148+
- **Response Example**:
1149+
```json
1150+
{
1151+
"popular_tags": [
1152+
{
1153+
"tag_key": "environment",
1154+
"tag_value": "production",
1155+
"feature_views": [
1156+
{
1157+
"name": "user_features",
1158+
"project": "my_project"
1159+
},
1160+
{
1161+
"name": "order_features",
1162+
"project": "my_project"
1163+
}
1164+
],
1165+
"total_feature_views": 2
1166+
},
1167+
{
1168+
"tag_key": "team",
1169+
"tag_value": "ml_team",
1170+
"feature_views": [
1171+
{
1172+
"name": "user_features",
1173+
"project": "my_project"
1174+
}
1175+
],
1176+
"total_feature_views": 1
1177+
}
1178+
],
1179+
"metadata": {
1180+
"totalFeatureViews": 3,
1181+
"totalTags": 2,
1182+
"limit": 4
1183+
}
1184+
}
1185+
```
1186+
1187+
**Response Models:**
1188+
- `FeatureViewInfo`: Contains feature view name and project
1189+
- `PopularTagInfo`: Contains tag information and associated feature views
1190+
- `PopularTagsMetadata`: Contains metadata about the response
1191+
- `PopularTagsResponse`: Main response model containing popular tags and metadata
11271192

11281193
## Registry Server Configuration: Recent Visit Logging
11291194

@@ -1162,3 +1227,4 @@ feature_server:
11621227
- Only the most recent `limit` visits per user are stored
11631228
- Metrics endpoints (`/metrics/*`) are automatically excluded from logging to prevent circular references
11641229
- Visit data is stored per user and per project in the registry metadata
1230+

sdk/python/feast/api/registry/rest/metrics.py

Lines changed: 194 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import json
22
import logging
3-
from typing import Optional
3+
from typing import Dict, List, Optional
44

5-
from fastapi import APIRouter, Depends, Query, Request
5+
from fastapi import APIRouter, Depends, HTTPException, Query, Request
6+
from pydantic import BaseModel, Field
67

8+
from feast.api.registry.rest.feature_views import _extract_feature_view_from_any
79
from feast.api.registry.rest.rest_utils import (
810
get_pagination_params,
911
get_sorting_params,
@@ -13,6 +15,47 @@
1315
from feast.protos.feast.registry import RegistryServer_pb2
1416

1517

18+
class FeatureViewInfo(BaseModel):
    """Identifies a single feature view listed under a popular tag."""

    # Both fields are required (default is Ellipsis).
    name: str = Field(
        default=...,
        description="Name of the feature view",
    )
    project: str = Field(
        default=...,
        description="Project name of the feature view",
    )
23+
24+
25+
class PopularTagInfo(BaseModel):
    """One tag key/value pair together with the feature views that carry it."""

    # All fields are required (default is Ellipsis).
    tag_key: str = Field(default=..., description="Tag key")
    tag_value: str = Field(default=..., description="Tag value")
    feature_views: List[FeatureViewInfo] = Field(
        default=...,
        description="List of feature views with this tag",
    )
    total_feature_views: int = Field(
        default=...,
        description="Total number of feature views with this tag",
    )
36+
37+
38+
class PopularTagsMetadata(BaseModel):
    """Summary counters returned alongside the popular tags list."""

    # Field names are camelCase on purpose: they are the keys of the JSON
    # response, so renaming them would change the wire format.
    totalFeatureViews: int = Field(
        default=...,
        description="Total number of feature views processed",
    )
    totalTags: int = Field(
        default=...,
        description="Total number of unique tags found",
    )
    limit: int = Field(
        default=...,
        description="Number of popular tags requested",
    )
46+
47+
48+
class PopularTagsResponse(BaseModel):
    """Top-level payload of the popular tags endpoint."""

    # Both fields are required (default is Ellipsis).
    popular_tags: List[PopularTagInfo] = Field(
        default=...,
        description="List of popular tags with their associated feature views",
    )
    metadata: PopularTagsMetadata = Field(
        default=...,
        description="Metadata about the response",
    )
57+
58+
1659
def get_metrics_router(grpc_handler, server=None) -> APIRouter:
1760
logger = logging.getLogger(__name__)
1861
router = APIRouter()
@@ -96,6 +139,155 @@ def count_resources_for_project(project_name: str):
96139
total_counts[k] += counts[k]
97140
return {"total": total_counts, "perProject": all_counts}
98141

142+
@router.get(
    "/metrics/popular_tags", tags=["Metrics"], response_model=PopularTagsResponse
)
async def popular_tags(
    project: Optional[str] = Query(
        None,
        description="Project name for popular tags (optional, returns all projects if not specified)",
    ),
    # ge=0 rejects negative limits up front; without it a negative value would
    # be used as a slice bound and silently drop entries from the END of the
    # ranking instead of limiting it.
    limit: int = Query(
        4,
        ge=0,
        description="Number of popular tags to return",
    ),
    allow_cache: bool = Query(default=True),
):
    """
    Discover Feature Views by popular tags.

    Groups feature views by each (tag key, tag value) pair they carry, ranks
    the pairs by how many feature views use them, and returns the top
    ``limit`` pairs together with their feature views. If no project is
    specified, feature views from all projects are considered.

    Args:
        project: Project to inspect; when empty or omitted, every project
            returned by ``ListProjects`` is inspected.
        limit: Maximum number of tag pairs to return (must be >= 0).
        allow_cache: Forwarded as ``allow_cache`` on the registry requests.

    Returns:
        PopularTagsResponse with the ranked tags and metadata:
        ``totalFeatureViews`` counts every feature view inspected (tagged or
        not) and ``totalTags`` counts the distinct (key, value) pairs found
        before ``limit`` is applied.

    Raises:
        HTTPException: Re-raised unchanged when raised while handling the
            request; any other error is reported as a 500.
    """

    def build_tag_collection(
        feature_views: List[Dict],
    ) -> Dict[str, Dict[str, List[Dict]]]:
        """Group feature views as ``{tag_key: {tag_value: [feature_view, ...]}}``."""
        tag_collection: Dict[str, Dict[str, List[Dict]]] = {}
        for fv in feature_views:
            # Feature views without tags simply contribute nothing.
            tags = fv.get("spec", {}).get("tags", {}) or {}
            for tag_key, tag_value in tags.items():
                tag_collection.setdefault(tag_key, {}).setdefault(
                    tag_value, []
                ).append(fv)
        return tag_collection

    def find_most_popular_tags(
        tag_collection: Dict[str, Dict[str, List[Dict]]],
    ) -> List[Dict]:
        """Flatten the grouping into one entry per pair, most used first."""
        tag_popularity = [
            {
                "tag_key": tag_key,
                "tag_value": tag_value,
                "feature_views": fv_entries,
                "total_feature_views": len(fv_entries),
            }
            for tag_key, tag_values_map in tag_collection.items()
            for tag_value, fv_entries in tag_values_map.items()
        ]
        # Ordering is kept as-is for compatibility: descending by count, ties
        # broken by tag key in descending order (reverse=True covers both).
        return sorted(
            tag_popularity,
            key=lambda x: (x["total_feature_views"], x["tag_key"]),
            reverse=True,
        )

    def get_feature_views_for_project(project_name: str) -> List[Dict]:
        """List a project's feature views, each annotated with its project name."""
        req = RegistryServer_pb2.ListAllFeatureViewsRequest(
            project=project_name,
            allow_cache=allow_cache,
        )
        response = grpc_call(grpc_handler.ListAllFeatureViews, req)
        feature_views = []
        for any_feature_view in response.get("featureViews", []):
            feature_view = _extract_feature_view_from_any(any_feature_view)
            if feature_view:
                # Stamp the project here so it is still known after views from
                # several projects are merged into one list.
                feature_view["project"] = project_name
                feature_views.append(feature_view)
        return feature_views

    try:
        if project:
            feature_views = get_feature_views_for_project(project)
        else:
            projects_resp = grpc_call(
                grpc_handler.ListProjects,
                RegistryServer_pb2.ListProjectsRequest(allow_cache=allow_cache),
            )
            feature_views = []
            for project_info in projects_resp.get("projects", []):
                feature_views.extend(
                    get_feature_views_for_project(project_info["spec"]["name"])
                )

        # No special-casing of empty input is needed: with no feature views
        # (or no tags) the ranking is an empty list, so the general path below
        # already yields popular_tags=[] and totalTags=0.
        ranked_tags = find_most_popular_tags(build_tag_collection(feature_views))

        formatted_tags = [
            PopularTagInfo(
                tag_key=tag_info["tag_key"],
                tag_value=tag_info["tag_value"],
                feature_views=[
                    FeatureViewInfo(
                        name=fv.get("spec", {}).get("name", "unknown"),
                        project=fv.get("project", "unknown"),
                    )
                    for fv in tag_info["feature_views"]
                ],
                total_feature_views=tag_info["total_feature_views"],
            )
            for tag_info in ranked_tags[:limit]
        ]

        return PopularTagsResponse(
            popular_tags=formatted_tags,
            metadata=PopularTagsMetadata(
                totalFeatureViews=len(feature_views),
                totalTags=len(ranked_tags),
                limit=limit,
            ),
        )
    except HTTPException:
        # An HTTPException already carries a deliberate status code; do not
        # flatten it into a generic 500.
        # NOTE(review): if grpc_call raises Feast-specific errors (not found,
        # permission denied) that the app maps to status codes elsewhere, they
        # are still converted to 500 below - confirm and re-raise them too.
        raise
    except Exception as e:
        logger.exception("Failed to generate popular tags")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to generate popular tags: {str(e)}",
        ) from e
290+
99291
@router.get("/metrics/recently_visited", tags=["Metrics"])
100292
async def recently_visited(
101293
request: Request,

0 commit comments

Comments
 (0)