Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 67 additions & 1 deletion docs/reference/feature-servers/registry-server.md
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,72 @@ Please refer the [page](./../../../docs/getting-started/concepts/permission.md)

**Note**: Recent visits are automatically logged when users access registry objects via the REST API. The logging behavior can be configured through the `feature_server.recent_visit_logging` section in `feature_store.yaml` (see configuration section below).

---
#### Get Popular Tags
- **Endpoint**: `GET /api/v1/metrics/popular_tags`
- **Description**: Discover feature views by popular tags. Returns the most popular tags (the tag key/value pairs assigned to the largest number of feature views) together with the feature views that carry them. If no project is specified, popular tags are computed across all projects.
- **Parameters**:
- `project` (optional): Project name for popular tags (returns all projects if not specified)
- `limit` (optional, default: 4): Number of popular tags to return
- `allow_cache` (optional, default: true): Whether to allow cached responses
- **Examples**:
```bash
# Basic usage (all projects)
curl -H "Authorization: Bearer <token>" \
"http://localhost:6572/api/v1/metrics/popular_tags"

# Specific project
curl -H "Authorization: Bearer <token>" \
"http://localhost:6572/api/v1/metrics/popular_tags?project=my_project"

# Custom limit
curl -H "Authorization: Bearer <token>" \
"http://localhost:6572/api/v1/metrics/popular_tags?project=my_project&limit=3"
```
- **Response Model**: `PopularTagsResponse`
- **Response Example**:
```json
{
"popular_tags": [
{
"tag_key": "environment",
"tag_value": "production",
"feature_views": [
{
"name": "user_features",
"project": "my_project"
},
{
"name": "order_features",
"project": "my_project"
}
],
"total_feature_views": 2
},
{
"tag_key": "team",
"tag_value": "ml_team",
"feature_views": [
{
"name": "user_features",
"project": "my_project"
}
],
"total_feature_views": 1
}
],
"metadata": {
"totalFeatureViews": 3,
"totalTags": 2,
"limit": 4
}
}
```

**Response Models:**
- `FeatureViewInfo`: Contains feature view name and project
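- `PopularTagInfo`: Contains a tag key/value pair, the feature views that carry that tag, and the number of those feature views
- `PopularTagsMetadata`: Contains the total number of feature views processed, the total number of unique tags found, and the requested limit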
- `PopularTagsResponse`: Main response model containing popular tags and metadata

## Registry Server Configuration: Recent Visit Logging

Expand Down Expand Up @@ -1162,3 +1227,4 @@ feature_server:
- Only the most recent `limit` visits per user are stored
- Metrics endpoints (`/metrics/*`) are automatically excluded from logging to prevent circular references
- Visit data is stored per user and per project in the registry metadata

196 changes: 194 additions & 2 deletions sdk/python/feast/api/registry/rest/metrics.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import json
import logging
from typing import Optional
from typing import Dict, List, Optional

from fastapi import APIRouter, Depends, Query, Request
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from pydantic import BaseModel, Field

from feast.api.registry.rest.feature_views import _extract_feature_view_from_any
from feast.api.registry.rest.rest_utils import (
get_pagination_params,
get_sorting_params,
Expand All @@ -13,6 +15,47 @@
from feast.protos.feast.registry import RegistryServer_pb2


class FeatureViewInfo(BaseModel):
    """Feature view information in popular tags response."""

    # One entry per feature view listed under a popular tag. Neither field
    # carries a real default (the Ellipsis marks it as required).
    name: str = Field(
        default=...,
        description="Name of the feature view",
    )
    project: str = Field(
        default=...,
        description="Project name of the feature view",
    )


class PopularTagInfo(BaseModel):
    """Popular tag information with associated feature views."""

    # The tag itself, expressed as a key/value pair.
    tag_key: str = Field(
        default=...,
        description="Tag key",
    )
    tag_value: str = Field(
        default=...,
        description="Tag value",
    )

    # The feature views carrying this tag, plus their count.
    feature_views: List[FeatureViewInfo] = Field(
        default=...,
        description="List of feature views with this tag",
    )
    total_feature_views: int = Field(
        default=...,
        description="Total number of feature views with this tag",
    )


class PopularTagsMetadata(BaseModel):
    """Metadata for popular tags response."""

    # NOTE: the camelCase field names are the keys emitted in the response
    # body, so they are kept exactly as-is.
    totalFeatureViews: int = Field(
        default=...,
        description="Total number of feature views processed",
    )
    totalTags: int = Field(
        default=...,
        description="Total number of unique tags found",
    )
    limit: int = Field(
        default=...,
        description="Number of popular tags requested",
    )


class PopularTagsResponse(BaseModel):
    """Response model for popular tags endpoint."""

    # Top-level envelope: the ranked tag entries plus summary metadata.
    popular_tags: List[PopularTagInfo] = Field(
        default=...,
        description="List of popular tags with their associated feature views",
    )
    metadata: PopularTagsMetadata = Field(
        default=...,
        description="Metadata about the response",
    )


def get_metrics_router(grpc_handler, server=None) -> APIRouter:
logger = logging.getLogger(__name__)
router = APIRouter()
Expand Down Expand Up @@ -96,6 +139,155 @@ def count_resources_for_project(project_name: str):
total_counts[k] += counts[k]
return {"total": total_counts, "perProject": all_counts}

@router.get(
    "/metrics/popular_tags", tags=["Metrics"], response_model=PopularTagsResponse
)
async def popular_tags(
    project: Optional[str] = Query(
        None,
        description="Project name for popular tags (optional, returns all projects if not specified)",
    ),
    # ge=0 rejects negative limits up front; a negative value would otherwise
    # make the `[:limit]` slice below silently drop entries from the tail.
    limit: int = Query(4, ge=0, description="Number of popular tags to return"),
    allow_cache: bool = Query(default=True),
):
    """
    Discover Feature Views by popular tags. Returns the most popular tags
    (the tag key/value pairs assigned to the largest number of feature views)
    with their associated feature views.
    If no project is specified, returns popular tags across all projects.
    """

    def build_tag_collection(
        feature_views: List[Dict],
    ) -> Dict[str, Dict[str, List[Dict]]]:
        """Group feature views by tag key, then by tag value."""
        tag_collection: Dict[str, Dict[str, List[Dict]]] = {}
        for fv in feature_views:
            # Feature views without a spec or without tags contribute nothing.
            tags = fv.get("spec", {}).get("tags") or {}
            for tag_key, tag_value in tags.items():
                tag_collection.setdefault(tag_key, {}).setdefault(
                    tag_value, []
                ).append(fv)
        return tag_collection

    def find_most_popular_tags(
        tag_collection: Dict[str, Dict[str, List[Dict]]],
    ) -> List[Dict]:
        """Flatten the tag collection and rank it, most-used tag first."""
        tag_popularity = [
            {
                "tag_key": tag_key,
                "tag_value": tag_value,
                "feature_views": fv_entries,
                "total_feature_views": len(fv_entries),
            }
            for tag_key, tag_values_map in tag_collection.items()
            for tag_value, fv_entries in tag_values_map.items()
        ]
        # Descending by feature-view count; ties are broken by tag key
        # (also descending, because of reverse=True).
        return sorted(
            tag_popularity,
            key=lambda x: (x["total_feature_views"], x["tag_key"]),
            reverse=True,
        )

    def get_feature_views_for_project(project_name: str) -> List[Dict]:
        """Fetch every feature view of one project, stamped with its project name."""
        req = RegistryServer_pb2.ListAllFeatureViewsRequest(
            project=project_name,
            allow_cache=allow_cache,
        )
        response = grpc_call(grpc_handler.ListAllFeatureViews, req)
        project_feature_views = []
        for any_feature_view in response.get("featureViews", []):
            feature_view = _extract_feature_view_from_any(any_feature_view)
            if feature_view:
                # Remember the owning project so it can be reported per view.
                feature_view["project"] = project_name
                project_feature_views.append(feature_view)
        return project_feature_views

    try:
        if project:
            feature_views = get_feature_views_for_project(project)
        else:
            projects_resp = grpc_call(
                grpc_handler.ListProjects,
                RegistryServer_pb2.ListProjectsRequest(allow_cache=allow_cache),
            )
            feature_views = []
            for project_info in projects_resp.get("projects", []):
                feature_views.extend(
                    get_feature_views_for_project(project_info["spec"]["name"])
                )

        # No special-casing of "no feature views" / "no tags" is needed: both
        # fall out of the general path as an empty list with zeroed totals.
        ranked_tags = find_most_popular_tags(build_tag_collection(feature_views))

        formatted_tags = [
            PopularTagInfo(
                tag_key=tag_info["tag_key"],
                tag_value=tag_info["tag_value"],
                feature_views=[
                    FeatureViewInfo(
                        name=fv.get("spec", {}).get("name", "unknown"),
                        project=fv.get("project", "unknown"),
                    )
                    for fv in tag_info["feature_views"]
                ],
                total_feature_views=tag_info["total_feature_views"],
            )
            for tag_info in ranked_tags[:limit]
        ]

        return PopularTagsResponse(
            popular_tags=formatted_tags,
            metadata=PopularTagsMetadata(
                totalFeatureViews=len(feature_views),
                totalTags=len(ranked_tags),
                limit=limit,
            ),
        )
    except HTTPException:
        # Preserve status codes raised further down instead of rewrapping
        # them as a generic 500.
        raise
    except Exception as e:
        # Keep the traceback in the server log; the client only gets the message.
        logger.exception("Failed to generate popular tags")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to generate popular tags: {str(e)}",
        ) from e

@router.get("/metrics/recently_visited", tags=["Metrics"])
async def recently_visited(
request: Request,
Expand Down
Loading
Loading