Skip to content

Commit 528b589

Browse files
tonychang04 and claude authored
✨ feat: add insforge + supabase mcp support to evaluate against postgres (#214)
Co-authored-by: Claude <[email protected]>
1 parent 705e07a commit 528b589

17 files changed

+2025
-17
lines changed

run-task.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,16 @@ elif [ "$SERVICE" = "filesystem" ]; then
139139
$([ -f .mcp_env ] && echo "-v $(pwd)/.mcp_env:/app/.mcp_env:ro") \
140140
"$DOCKER_IMAGE" \
141141
python3 -m pipeline --mcp "$SERVICE" --k 1 "$@"
142+
elif [ "$SERVICE" = "insforge" ]; then
143+
# For Insforge service, map host.docker.internal to the host gateway so the container can reach the Insforge backend running on the host
144+
docker run --rm \
145+
--memory="$DOCKER_MEMORY_LIMIT" \
146+
--cpus="$DOCKER_CPU_LIMIT" \
147+
--add-host=host.docker.internal:host-gateway \
148+
-v "$(pwd)/results:/app/results" \
149+
$([ -f .mcp_env ] && echo "-v $(pwd)/.mcp_env:/app/.mcp_env:ro") \
150+
"$DOCKER_IMAGE" \
151+
python3 -m pipeline --mcp "$SERVICE" --k 1 "$@"
142152
else
143153
# For other services (notion, github, playwright, etc.)
144154
docker run --rm \

src/agents/base_agent.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
class BaseMCPAgent(ABC):
2020
"""Base class with shared functionality for MCPMark agents."""
2121

22-
STDIO_SERVICES = ["notion", "filesystem", "playwright", "playwright_webarena", "postgres"]
23-
HTTP_SERVICES = ["github"]
22+
STDIO_SERVICES = ["notion", "filesystem", "playwright", "playwright_webarena", "postgres", "insforge"]
23+
HTTP_SERVICES = ["github", "supabase"]
2424
DEFAULT_TIMEOUT = 600
2525

2626
CLAUDE_THINKING_BUDGETS = {
@@ -207,6 +207,20 @@ def _create_stdio_server(self) -> MCPStdioServer:
207207
env={"DATABASE_URI": database_url},
208208
)
209209

210+
if self.mcp_service == "insforge":
211+
api_key = self.service_config.get("api_key")
212+
backend_url = self.service_config.get("backend_url")
213+
if not all([api_key, backend_url]):
214+
raise ValueError("Insforge requires api_key and backend_url")
215+
return MCPStdioServer(
216+
command="npx",
217+
args=["-y", "@insforge/mcp@dev"],
218+
env={
219+
"INSFORGE_API_KEY": api_key,
220+
"INSFORGE_BACKEND_URL": backend_url,
221+
},
222+
)
223+
210224
raise ValueError(f"Unsupported stdio service: {self.mcp_service}")
211225

212226
def _create_http_server(self) -> MCPHttpServer:

src/agents/mcpmark_agent.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -844,18 +844,32 @@ def _create_stdio_server(self) -> MCPStdioServer:
844844
username = self.service_config.get("username")
845845
password = self.service_config.get("password")
846846
database = self.service_config.get("current_database") or self.service_config.get("database")
847-
847+
848848
if not all([username, password, database]):
849849
raise ValueError("PostgreSQL requires username, password, and database")
850-
850+
851851
database_url = f"postgresql://{username}:{password}@{host}:{port}/{database}"
852-
852+
853853
return MCPStdioServer(
854854
command="pipx",
855855
args=["run", "postgres-mcp", "--access-mode=unrestricted"],
856856
env={"DATABASE_URI": database_url}
857857
)
858-
858+
859+
elif self.mcp_service == "insforge":
860+
api_key = self.service_config.get("api_key")
861+
backend_url = self.service_config.get("backend_url")
862+
if not all([api_key, backend_url]):
863+
raise ValueError("Insforge requires api_key and backend_url")
864+
return MCPStdioServer(
865+
command="npx",
866+
args=["-y", "@insforge/mcp@dev"],
867+
env={
868+
"INSFORGE_API_KEY": api_key,
869+
"INSFORGE_BACKEND_URL": backend_url,
870+
},
871+
)
872+
859873
else:
860874
raise ValueError(f"Unsupported stdio service: {self.mcp_service}")
861875

@@ -866,14 +880,34 @@ def _create_http_server(self) -> MCPHttpServer:
866880
github_token = self.service_config.get("github_token")
867881
if not github_token:
868882
raise ValueError("GitHub token required")
869-
883+
870884
return MCPHttpServer(
871885
url="https://api.githubcopilot.com/mcp/",
872886
headers={
873887
"Authorization": f"Bearer {github_token}",
874888
"User-Agent": "MCPMark/1.0"
875889
}
876890
)
891+
892+
elif self.mcp_service == "supabase":
893+
# Use built-in MCP server from Supabase CLI
894+
api_url = self.service_config.get("api_url", "http://localhost:54321")
895+
api_key = self.service_config.get("api_key", "")
896+
897+
if not api_key:
898+
raise ValueError("Supabase requires api_key (use secret key from 'supabase status')")
899+
900+
# Supabase CLI exposes MCP at /mcp endpoint
901+
mcp_url = f"{api_url}/mcp"
902+
903+
return MCPHttpServer(
904+
url=mcp_url,
905+
headers={
906+
"apikey": api_key,
907+
"Authorization": f"Bearer {api_key}",
908+
}
909+
)
910+
877911
else:
878912
raise ValueError(f"Unsupported HTTP service: {self.mcp_service}")
879913

src/aggregators/aggregate_results.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,19 @@
2323
def discover_tasks() -> Dict[str, List[str]]:
2424
"""Discover all tasks from ./tasks directory."""
2525
tasks_dir = Path("./tasks")
26-
26+
2727
all_tasks = {}
28-
28+
2929
# Handle each MCP service
3030
# Note: playwright and playwright_webarena both map to "playwright" MCP
3131
service_mappings = {
3232
"filesystem": ["filesystem"],
3333
"github": ["github"],
3434
"notion": ["notion"],
3535
"playwright": ["playwright", "playwright_webarena"], # Both count as playwright
36-
"postgres": ["postgres"]
36+
"postgres": ["postgres", "supabase", "insforge"] # All map to postgres
3737
}
38-
38+
3939
for mcp_service, task_dirs in service_mappings.items():
4040
tasks = []
4141
for task_dir_name in task_dirs:
@@ -68,9 +68,11 @@ def collect_results(exp_dir: Path, k: int) -> Dict[str, Dict[str, Any]]:
6868
continue
6969

7070
model, service = model_service_dir.name.split("__", 1)
71-
# Normalize service name: treat playwright_webarena as playwright
71+
# Normalize service names
7272
if service == "playwright_webarena":
7373
service = "playwright"
74+
elif service in ["supabase", "insforge"]:
75+
service = "postgres"
7476

7577
for run_idx in range(1, k + 1):
7678
run_dir = model_service_dir / f"run-{run_idx}"
@@ -874,23 +876,23 @@ def main():
874876
help="Comma-separated list of models that only need run-1"
875877
)
876878
parser.add_argument("--push", action="store_true", help="Push to GitHub (default to main)")
877-
879+
878880
args = parser.parse_args()
879-
881+
880882
# Parse single-run models
881883
single_run_models = []
882884
if args.single_run_models:
883885
single_run_models = [m.strip() for m in args.single_run_models.split(",")]
884886
print(f"📌 Single-run models: {', '.join(single_run_models)}")
885-
887+
886888
# Setup paths
887889
exp_dir = Path("./results") / args.exp_name
888890
if not exp_dir.exists():
889891
print(f"❌ Experiment directory {exp_dir} does not exist")
890892
return 1
891-
893+
892894
print(f"🔄 Processing experiment: {args.exp_name}")
893-
895+
894896
# Discover all tasks
895897
print("📋 Discovering tasks...")
896898
all_tasks = discover_tasks()

0 commit comments

Comments
 (0)