Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# Simplified Checkout Service with Dummy SQL
# The service's Python source is stored under the `app.py` key of this Secret.
# The `checkout` Deployment in this file mounts the Secret as a volume at /app
# and starts the service with `python /app/app.py`.
apiVersion: v1
kind: Secret
metadata:
name: checkout-app
type: Opaque
stringData:
app.py: |
import os
import time
import random
from flask import Flask, request, jsonify
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.sdk.resources import Resource
from opentelemetry.instrumentation.flask import FlaskInstrumentor

# Configure OpenTelemetry
# Every span produced by this process is tagged service.name=checkout-service;
# the test setup script searches Tempo for exactly that service name.
resource = Resource.create({"service.name": "checkout-service"})
provider = TracerProvider(resource=resource)
# Register the provider globally before any tracer is requested below.
trace.set_tracer_provider(provider)

# Spans are shipped over OTLP/gRPC to the Tempo service in namespace app-115.
otlp_exporter = OTLPSpanExporter(
endpoint="tempo.app-115.svc.cluster.local:4317",
# insecure=True: no TLS on the gRPC channel.
insecure=True
)
# Spans go through a BatchSpanProcessor rather than being exported one by one.
provider.add_span_processor(BatchSpanProcessor(otlp_exporter))

app = Flask(__name__)
# Instrument the Flask app via the OpenTelemetry Flask instrumentation; the
# handlers below add their own spans on top using `tracer`.
FlaskInstrumentor().instrument_app(app)

# Tracer used for the manual spans created inside the route handlers.
tracer = trace.get_tracer(__name__)

@app.route('/health')
def health():
    """Answer health checks with a plain 'OK' body.

    The Deployment's startupProbe issues GET /health on port 8080, so this
    handler must stay cheap and free of dependencies.
    """
    return 'OK'

@app.route('/checkout', methods=['POST'])
def checkout():
    """Handle POST /checkout: price an order, failing whenever a promo code is sent.

    Reads ``user_id``, ``zone_id``, ``promo_code`` and ``items`` from the JSON
    body. No real database is involved: a ``database_query`` child span plus a
    short random sleep stands in for the shipping-rate lookup.

    Returns:
        * ``400`` with an error payload when the JSON body is not an object.
        * ``500`` with a generic error payload when ``promo_code`` is truthy.
          The simulated unique-constraint violation is recorded on the
          ``database_query`` span and on the parent ``process_checkout`` span;
          it is not included in the HTTP response or the log line.
        * ``200`` with ``order_id``, ``subtotal``, ``shipping`` and ``total``
          otherwise.
    """
    with tracer.start_as_current_span("process_checkout") as span:
        data = request.json or {}
        if not isinstance(data, dict):
            # A JSON array or scalar has no .get(); answer 400 instead of
            # crashing with an AttributeError that would surface as a 500
            # unrelated to the simulated promo-code failure.
            return jsonify({
                "error": "Bad request",
                "message": "JSON body must be an object"
            }), 400

        # Extract parameters
        user_id = data.get('user_id', 'guest')
        zone_id = data.get('zone_id', 'us-west-1')
        promo_code = data.get('promo_code')
        items = data.get('items', [])

        # Log the incoming request. Only the user id is printed; promo_code
        # is left out of the log line and is visible on the span only.
        print(f"[CHECKOUT] Processing checkout request for user {user_id}", flush=True)

        # Add span attributes
        span.set_attribute("user.id", user_id)
        span.set_attribute("zone.id", zone_id)
        span.set_attribute("items.count", len(items))
        if promo_code:
            span.set_attribute("promo.code", promo_code)

        # Simulate database query for shipping calculation
        with tracer.start_as_current_span("database_query") as db_span:
            db_span.set_attribute("db.system", "postgresql")
            db_span.set_attribute("db.operation", "SELECT")

            if promo_code:
                # Simulate database error with promo_code
                query = "SELECT rate_per_kg, discount_percent FROM shipping_rates WHERE zone_id = ? AND promo_code = ? AND active = true"
                db_span.set_attribute("db.statement", query)

                # Simulate a small delay before failure
                time.sleep(random.uniform(0.05, 0.1))

                # Record the same simulated exception on the query span and
                # on the parent span so both are marked as errors.
                error_msg = "ERROR: duplicate key value violates unique constraint 'promo_codes_pkey'"
                db_error = Exception(error_msg)
                db_span.record_exception(db_error)
                db_span.set_status(trace.Status(trace.StatusCode.ERROR, error_msg))

                span.record_exception(db_error)
                span.set_status(trace.Status(trace.StatusCode.ERROR, "Database error"))

                print("[CHECKOUT] Request failed", flush=True)

                return jsonify({
                    "error": "Internal server error",
                    "message": "Request failed"
                }), 500

            # Simulate successful query without promo_code
            query = "SELECT rate_per_kg, discount_percent FROM shipping_rates WHERE zone_id = ? AND active = true"
            db_span.set_attribute("db.statement", query)
            time.sleep(random.uniform(0.05, 0.1))
            shipping_rate = 5.0
            discount = 0.0

        # Calculate shipping cost (only reached for non-promo requests);
        # an item without a 'weight' counts as 1.0.
        total_weight = sum(item.get('weight', 1.0) for item in items)
        shipping_cost = total_weight * shipping_rate * (1 - discount / 100)

        # Calculate total; an item without a 'price' counts as 0.
        subtotal = sum(item.get('price', 0) for item in items)
        total = subtotal + shipping_cost

        response = {
            "order_id": f"ord-{random.randint(1000, 9999)}",
            "subtotal": subtotal,
            "shipping": round(shipping_cost, 2),
            "total": round(total, 2)
        }

        print("[CHECKOUT] Completed checkout request", flush=True)
        return jsonify(response)

if __name__ == '__main__':
    # Script entry point: announce the port, then serve on all interfaces.
    # Port 8080 matches the containerPort and Service port in this manifest.
    print("[CHECKOUT] Starting checkout service on port 8080", flush=True)
    app.run(host='0.0.0.0', port=8080)
---
# Deployment running the checkout service: a stock python:3.11-slim image that
# executes app.py mounted from the `checkout-app` Secret.
apiVersion: apps/v1
kind: Deployment
metadata:
name: checkout
spec:
replicas: 1
selector:
matchLabels:
app: checkout
template:
metadata:
labels:
app: checkout
spec:
containers:
- name: checkout
image: python:3.11-slim
# Dependencies are pip-installed (unpinned) on every container start, and the
# app is launched only if the install succeeds (`&&`).
command: ["/bin/bash", "-c"]
args:
- |
pip install flask opentelemetry-api opentelemetry-sdk \
opentelemetry-instrumentation-flask \
opentelemetry-exporter-otlp-proto-grpc && \
python /app/app.py
volumeMounts:
- name: app
mountPath: /app
ports:
- containerPort: 8080
env:
# Non-empty PYTHONUNBUFFERED makes Python write stdout/stderr unbuffered.
- name: PYTHONUNBUFFERED
value: "1"
# Startup budget: 10s initial delay, then a probe every 5s with up to 24
# failures tolerated. This has to cover the pip install above.
startupProbe:
httpGet:
path: /health
port: 8080
initialDelaySeconds: 10
periodSeconds: 5
timeoutSeconds: 3
successThreshold: 1
failureThreshold: 24
resources:
requests:
memory: "128Mi"
cpu: "50m"
limits:
memory: "256Mi"
cpu: "200m"
volumes:
# Secret keys become files in the mount, so `app.py` appears as /app/app.py.
- name: app
secret:
secretName: checkout-app
---
# Service exposing the pods labelled app=checkout on port 8080
# (forwarded to container port 8080).
apiVersion: v1
kind: Service
metadata:
name: checkout
spec:
selector:
app: checkout
ports:
- port: 8080
targetPort: 8080
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Prompt given to the assistant under test.
user_prompt:
- "The checkout service in namespace app-115 is experiencing errors. Investigate why."

# Criteria the answer is judged against: it must tie the failures to promo codes.
expected_output:
- The answer must explicitly state that requests with promo codes are failing or all errors/failures occur when promo code is included.
# Disabled criterion, kept for reference:
# - The answer must mention or imply that there are other requests without promo codes that succeed.
- Including a query that mentions promo code is not sufficient.

tags:
- kubernetes
- hard
- chain-of-causation
- traces

# Forward local port 3200 to the tempo service in app-115; the grafana/tempo
# toolset for this test is configured with http://localhost:3200.
port_forwards:
- namespace: app-115
service: tempo
local_port: 3200
remote_port: 3200

# Setup script: creates namespace app-115, deploys Tempo, the checkout service
# and a traffic generator, verifies that logs and traces exist, then deletes
# the traffic generator again.
before_test: |
echo "🚀 Setting up test 115 - Creating namespace app-115"
# The exit status of the create is ignored (e.g. when the namespace already exists).
kubectl create namespace app-115 || true
echo "✅ Namespace app-115 created successfully!"

echo "📦 Deploying Tempo from shared config"
kubectl apply -f ../../shared/tempo.yaml -n app-115

echo "⏳ Waiting for Tempo pod to be ready"
kubectl wait --for=condition=ready pod -l app=tempo -n app-115 --timeout=60s

echo "⏰ Waiting for Tempo to be fully ready (checking every 5s, timeout 60s)"
# Pod readiness alone is not trusted: poll Tempo's own /ready endpoint from
# inside the pod, 12 attempts with a 5s pause between them (60s in total).
TEMPO_READY=false
for i in {1..12}; do
if kubectl exec -n app-115 deployment/tempo -- wget -q -O - http://localhost:3200/ready 2>/dev/null; then
echo "✅ Tempo is ready!"
TEMPO_READY=true
break
else
echo "⏳ Attempt $i/12: Tempo not ready yet, waiting 5s..."
sleep 5
fi
done

# Abort the whole setup if Tempo never reported ready.
if [ "$TEMPO_READY" = false ]; then
echo "❌ Tempo failed to become ready after 60 seconds"
exit 1
fi

echo "✅ Tempo deployment complete!"

# Deploy the checkout service first, then the traffic generator that calls it.
echo "🛒 Deploying checkout service"
kubectl apply -f checkout-service.yaml -n app-115

echo "⏳ Waiting for checkout pod to be ready"
# NOTE(review): if checkout-service.yaml is the manifest whose container
# pip-installs its dependencies at startup and whose startupProbe allows
# 10s + 24 x 5s, this 60s timeout may be tighter than the pod's own startup
# budget. Confirm it is not a source of flaky setups.
kubectl wait --for=condition=ready pod -l app=checkout -n app-115 --timeout=60s

echo "🔍 Checking checkout deployment status"
kubectl get pods -n app-115 -l app=checkout

echo "🚦 Deploying traffic generator"
kubectl apply -f traffic-generator.yaml -n app-115

echo "⏳ Waiting for traffic generator pod to be ready"
kubectl wait --for=condition=ready pod -l app=traffic-generator -n app-115 --timeout=60s

echo "🔍 Checking all pods status"
kubectl get pods -n app-115

# Give the traffic generator time to produce requests before verifying them.
# The duration lives in one variable so the message and the actual sleep
# cannot drift apart (the message previously said 45 seconds while the
# script slept 20).
TRAFFIC_WARMUP_SECONDS=20
echo "⏰ Letting traffic generator run for ${TRAFFIC_WARMUP_SECONDS} seconds to generate requests"
sleep "${TRAFFIC_WARMUP_SECONDS}"

# Sanity-check the scenario before handing over to the test: the traffic
# generator must have logged both request kinds, and the checkout service must
# have logged at least one processed request. Any missing entry aborts setup.
echo "🔍 Verifying traffic generator log entries"
# --tail=-1 requests all log lines rather than only the most recent ones.
if kubectl logs -n app-115 -l app=traffic-generator --tail=-1 | grep -q "WITH promo_code"; then
echo "✅ Found traffic generator log WITH promo_code"
else
echo "❌ Missing traffic generator log WITH promo_code"
exit 1
fi

if kubectl logs -n app-115 -l app=traffic-generator --tail=-1 | grep -q "WITHOUT promo_code"; then
echo "✅ Found traffic generator log WITHOUT promo_code"
else
echo "❌ Missing traffic generator log WITHOUT promo_code"
exit 1
fi

# Only the last 100 checkout log lines are inspected here.
if kubectl logs -n app-115 -l app=checkout --tail=100 | grep -q "Processing checkout request"; then
echo "✅ Found checkout request log"
else
echo "❌ Missing checkout request log"
exit 1
fi

# Commented out traffic generator trace checks as it no longer sends traces
# echo "🔍 Querying Tempo for traces from traffic generator"
# TRAFFIC_GEN_TRACES=$(curl -s "http://localhost:3200/api/search?tags=service.name%3Dtraffic-generator&limit=10" 2>/dev/null | grep -o '"traceID"' | wc -l || echo "0")
# echo "Found $TRAFFIC_GEN_TRACES traces from traffic-generator"

echo "🔍 Querying Tempo for traces from checkout service"
# Runs a one-off curl pod inside the namespace (removed afterwards via --rm)
# that calls Tempo's search API through the in-cluster `tempo` service name,
# then counts the "traceID" occurrences in the response.
# NOTE(review): `|| echo "0"` is tied to the pipeline's exit status, which
# without `pipefail` is that of `wc -l`, so it likely never fires. Confirm
# whether the harness enables pipefail.
CHECKOUT_TRACES=$(kubectl run -n app-115 tempo-query --rm -i --restart=Never --image=curlimages/curl -- -s "http://tempo:3200/api/search?tags=service.name%3Dcheckout-service&limit=10" 2>/dev/null | grep -o '"traceID"' | wc -l || echo "0")
echo "Found $CHECKOUT_TRACES traces from checkout-service"

# Commented out traffic generator trace check
# if [ "$TRAFFIC_GEN_TRACES" -gt "0" ]; then
# echo "✅ Found traces from traffic-generator"
# else
# echo "❌ No traces found from traffic-generator"
# exit 1
# fi

# At least one checkout-service trace must be searchable in Tempo.
if [ "$CHECKOUT_TRACES" -gt "0" ]; then
echo "✅ Found traces from checkout-service"
else
echo "❌ No traces found from checkout-service"
exit 1
fi

# Delete the traffic generator so the AI under test can't cheat: its logs
# label each request as WITH or WITHOUT promo_code.
kubectl delete -f traffic-generator.yaml -n app-115

echo "✅ Test setup complete!"

# Teardown: remove the whole test namespace. The exit status is ignored, so a
# missing namespace does not fail the teardown.
after_test: |
kubectl delete namespace app-115 || true
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Toolsets enabled for this test: core Kubernetes, Kubernetes logs and Tempo.
toolsets:
kubernetes/core:
enabled: true
kubernetes/logs:
enabled: true
grafana/tempo:
enabled: true
config:
# Tempo is addressed on local port 3200.
url: http://localhost:3200
healthcheck: "ready"
Loading
Loading