Your weekly dose of actionable cloud wisdom to start the week right
The Problem
Your API is either completely unprotected (vulnerable to abuse and DDoS attacks) or so aggressively rate-limited that legitimate users get frustrated with constant 429 errors. You’re caught between security teams demanding protection and product teams demanding performance. Meanwhile, your API costs are unpredictable because you can’t control traffic spikes.
The Solution
Implement intelligent API Gateway rate limiting that protects your infrastructure whilst maintaining excellent user experience. The key is using multiple rate limiting strategies based on user behaviour, API endpoint sensitivity, and business requirements rather than applying blanket limits.
Smart Rate Limiting Strategies:
1. Tiered Rate Limiting by API Key
# API Gateway usage plan configuration
# Three tiers: standard customers, paying customers, and trusted internal
# services. burstLimit caps short request spikes, rateLimit is the sustained
# requests/second, and quota caps total daily volume.
UsagePlans:
  BasicTier:
    throttle:
      burstLimit: 100   # Peak concurrent requests
      rateLimit: 50     # Requests per second sustained
    quota:
      limit: 10000      # Total requests per day
      period: DAY
  PremiumTier:
    throttle:
      burstLimit: 500
      rateLimit: 200
    quota:
      limit: 100000
      period: DAY
  InternalServices:
    throttle:
      burstLimit: 1000
      rateLimit: 500
    quota:
      limit: 1000000
      period: DAY
# Create the usage plan and capture its id so the linking step below
# can run without manual copy/paste of placeholder ids.
USAGE_PLAN_ID=$(aws apigateway create-usage-plan \
  --name "BasicTier" \
  --description "Basic API access with standard limits" \
  --throttle burstLimit=100,rateLimit=50 \
  --quota limit=10000,period=DAY \
  --query 'id' --output text)

# Create an API key for the customer and capture its id
API_KEY_ID=$(aws apigateway create-api-key \
  --name "customer-basic-key" \
  --description "Basic tier customer access" \
  --enabled \
  --query 'id' --output text)

# Link the API key to the usage plan so the tier's limits apply to it
aws apigateway create-usage-plan-key \
  --usage-plan-id "$USAGE_PLAN_ID" \
  --key-id "$API_KEY_ID" \
  --key-type "API_KEY"
2. Method-Specific Rate Limiting
# Terraform configuration for granular endpoint limits.
# Read (GET) operations are cheap, so they get the most generous limits.
resource "aws_api_gateway_method_settings" "read_endpoints" {
  rest_api_id = aws_api_gateway_rest_api.api.id
  stage_name  = aws_api_gateway_stage.prod.stage_name
  method_path = "*/GET"

  settings {
    throttling_rate_limit  = 100 # Higher limits for read operations
    throttling_burst_limit = 200
    logging_level          = "INFO"
  }
}
# Write (POST) operations hit the backend harder, so throttle them tighter.
resource "aws_api_gateway_method_settings" "write_endpoints" {
  rest_api_id = aws_api_gateway_rest_api.api.id
  stage_name  = aws_api_gateway_stage.prod.stage_name
  method_path = "*/POST"

  settings {
    throttling_rate_limit  = 20 # Lower limits for write operations
    throttling_burst_limit = 50
    logging_level          = "INFO"
  }
}
# Admin endpoints are sensitive and low-traffic; keep limits minimal.
resource "aws_api_gateway_method_settings" "admin_endpoints" {
  rest_api_id = aws_api_gateway_rest_api.api.id
  stage_name  = aws_api_gateway_stage.prod.stage_name
  method_path = "/admin/*/ANY"

  settings {
    throttling_rate_limit  = 5 # Very restrictive for admin operations
    throttling_burst_limit = 10
    logging_level          = "INFO"
  }
}
3. WAF Integration for Advanced Protection
[
  {
    "Name": "APIRateLimitRule",
    "Priority": 1,
    "Statement": {
      "RateBasedStatement": {
        "Limit": 2000,
        "AggregateKeyType": "IP",
        "ScopeDownStatement": {
          "ByteMatchStatement": {
            "SearchString": "/api/",
            "FieldToMatch": {
              "UriPath": {}
            },
            "TextTransformations": [
              {
                "Priority": 0,
                "Type": "LOWERCASE"
              }
            ],
            "PositionalConstraint": "STARTS_WITH"
          }
        }
      }
    },
    "Action": {
      "Block": {}
    },
    "VisibilityConfig": {
      "SampledRequestsEnabled": true,
      "CloudWatchMetricsEnabled": true,
      "MetricName": "APIRateLimitRule"
    }
  }
]
# Create WAF protection for the API.
# NOTE: rate-based statements are not allowed inside rule groups, so the
# rule must be attached directly to a web ACL. --visibility-config is
# required by the wafv2 API.
aws wafv2 create-web-acl \
  --name "api-rate-limit-acl" \
  --scope "REGIONAL" \
  --default-action Allow={} \
  --visibility-config SampledRequestsEnabled=true,CloudWatchMetricsEnabled=true,MetricName=api-rate-limit-acl \
  --rules file://rate-limit-rules.json
4. Dynamic Rate Limiting with Lambda
import json
import time

import boto3

dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('api-rate-limits')


def _limit_for(method, endpoint):
    """Return the per-minute request limit for a method/endpoint pair."""
    rate_limits = {
        'GET': 100,    # High limit for reads
        'POST': 20,    # Medium limit for writes
        'PUT': 20,     # Medium limit for updates
        'DELETE': 5,   # Low limit for deletes
        'PATCH': 10,   # Low limit for patches
    }
    if '/admin/' in endpoint:
        return 5  # Very restrictive for admin operations
    base = rate_limits.get(method, 50)
    if '/public/' in endpoint:
        return base * 2  # Double limit for public endpoints
    return base


def lambda_handler(event, context):
    """Advanced rate limiting with user behaviour analysis.

    Counts the caller's requests in the last 60 seconds (tracked in a
    DynamoDB table keyed by API key + source IP) and returns HTTP 429
    once a method/endpoint-specific limit is exceeded. Fails open: if
    DynamoDB is unavailable the request is allowed through.
    """
    # API Gateway proxy events preserve the client's header casing, so
    # look the API key up case-insensitively rather than assuming
    # lowercase 'x-api-key'.
    headers = event.get('headers') or {}
    api_key = next(
        (v for k, v in headers.items() if k.lower() == 'x-api-key'), None
    )
    client_ip = (
        event.get('requestContext', {}).get('identity', {}).get('sourceIp')
    )
    # Default to '' so the substring checks below never see None.
    endpoint = event.get('path') or ''
    method = event.get('httpMethod')

    # Partition per (api_key, ip) so one tenant behind a shared NAT
    # cannot exhaust another tenant's allowance.
    user_key = f"{api_key}#{client_ip}"
    current_time = int(time.time())
    window_start = current_time - 60  # 1-minute window

    try:
        # Count this caller's requests inside the window.
        response = table.query(
            KeyConditionExpression=(
                'user_key = :uk AND request_time BETWEEN :start AND :end'
            ),
            ExpressionAttributeValues={
                ':uk': user_key,
                ':start': window_start,
                ':end': current_time,
            },
        )
        recent_requests = len(response['Items'])

        limit = _limit_for(method, endpoint)

        if recent_requests >= limit:
            return {
                'statusCode': 429,
                'headers': {
                    'Retry-After': '60',
                    'X-RateLimit-Limit': str(limit),
                    'X-RateLimit-Remaining': '0',
                    'X-RateLimit-Reset': str(current_time + 60),
                },
                'body': json.dumps({
                    'error': 'Rate limit exceeded',
                    'limit': limit,
                    'window': '60 seconds',
                }),
            }

        # Record this request.
        # NOTE(review): with (user_key, request_time) as the table key,
        # multiple requests in the same second overwrite each other and
        # get under-counted — add a uniqueness component to the sort key
        # if sub-second bursts matter.
        table.put_item(
            Item={
                'user_key': user_key,
                'request_time': current_time,
                'endpoint': endpoint,
                'method': method,
                'ttl': current_time + 3600,  # DynamoDB TTL: auto-delete after 1 hour
            }
        )

        # Allow the request and tell the client where it stands.
        return {
            'statusCode': 200,
            'headers': {
                'X-RateLimit-Limit': str(limit),
                'X-RateLimit-Remaining': str(max(0, limit - recent_requests - 1)),
                'X-RateLimit-Reset': str(current_time + 60),
            },
        }
    except Exception as e:  # deliberate fail-open boundary
        print(f"Rate limiting error: {str(e)}")
        # Fail open - allow request if rate limiting service fails
        return {'statusCode': 200}
5. Client-Side Rate Limit Handling
// Client-side rate limit handling with exponential backoff.
// All requests are funnelled through a FIFO queue so that a single 429
// pauses the whole client instead of letting parallel callers hammer
// the API while it is throttling us.
class APIClient {
  constructor(baseURL, apiKey) {
    this.baseURL = baseURL;
    this.apiKey = apiKey;
    this.requestQueue = [];
    this.processing = false;
    this.maxRetries = 5; // give up after this many consecutive 429s per request
  }

  // Public entry point: resolves with the parsed JSON body, rejects on
  // network failure or when retries are exhausted.
  async makeRequest(endpoint, options = {}) {
    return new Promise((resolve, reject) => {
      this.requestQueue.push({ endpoint, options, resolve, reject, attempts: 0 });
      this.processQueue();
    });
  }

  async processQueue() {
    if (this.processing || this.requestQueue.length === 0) return;
    this.processing = true;
    while (this.requestQueue.length > 0) {
      const job = this.requestQueue.shift();
      const { endpoint, options, resolve, reject } = job;
      try {
        const response = await this.executeRequest(endpoint, options);
        if (response.status === 429) {
          // Rate limited — retry with exponential backoff instead of
          // looping forever at a fixed delay.
          job.attempts += 1;
          if (job.attempts > this.maxRetries) {
            reject(new Error(`Rate limited: gave up after ${this.maxRetries} retries`));
            continue;
          }
          const retryAfter = parseInt(response.headers.get('Retry-After') || '1', 10);
          const backoff = retryAfter * Math.pow(2, job.attempts - 1);
          const remaining = response.headers.get('X-RateLimit-Remaining');
          console.log(`Rate limited. Retrying after ${backoff} seconds. Remaining: ${remaining}`);
          // Re-queue at the front so the retried request keeps its place
          this.requestQueue.unshift(job);
          await this.sleep(backoff * 1000);
        } else {
          resolve(await response.json());
        }
      } catch (error) {
        reject(error);
      }
    }
    this.processing = false;
  }

  // Low-level fetch wrapper that injects auth and content-type headers.
  async executeRequest(endpoint, options) {
    const headers = {
      'Content-Type': 'application/json',
      'X-API-Key': this.apiKey,
      ...options.headers
    };
    return fetch(`${this.baseURL}${endpoint}`, {
      ...options,
      headers
    });
  }

  sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
// Usage example with graceful degradation
const apiClient = new APIClient('https://api.yourservice.com', 'your-api-key');

async function fetchUserData(userId) {
  try {
    return await apiClient.makeRequest(`/users/${userId}`);
  } catch (error) {
    console.error('Failed to fetch user data:', error);
    // Return cached data or default values.
    // NOTE(review): getCachedUserData is not defined in this snippet —
    // supply your own cache lookup (or a safe default) here.
    return getCachedUserData(userId);
  }
}
Rate Limit Monitoring and Alerting
# CloudWatch dashboard for API rate limiting.
# Count and 4XXError are summed per period; Latency overrides the widget
# default with Average, since summing latencies across requests is meaningless.
aws cloudwatch put-dashboard \
  --dashboard-name "API-Rate-Limiting" \
  --dashboard-body '{
    "widgets": [
      {
        "type": "metric",
        "properties": {
          "metrics": [
            ["AWS/ApiGateway", "4XXError", "ApiName", "MyAPI"],
            ["AWS/ApiGateway", "Count", "ApiName", "MyAPI"],
            ["AWS/ApiGateway", "Latency", "ApiName", "MyAPI", {"stat": "Average"}]
          ],
          "period": 300,
          "stat": "Sum",
          "region": "eu-west-1",
          "title": "API Gateway Metrics"
        }
      }
    ]
  }'
# Create alarm for high 429 error rate
# NOTE(review): the 4XXError metric counts ALL client errors (400, 403,
# 404, 429, ...), so this alarm is an approximation — create a log-based
# metric filter on status=429 if you need rate-limit errors specifically.
# NOTE(review): no --alarm-actions are configured, so the alarm changes
# state but notifies nobody — add an SNS topic ARN to make it actionable.
aws cloudwatch put-metric-alarm \
--alarm-name "high-rate-limit-errors" \
--alarm-description "Alert when too many requests are being rate limited" \
--metric-name "4XXError" \
--namespace "AWS/ApiGateway" \
--statistic "Sum" \
--period 300 \
--threshold 100 \
--comparison-operator "GreaterThanThreshold" \
--evaluation-periods 2
Why It Matters
- Cost Control: Prevent runaway API costs from traffic spikes or abuse
- Performance: Protect backend services from being overwhelmed
- Security: Block malicious traffic and prevent DDoS attacks
- User Experience: Provide predictable, fair access to your API
Try This Week
- Audit current API protection – Check if your APIs have any rate limiting
- Implement basic usage plans – Create at least two tiers (basic/premium)
- Add rate limit headers – Help clients understand their limits
- Set up monitoring – Track 429 errors and usage patterns
Quick Rate Limiting Assessment Script
#!/bin/bash
# API Gateway rate limiting audit script.
# Prints usage plans, per-method throttle settings, and a rough count of
# recent 429s. Requires: aws CLI, jq, and GNU date (`date -d`; on macOS
# install coreutils and use gdate).
set -euo pipefail

API_ID="your-api-gateway-id"
STAGE="prod"

echo "=== API Gateway Rate Limiting Audit ==="
echo

echo "Usage Plans:"
aws apigateway get-usage-plans \
  --query 'items[*].[name,throttle.rateLimit,throttle.burstLimit,quota.limit]' \
  --output table
echo

echo "Method Settings (Rate Limits):"
aws apigateway get-stage --rest-api-id "$API_ID" --stage-name "$STAGE" \
  --query 'methodSettings' --output json |
  jq -r 'to_entries[] | "\(.key): Rate=\(.value.throttlingRateLimit // "N/A"), Burst=\(.value.throttlingBurstLimit // "N/A")"'
echo

echo "Recent 429 Errors (last 24 hours):"
# Counts matching log lines; requires execution logging enabled on the stage.
aws logs filter-log-events \
  --log-group-name "API-Gateway-Execution-Logs_${API_ID}/${STAGE}" \
  --start-time "$(date -d '24 hours ago' +%s)000" \
  --filter-pattern '[timestamp, requestId, ip, user, timestamp, method, resource, protocol, status=429, ...]' \
  --query 'events[*].message' --output text | wc -l
Common Rate Limiting Mistakes
- One-size-fits-all limits: Same limits for all endpoints regardless of cost/sensitivity
- No client communication: Not providing rate limit headers or clear error messages
- Ignoring burst traffic: Not allowing for legitimate traffic spikes
- No monitoring: Flying blind on actual usage patterns vs limits
- Forgetting internal services: Accidentally applying strict rate limits to your own backend service-to-service calls, starving your own systems
Advanced Rate Limiting Patterns
- Sliding window counters: More accurate than fixed time windows
- Token bucket algorithms: Allow burst traffic while maintaining average rates
- Distributed rate limiting: Coordinate limits across multiple API Gateway instances
- User behaviour analysis: Dynamic limits based on historical patterns
Pro Tip: Start with generous rate limits and tighten them based on actual usage data. It’s better to have some protection than none, and you can always adjust limits down once you understand your traffic patterns.
Dealing with sophisticated API abuse patterns? I’d love to hear about creative rate limiting solutions you’ve implemented – advanced protection strategies make great tips!








