Security considerations, testing strategies, and best practices for tool development
Building secure and maintainable tools is crucial for production NOMOS agents. This guide covers security considerations, testing strategies, and development best practices.
Only provide tools when needed for specific steps
# Good: Minimal tool access per step.
# Each step lists only the tools it needs, so a step can never invoke a
# capability that is irrelevant to it.
steps:
  - step_id: greeting
    available_tools: []  # No tools needed for greeting
  - step_id: lookup_order
    available_tools: [search_orders]  # Only order search
  - step_id: process_refund
    available_tools: [search_orders, process_refund]  # Add refund capability
  - step_id: admin_functions
    available_tools: [search_orders, process_refund, delete_user]  # Admin only
# Avoid: Giving all tools to all steps.
# A greeting step has no reason to reach refund or user-deletion tools.
steps:
  - step_id: greeting
    available_tools: [search_orders, process_refund, delete_user]  # Too much access
Validate all tool inputs to prevent security issues
import re
import html
from typing import List
def execute_database_query(table: str, conditions: str) -> str:
    """
    Build a read-only query after validating the table and WHERE clause.

    This is a simplified example: no database is contacted, the function
    only returns the text of the query it would run. The keyword/character
    deny-list below is defence in depth, not a replacement for parameterized
    queries; real code should bind values through the database driver.

    Args:
        table: Table name (validated against allowed tables)
        conditions: WHERE conditions (rejected if they contain dangerous
            keywords, quoting characters, statement separators or comments)

    Returns:
        str: Query results, or a message starting with "Error:" when the
        table or the conditions are rejected
    """
    # Validate table name against whitelist
    allowed_tables = ["orders", "customers", "products", "inventory"]
    if table not in allowed_tables:
        return f"Error: Table '{table}' not allowed. Allowed tables: {', '.join(allowed_tables)}"

    # Reject conditions that could change the meaning of the statement.
    dangerous_patterns = [
        r'\b(DROP|DELETE|INSERT|UPDATE|CREATE|ALTER|EXEC|EXECUTE)\b',
        r'[;\'"\\]',  # Dangerous characters
        r'--',  # SQL comments
        # Either comment delimiter on its own is enough to reject: an
        # unterminated or multi-line "/* ..." would slip past a pattern that
        # requires both delimiters on one line.
        r'/\*|\*/',
    ]
    if any(re.search(pattern, conditions, re.IGNORECASE) for pattern in dangerous_patterns):
        return "Error: Invalid characters or keywords in conditions"

    # Execute safe query (simplified example).
    # The conditions are used verbatim: HTML-escaping is not SQL sanitization
    # and would corrupt valid comparisons ("price < 10" -> "price &lt; 10").
    return f"Query executed: SELECT * FROM {table} WHERE {conditions}"
def safe_file_operation(file_path: str, operation: str) -> str:
    """
    Perform safe file operations.

    Absolute paths and paths containing '..' are rejected outright. The
    remaining (relative) path is resolved against the current working
    directory, with symlinks followed, and must end up inside one of the
    allowed base directories.

    Args:
        file_path: Path to file (validated)
        operation: Operation to perform: 'read', 'list' or 'stat'

    Returns:
        str: Operation result, or a message starting with "Error:"
    """
    import os

    # Validate file path to prevent directory traversal
    if '..' in file_path or file_path.startswith('/'):
        return "Error: Invalid file path. Relative paths with '..' not allowed"

    # Restrict to allowed directories
    allowed_base_dirs = ['/tmp/uploads', '/app/data', '/var/app/files']
    # realpath (not abspath) so a symlink inside an allowed directory cannot
    # point the operation at a file outside of it.
    full_path = os.path.realpath(file_path)

    def _is_inside(base_dir: str) -> bool:
        # Compare on a directory boundary: a bare prefix test would accept
        # '/tmp/uploads_evil' as if it were inside '/tmp/uploads'.
        base = os.path.realpath(base_dir)
        return full_path == base or full_path.startswith(base + os.sep)

    if not any(_is_inside(allowed_dir) for allowed_dir in allowed_base_dirs):
        return "Error: File access restricted to allowed directories"

    # Validate operation
    if operation not in ['read', 'list', 'stat']:
        return f"Error: Operation '{operation}' not allowed"

    try:
        if operation == 'read' and os.path.isfile(full_path):
            with open(full_path, 'r') as f:
                content = f.read(1000)  # Limit read size
            return f"File content (first 1000 chars): {content}"
        if operation == 'list' and os.path.isdir(full_path):
            files = os.listdir(full_path)[:10]  # Limit listing
            return f"Directory contents: {', '.join(files)}"
        if operation == 'stat' and os.path.exists(full_path):
            # 'stat' is an accepted operation, so it needs a real result
            # instead of always falling through to the error below.
            info = os.stat(full_path)
            return f"File stats: size={info.st_size} bytes, modified={info.st_mtime}"
        return "Error: File not found or invalid operation"
    except Exception as e:
        # Tools report failures as text rather than raising.
        return f"Error: {str(e)}"
Implement rate limiting for expensive operations
import time
from collections import defaultdict
from threading import Lock
class RateLimiter:
    """Simple sliding-window rate limiter for tool calls.

    Each key (for example a user id) may make at most ``max_calls`` calls
    within any ``window_seconds`` interval. Access to the bookkeeping is
    guarded by a lock.
    """

    def __init__(self, max_calls: int = 10, window_seconds: int = 60):
        """
        Args:
            max_calls: Maximum number of allowed calls per key per window
            window_seconds: Length of the sliding window in seconds
        """
        self.max_calls = max_calls
        self.window_seconds = window_seconds
        # key -> timestamps (monotonic clock) of the calls still in the window
        self.calls = defaultdict(list)
        self.lock = Lock()

    def is_allowed(self, key: str) -> bool:
        """Check if call is allowed for the given key, recording it if so.

        Args:
            key: Identifier whose calls are being limited

        Returns:
            bool: True when the call fits in the current window (and has been
            counted), False when the limit is already reached
        """
        with self.lock:
            # Monotonic clock: wall-clock adjustments must neither reset nor
            # extend anybody's window.
            now = time.monotonic()

            # Remove old calls outside the window. ``get`` avoids creating an
            # entry just by looking at a key.
            recent = [
                call_time for call_time in self.calls.get(key, ())
                if now - call_time < self.window_seconds
            ]

            # Check if under limit
            allowed = len(recent) < self.max_calls
            if allowed:
                recent.append(now)

            # Keep only keys that still have calls in the window so idle
            # keys do not linger as empty lists.
            if recent:
                self.calls[key] = recent
            else:
                self.calls.pop(key, None)
            return allowed
# Global rate limiter
api_rate_limiter = RateLimiter(max_calls=5, window_seconds=60)


def rate_limited_api_call(endpoint: str, user_id: str = "default") -> str:
    """
    Call an API endpoint unless the user has exhausted their quota.

    The quota is tracked by the module-level ``api_rate_limiter``
    (5 calls per 60 seconds per user id).

    Args:
        endpoint: API endpoint to call
        user_id: User identifier used as the rate-limiting key

    Returns:
        str: API response or rate limit message
    """
    if api_rate_limiter.is_allowed(user_id):
        # Simulate API call
        time.sleep(0.1)  # Prevent API abuse
        return f"API response from {endpoint}"

    return f"Error: Rate limit exceeded for user {user_id}. Please try again later."
def memory_limited_processing(data: str, max_size_mb: int = 10) -> str:
    """
    Refuse to process data whose in-memory size exceeds a limit.

    The size is measured with ``sys.getsizeof``, i.e. the size of the string
    object itself, and converted to megabytes.

    Args:
        data: Data to process
        max_size_mb: Maximum data size in MB

    Returns:
        str: Processing result or error
    """
    import sys

    bytes_per_mb = 1024 * 1024
    data_size_mb = sys.getsizeof(data) / bytes_per_mb

    within_limit = not (data_size_mb > max_size_mb)
    if within_limit:
        # Process data safely
        return f"Processed {len(data)} characters ({data_size_mb:.2f}MB)"

    return f"Error: Data size ({data_size_mb:.2f}MB) exceeds limit ({max_size_mb}MB)"
Protect sensitive information in logs and responses
import re
from typing import Dict, Any
def redact_sensitive_info(data: str) -> str:
    """
    Redact sensitive information from data.

    Replaces e-mail addresses, US-style phone numbers, Social Security
    Numbers, 16-digit card numbers and ``api_key`` / ``token`` assignments
    with fixed placeholders. The patterns are applied in the order listed.

    Args:
        data: Text data that may contain sensitive information

    Returns:
        str: Data with sensitive information redacted
    """
    redactions = (
        # Email addresses. The top-level domain is letters only; a literal
        # '|' inside the character class would let the match run past a pipe
        # and swallow whatever follows it.
        (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]'),
        # Phone numbers (US format)
        (r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b', '[PHONE]'),
        # Social Security Numbers
        (r'\b\d{3}-\d{2}-\d{4}\b', '[SSN]'),
        # Credit card numbers (basic pattern)
        (r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b', '[CARD]'),
        # API keys and tokens (common patterns)
        (r'\b[Aa]pi_key[:\s=]+[A-Za-z0-9_-]+', 'api_key=[REDACTED]'),
        (r'\b[Tt]oken[:\s=]+[A-Za-z0-9_.-]+', 'token=[REDACTED]'),
    )
    for pattern, placeholder in redactions:
        data = re.sub(pattern, placeholder, data)
    return data
def secure_user_lookup(user_identifier: str) -> str:
    """
    Return a user summary with the e-mail and phone fields redacted.

    The lookup is simulated: a fixed sample record is used and
    ``user_identifier`` is not consulted. Name and address are returned
    unchanged.

    Args:
        user_identifier: User ID, email, or phone

    Returns:
        str: Redacted user information
    """
    # Simulate user lookup
    record = {
        "name": "John Doe",
        "email": "john.doe@example.com",
        "phone": "555-123-4567",
        "address": "123 Main St, City, State 12345"
    }

    # Redact only the contact fields
    masked_email = redact_sensitive_info(record['email'])
    masked_phone = redact_sensitive_info(record['phone'])

    # The empty first and last entries give the leading and trailing newline.
    report_lines = [
        "",
        "User Information:",
        f"Name: {record['name']}",
        f"Email: {masked_email}",
        f"Phone: {masked_phone}",
        f"Address: {record['address']}",
        "",
    ]
    return "\n".join(report_lines)
import pytest
from unittest.mock import patch, MagicMock
def test_basic_tool():
    """Check that calculate_total reports a labelled, tax-inclusive total."""
    from your_tools import calculate_total

    # $100 with 8% tax
    output = calculate_total(100.0, 0.08)

    for expected_fragment in ("108.00", "Total"):
        assert expected_fragment in output
def test_tool_validation():
    """Check that set_temperature accepts a normal value and rejects an extreme one."""
    from your_tools import set_temperature

    # Valid input
    accepted = set_temperature(72.0, "F")
    assert "72°F" in accepted

    # Invalid temperature
    rejected = set_temperature(200.0, "F")
    assert "Error" in rejected
def test_async_tool():
    """Drive the async fetch_weather_data tool to completion and check its output."""
    import asyncio
    from your_tools import fetch_weather_data

    async def _exercise():
        forecast = await fetch_weather_data("New York")
        assert len(forecast) > 0
        assert "Error" not in forecast

    # The test function itself is synchronous, so run the coroutine explicitly.
    asyncio.run(_exercise())
@patch('requests.get')
def test_external_api_tool(mock_get):
    """Check get_stock_price against a mocked HTTP response (no network access)."""
    from your_tools import get_stock_price

    # Mock API response
    fake_reply = MagicMock()
    fake_reply.json.return_value = {"price": 150.0, "change": 2.5}
    fake_reply.raise_for_status.return_value = None
    mock_get.return_value = fake_reply

    quote = get_stock_price("AAPL")

    # Both figures from the mocked payload must appear in the tool output.
    assert "150.0" in quote
    assert "2.5" in quote
def test_error_handling():
    """Check that divide_numbers reports division by zero as an error message."""
    from your_tools import divide_numbers

    # Valid division
    quotient = divide_numbers(10, 2)
    assert "5.0" in quotient

    # Division by zero
    failure = divide_numbers(10, 0)
    assert "Error" in failure
    assert "zero" in failure.lower()
import pytest
from nomos import Agent, AgentConfig
from your_tools import tools
def test_tool_integration():
    """Test tools integrated with NOMOS agent.

    Builds a single-step agent whose only available tool is
    ``calculate_total`` and checks that a pricing request makes the agent
    decide on a tool call naming that tool.
    """
    # Step is used below but is not among the names this file imports from
    # nomos; import it here so the test does not fail with a NameError.
    # NOTE(review): assumes Step is exported from the top-level nomos package.
    from nomos import Step

    config = AgentConfig(
        name="test_agent",
        steps=[
            Step(
                step_id="test",
                description="Test step",
                available_tools=["calculate_total"],
                routes=[]
            )
        ],
        start_step_id="test"
    )
    agent = Agent.from_config(config, tools=tools)
    session = agent.create_session()

    # Test tool usage
    result = session.next("Calculate total for $100 with 8% tax")
    assert result.decision.action == "TOOL_CALL"
    assert "calculate_total" in str(result.decision.tool_call)
def test_tool_security():
    """Check that secure_file_read serves an allowed file and blocks traversal."""
    from your_tools import secure_file_read

    # Test allowed file
    permitted = secure_file_read("data/allowed.txt")
    assert "Error" not in permitted

    # Test directory traversal attempt
    blocked = secure_file_read("../../../etc/passwd")
    assert "Error" in blocked
    assert "not allowed" in blocked.lower()
Write comprehensive docstrings and include examples
def complex_calculation(
    base_amount: float,
    tax_rate: float,
    discount_percent: float = 0.0,
    shipping_cost: float = 0.0,
    currency: str = "USD"
) -> str:
    """
    Produce an itemized final price: base, discount, tax, shipping and total.

    Use this tool when a customer asks what a purchase will cost in the end.
    Discount, tax and shipping lines appear only when their amount is
    greater than zero.

    Args:
        base_amount: Base price before tax and fees (required)
            Must be positive value greater than 0
        tax_rate: Tax rate as decimal (e.g., 0.08 for 8% tax) (required)
            Must be between 0.0 and 1.0
        discount_percent: Discount percentage (e.g., 10 for 10% off) (optional)
            Must be between 0 and 100
        shipping_cost: Additional shipping cost (optional)
            Must be non-negative
        currency: Currency code (optional, default: USD)
            USD, EUR and GBP are shown with their symbol; any other value
            is used verbatim as the amount prefix

    Returns:
        str: Detailed breakdown of final price calculation with currency

    Raises:
        ValueError: If a numeric parameter is outside its valid range

    Examples:
        >>> complex_calculation(100.0, 0.08, 10.0, 5.95)
        "Base: $100.00, Discount: -$10.00, Tax: $7.20, Shipping: $5.95, Total: $103.15"
        >>> complex_calculation(50.0, 0.05, currency="EUR")
        "Base: €50.00, Tax: €2.50, Total: €52.50"

    Note:
        Tax is calculated after discount is applied.
        Shipping is added to the final total.
    """
    # Validation
    if base_amount <= 0:
        raise ValueError("Base amount must be positive")
    if not (0 <= tax_rate <= 1):
        raise ValueError("Tax rate must be between 0 and 1")
    if not (0 <= discount_percent <= 100):
        raise ValueError("Discount percent must be between 0 and 100")
    if shipping_cost < 0:
        raise ValueError("Shipping cost cannot be negative")

    # Currency symbols
    symbol = {"USD": "$", "EUR": "€", "GBP": "£"}.get(currency, currency)

    def money(value: float) -> str:
        """Format an amount with the currency prefix and two decimals."""
        return f"{symbol}{value:.2f}"

    # Calculations
    discount_amount = base_amount * (discount_percent / 100)
    discounted_amount = base_amount - discount_amount
    tax_amount = discounted_amount * tax_rate
    total = discounted_amount + tax_amount + shipping_cost

    # Format response: optional items are listed only when non-zero
    optional_items = (
        ("Discount: -", discount_amount),
        ("Tax: ", tax_amount),
        ("Shipping: ", shipping_cost),
    )
    segments = [f"Base: {money(base_amount)}"]
    segments.extend(
        f"{label}{money(amount)}" for label, amount in optional_items if amount > 0
    )
    segments.append(f"Total: {money(total)}")
    return ", ".join(segments)
Provide fallback options when tools fail
def resilient_data_lookup(query: str) -> str:
    """
    Search several data sources in order and return the first hit.

    The primary database is tried first, then the secondary API, then the
    cache. A source that raises is reported with ``print`` and skipped; a
    source that returns a falsy result is skipped silently.

    Args:
        query: Search query

    Returns:
        str: Data or fallback response
    """
    # (deferred lookup, prefix for a hit, prefix for a failure), in priority
    # order. Lambdas keep each lookup inside the try block below.
    sources = (
        (lambda: primary_database_lookup(query),
         "Found in primary DB", "Primary DB failed"),
        (lambda: secondary_api_lookup(query),
         "Found in secondary API", "Secondary API failed"),
        (lambda: cached_lookup(query),
         "Found in cache (may be outdated)", "Cache lookup failed"),
    )

    for fetch, hit_prefix, failure_prefix in sources:
        try:
            hit = fetch()
            if hit:
                return f"{hit_prefix}: {hit}"
        except Exception as exc:
            print(f"{failure_prefix}: {exc}")

    # Final fallback
    return f"Unable to find data for '{query}'. Please try a different search term or contact support."
def smart_api_call(endpoint: str, max_retries: int = 3) -> str:
    """
    Make API call with intelligent retry and fallback.

    Retries on HTTP 429 (exponential backoff), on 5xx responses and on
    timeouts (one-second pause). Other HTTP errors, connection failures and
    unexpected exceptions end the call immediately with a message. No pause
    is taken after the final attempt, so the fallback is returned promptly.

    Args:
        endpoint: API endpoint
        max_retries: Maximum retry attempts

    Returns:
        str: API response or fallback message
    """
    import requests
    import time

    for attempt in range(max_retries):
        # Waiting only makes sense when another attempt will follow.
        is_last_attempt = attempt == max_retries - 1
        try:
            response = requests.get(endpoint, timeout=10)
            status = response.status_code

            if status == 200:
                return response.text

            if status == 429:  # Rate limited
                if not is_last_attempt:
                    time.sleep(2 ** attempt)  # Exponential backoff
                continue

            if status >= 500:  # Server error
                if not is_last_attempt:
                    time.sleep(1)
                continue

            return f"API error: {status} - {response.text}"
        except requests.Timeout:
            if not is_last_attempt:
                time.sleep(1)
            continue
        except requests.ConnectionError:
            return "Network error: Unable to connect to the service"
        except Exception as e:
            # Tools report failures as text rather than raising.
            return f"Unexpected error: {str(e)}"

    return "Service temporarily unavailable. Please try again later."
Monitor tool performance and resource usage
import time
import psutil
import threading
from dataclasses import dataclass
from typing import Dict, List
@dataclass
class PerformanceMetrics:
    """Measurements captured for one execution of a tool.

    Attributes:
        execution_time: Elapsed time of the call.
        memory_used: Change in memory usage across the call.
        cpu_percent: CPU utilisation reading taken for the call.
        success: Whether the call finished without raising.
        error_message: Text of the failure; empty when there was none.
    """

    execution_time: float
    memory_used: float
    cpu_percent: float
    success: bool
    error_message: str = ""
class PerformanceMonitor:
    """Collects per-tool performance metrics and summarizes them.

    At most the 100 most recent metrics are kept for each tool. Reads and
    writes of the history are guarded by a lock.
    """

    def __init__(self):
        # tool name -> recorded metrics, oldest first
        self.metrics: Dict[str, List[PerformanceMetrics]] = {}
        self.lock = threading.Lock()

    def record_metrics(self, tool_name: str, metrics: PerformanceMetrics):
        """Append one measurement to the history of ``tool_name``."""
        with self.lock:
            history = self.metrics.setdefault(tool_name, [])
            history.append(metrics)

            # Keep only last 100 metrics per tool
            if len(history) > 100:
                self.metrics[tool_name] = history[-100:]

    def get_summary(self, tool_name: str) -> str:
        """Return a text summary of the recorded executions of ``tool_name``.

        The summary lists the number of executions, the average time and
        memory, and the success rate; a short notice is returned when
        nothing has been recorded for the tool.
        """
        with self.lock:
            if tool_name not in self.metrics:
                return f"No metrics available for {tool_name}"

            samples = self.metrics[tool_name]
            sample_count = len(samples)

            # Calculate averages
            avg_time = sum(m.execution_time for m in samples) / sample_count
            avg_memory = sum(m.memory_used for m in samples) / sample_count
            success_rate = len([m for m in samples if m.success]) / sample_count

            # The empty first and last entries give the leading and trailing
            # newline of the report.
            report_lines = [
                "",
                f"Tool: {tool_name}",
                f"Executions: {sample_count}",
                f"Avg Time: {avg_time:.3f}s",
                f"Avg Memory: {avg_memory:.2f}MB",
                f"Success Rate: {success_rate:.1%}",
                "",
            ]
            return "\n".join(report_lines)


# Global performance monitor
perf_monitor = PerformanceMonitor()
def monitored_tool(func):
    """Decorator to monitor tool performance.

    Each call of the wrapped tool is timed, the change in the process's
    resident memory is measured, and the result is recorded in the global
    ``perf_monitor`` under the tool's function name. An exception raised by
    the tool is not propagated: it is recorded as a failed execution and the
    call returns ``"Error: <message>"`` instead.

    Args:
        func: The tool function to wrap

    Returns:
        The wrapping function, carrying the original name and docstring
    """
    import functools

    # wraps() keeps __name__ and __doc__ of the tool intact, so anything that
    # introspects the decorated function still sees the real tool.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is meant for measuring durations; the wall clock can
        # jump while the tool is running.
        start_time = time.perf_counter()
        start_memory = psutil.Process().memory_info().rss / 1024 / 1024  # MB
        try:
            result = func(*args, **kwargs)
            success = True
            error_msg = ""
        except Exception as e:
            # Deliberately converted to a message so a failing tool still
            # yields a response and gets recorded.
            result = f"Error: {str(e)}"
            success = False
            error_msg = str(e)
        end_time = time.perf_counter()
        end_memory = psutil.Process().memory_info().rss / 1024 / 1024  # MB

        metrics = PerformanceMetrics(
            execution_time=end_time - start_time,
            memory_used=end_memory - start_memory,
            cpu_percent=psutil.cpu_percent(),
            success=success,
            error_message=error_msg
        )
        perf_monitor.record_metrics(func.__name__, metrics)
        return result

    return wrapper
@monitored_tool
def example_monitored_tool(data: str) -> str:
    """Demonstrate ``monitored_tool`` on a trivial text-processing tool."""
    # Simulate processing
    time.sleep(0.1)
    char_count = len(data)
    return f"Processed {char_count} characters"
Here are examples of well-documented, secure tools from real NOMOS deployments:
def calculate_investment_return(
    principal: float,
    annual_rate: float,
    years: int,
    compound_frequency: int = 12
) -> str:
    """
    Calculate compound investment returns.

    Calculates the future value of an investment with compound interest.
    Used for financial planning and investment advice.

    Args:
        principal: Initial investment amount (must be positive)
        annual_rate: Annual interest rate as decimal (e.g., 0.07 for 7%)
        years: Investment period in years (must be positive)
        compound_frequency: Compounding frequency per year (default: 12 for monthly)

    Returns:
        str: Detailed breakdown of investment growth, or a message starting
        with "Error:" when an input is invalid or the result is too large
        to represent

    Security:
        - Input validation prevents negative values
        - Results rounded to 2 decimal places
        - No external API calls or data storage
    """
    # Input validation
    if principal <= 0:
        return "Error: Principal must be positive"
    if annual_rate < 0:
        return "Error: Interest rate cannot be negative"
    if years <= 0:
        return "Error: Investment period must be positive"
    if compound_frequency <= 0:
        return "Error: Compound frequency must be positive"

    # Calculate compound interest
    try:
        growth_factor = (1 + annual_rate / compound_frequency) ** (compound_frequency * years)
        amount = principal * growth_factor
    except OverflowError:
        # Extreme rate/period combinations exceed the float range; report it
        # like every other invalid input instead of raising.
        return "Error: Result is too large to calculate"

    total_return = amount - principal
    return f"Investment: ${principal:,.2f}, After {years} years: ${amount:,.2f}, Total Return: ${total_return:,.2f}"
Learn the basics of tools and how to create your own
Integrate Python packages, CrewAI, and LangChain tools
Learn how to test your tools and agents
See complete examples of secure tools in action
By following these security practices and development guidelines, you can build tools that are both powerful and safe for production use. Remember that tools are often the primary attack vector for AI agents, so investing in security upfront pays dividends in reliability and user trust.