Protect sensitive user data and ensure compliance with privacy regulations like GDPR, CCPA, and HIPAA using PromptGuard’s advanced PII detection and redaction capabilities.
Data Privacy Overview
Data privacy is critical for AI applications that handle personal information. PromptGuard provides comprehensive protection for:
Personally Identifiable Information (PII)
- Contact Information: Email addresses, phone numbers, addresses
- Government IDs: Social Security Numbers, passport numbers, driver’s licenses
- Financial Data: Credit card numbers, bank accounts, routing numbers
- Health Information: Medical record numbers, health conditions
- Technical Identifiers: IP addresses, device IDs, session tokens
Sensitive Data Categories
- Biometric Data: Fingerprints, facial recognition data
- Location Data: GPS coordinates, precise locations
- Behavioral Data: Browsing patterns, user preferences
- Communication Data: Email content, chat messages
- Professional Data: Employee IDs, salary information
PII Detection and Redaction
Automatic PII Detection
PromptGuard automatically detects and handles common PII patterns:
# PLANNED - PII detection configuration API (not yet available)
# Currently, PII detection is configured via project presets in dashboard
# Configure PII detection settings (roadmap feature)
# Each detector below pairs an "action" ("redact", "block", or "anonymize")
# with the replacement token inserted in place of the detected value.
curl https://api.promptguard.co/api/v1/privacy/pii-detection \
-H "X-API-Key: YOUR_PROMPTGUARD_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"detection_config": {
"email_addresses": {
"enabled": true,
"action": "redact",
"replacement": "[EMAIL]"
},
"phone_numbers": {
"enabled": true,
"action": "redact",
"replacement": "[PHONE]"
},
"ssn": {
"enabled": true,
"action": "block",
"replacement": "[SSN]"
},
"credit_cards": {
"enabled": true,
"action": "block",
"replacement": "[CREDIT_CARD]"
},
"addresses": {
"enabled": true,
"action": "redact",
"replacement": "[ADDRESS]"
},
"names": {
"enabled": true,
"action": "anonymize",
"replacement": "[NAME]"
}
}
}'
Custom PII Patterns
Copy
# PLANNED - Custom PII patterns API (not yet available)
# Currently, use project presets or contact support for custom patterns
# Add custom PII detection patterns (roadmap feature)
# "pattern" values are regular expressions; backslashes are double-escaped
# because the payload is JSON.
curl https://api.promptguard.co/api/v1/privacy/custom-patterns \
-H "X-API-Key: YOUR_PROMPTGUARD_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"patterns": [
{
"name": "employee_id",
"pattern": "EMP-\\d{6}",
"category": "internal_id",
"action": "redact",
"replacement": "[EMPLOYEE_ID]"
},
{
"name": "medical_record",
"pattern": "MRN-\\d{8}",
"category": "healthcare",
"action": "block",
"replacement": "[MEDICAL_RECORD]"
},
{
"name": "api_key",
"pattern": "sk-[a-zA-Z0-9]{48}",
"category": "technical",
"action": "block",
"replacement": "[API_KEY]"
}
]
}'
Implementation Examples
Healthcare Data Protection (HIPAA)
Copy
import { createHash } from 'node:crypto';
import { OpenAI } from 'openai';
// PromptGuard acts as a drop-in OpenAI proxy: authenticate with the
// PromptGuard API key and point the SDK at the PromptGuard base URL.
const openai = new OpenAI({
apiKey: process.env.PROMPTGUARD_API_KEY,
baseURL: 'https://api.promptguard.co/api/v1'
});
/**
 * HIPAA-oriented wrapper around the PromptGuard-proxied OpenAI client.
 *
 * Keeps an in-memory audit trail of every access/processing event and scans
 * model output for common PHI patterns before returning it to the caller.
 * NOTE(review): several hooks (IP/session lookup, compliance export, patient
 * ACLs) are fail-closed placeholders — wire them to real infrastructure
 * before production use.
 */
class HIPAACompliantAI {
  constructor() {
    this.auditLog = []; // local audit entries; also forwarded for retention
    this.patientDataHandlers = new Map();
  }

  /**
   * Process a healthcare query on behalf of a user role.
   * @param {string} patientId - Patient identifier (hashed before logging).
   * @param {string} query - The user's question.
   * @param {string} userRole - 'doctor' | 'nurse' | 'admin' | 'patient'.
   * @returns {Promise<{response: string, phi_detected: boolean, audit_id: string}>}
   * @throws {Error} when the role is not authorized for this patient's data.
   */
  async processHealthcareQuery(patientId, query, userRole) {
    try {
      // Authorize before any patient data reaches the model.
      if (!this.isAuthorizedForPatientData(userRole, patientId)) {
        throw new Error('Unauthorized access to patient data');
      }
      // HIPAA audit trail: record the access attempt first.
      this.logDataAccess(patientId, query, userRole);
      const response = await this.processWithHIPAAProtection(query, patientId);
      this.logDataProcessing(patientId, 'success', userRole);
      return response;
    } catch (error) {
      this.logDataProcessing(patientId, 'error', userRole, error.message);
      throw error;
    }
  }

  /**
   * Send the query through the model with a restrictive system prompt, then
   * scan the completion for PHI before returning it.
   */
  async processWithHIPAAProtection(query, patientId) {
    const messages = [
      {
        role: 'system',
        content: `You are a HIPAA-compliant medical AI assistant.
IMPORTANT RULES:
- Never reveal specific patient identifiers
- Do not store or remember patient information between sessions
- Only provide general medical information, not specific diagnoses
- Always recommend consulting with healthcare professionals
- Do not process or discuss protected health information (PHI)
- If asked about specific patient data, redirect to proper channels`
      },
      {
        role: 'user',
        content: query
      }
    ];
    const completion = await openai.chat.completions.create({
      model: 'gpt-4',
      messages: messages,
      temperature: 0.3, // Lower temperature for consistency
      user: `healthcare_${patientId}` // Track for audit purposes
    });
    const response = completion.choices[0].message.content;
    // Belt-and-braces: scan the model output for PHI patterns.
    const scannedResponse = await this.scanForPHI(response);
    return {
      response: scannedResponse.cleanedResponse,
      phi_detected: scannedResponse.phiFound,
      audit_id: this.generateAuditId()
    };
  }

  /**
   * Scan text for PHI-looking patterns (SSNs, DOB-like dates, MRNs,
   * insurance ids, card numbers) and redact any matches.
   * @returns {Promise<{cleanedResponse: string, phiFound: boolean}>}
   */
  async scanForPHI(response) {
    const phiPatterns = [
      /\b\d{3}-\d{2}-\d{4}\b/g, // SSN
      /\b\d{2}\/\d{2}\/\d{4}\b/g, // Dates (potential DOB)
      /MRN[-:]?\s*\d+/gi, // Medical Record Numbers
      /\b[A-Z]{2}\d{7}\b/g, // Insurance numbers
      /\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b/g // Credit cards
    ];
    let cleanedResponse = response;
    let phiFound = false;
    for (const pattern of phiPatterns) {
      // Replace-and-compare instead of pattern.test(): .test() on a /g regex
      // advances lastIndex, which makes repeated calls stateful.
      const redacted = cleanedResponse.replace(pattern, '[REDACTED]');
      if (redacted !== cleanedResponse) {
        phiFound = true;
        cleanedResponse = redacted;
      }
    }
    return {
      cleanedResponse: cleanedResponse,
      phiFound: phiFound
    };
  }

  /** Append an access event to the audit log and forward it for retention. */
  logDataAccess(patientId, query, userRole) {
    const logEntry = {
      timestamp: new Date().toISOString(),
      event_type: 'data_access',
      patient_id: this.hashPatientId(patientId), // never log raw identifiers
      user_role: userRole,
      query_hash: this.hashContent(query), // hash, not the query text itself
      ip_address: this.getCurrentUserIP(),
      session_id: this.getCurrentSessionId()
    };
    this.auditLog.push(logEntry);
    this.sendToComplianceSystem(logEntry);
  }

  /** Append a processing outcome (success/error) to the audit log. */
  logDataProcessing(patientId, status, userRole, error = null) {
    const logEntry = {
      timestamp: new Date().toISOString(),
      event_type: 'data_processing',
      patient_id: this.hashPatientId(patientId),
      user_role: userRole,
      status: status,
      error: error,
      compliance_flags: this.getComplianceFlags()
    };
    this.auditLog.push(logEntry);
    this.sendToComplianceSystem(logEntry);
  }

  /**
   * Role gate: only known roles may proceed, and patients may only reach
   * their own records.
   */
  isAuthorizedForPatientData(userRole, patientId) {
    const authorizedRoles = ['doctor', 'nurse', 'admin', 'patient'];
    if (!authorizedRoles.includes(userRole)) {
      return false;
    }
    if (userRole === 'patient') {
      return this.isPatientAccessingOwnData(patientId);
    }
    return this.hasPatientAccess(userRole, patientId);
  }

  /**
   * TODO: compare patientId with the authenticated session's patient id.
   * Fails closed until wired to a real identity provider.
   */
  isPatientAccessingOwnData(patientId) {
    return false;
  }

  /**
   * TODO: replace with a real ACL lookup (e.g. care-team assignment).
   * Fails closed so the example never grants access by accident.
   */
  hasPatientAccess(userRole, patientId) {
    return false;
  }

  /** Hash patient ids so audit logs never contain raw identifiers. */
  hashPatientId(patientId) {
    // Uses the ESM `createHash` import; the original called require('crypto'),
    // which is undefined in an ES module.
    return createHash('sha256').update(patientId).digest('hex').substring(0, 16);
  }

  /** Hash arbitrary content (e.g. query text) for privacy-safe logging. */
  hashContent(content) {
    return createHash('sha256').update(content).digest('hex').substring(0, 16);
  }

  /** TODO: resolve the caller's IP from the request context. */
  getCurrentUserIP() {
    return 'unknown';
  }

  /** TODO: resolve the session id from the request context. */
  getCurrentSessionId() {
    return 'unknown';
  }

  /** TODO: forward audit entries to the compliance retention system. */
  sendToComplianceSystem(logEntry) {
    // no-op placeholder; entries remain available in this.auditLog
  }

  /** TODO: derive real compliance flags from configuration. */
  getComplianceFlags() {
    return ['hipaa'];
  }

  generateAuditId() {
    return 'audit_' + Date.now() + '_' + Math.random().toString(36).substring(7);
  }
}
// REST endpoint for HIPAA-protected healthcare queries.
app.post('/api/healthcare/query', async (req, res) => {
  const { patientId, query, userRole, sessionToken } = req.body;

  // Reject unauthenticated callers before doing any work.
  const session = await validateHealthcareSession(sessionToken);
  if (!session.isValid) {
    res.status(401).json({ error: 'Invalid session' });
    return;
  }

  const assistant = new HIPAACompliantAI();
  try {
    const outcome = await assistant.processHealthcareQuery(patientId, query, userRole);
    res.json({
      response: outcome.response,
      audit_id: outcome.audit_id,
      compliance_verified: true,
      phi_detected: outcome.phi_detected
    });
  } catch (err) {
    console.error('Healthcare query error:', err);
    // Generic message to the client; details stay server-side.
    res.status(500).json({
      error: 'Unable to process healthcare query',
      message: 'Please contact your healthcare provider',
      compliance_violation: err.message.includes('Unauthorized')
    });
  }
});
GDPR-Compliant Data Processing
Copy
/**
 * GDPR-aware processing pipeline: verifies consent, minimizes data per
 * purpose, records processing activities, and services data-subject
 * requests (access / rectification / erasure / portability).
 * NOTE(review): storage is in-memory; back these structures with durable,
 * auditable storage in production.
 */
class GDPRCompliantProcessor {
  constructor() {
    this.consentRecords = new Map(); // userId -> consent record
    this.dataSubjectRequests = []; // all DSRs, pending and resolved
    this.processingActivities = []; // processing register (Art. 30-style)
  }

  /**
   * Process `data` for `userId` under `processingPurpose`, enforcing
   * consent and data minimization first.
   * @throws {Error} when no valid consent covers the purpose.
   */
  async processWithGDPRCompliance(userId, data, processingPurpose) {
    try {
      const consentValid = await this.verifyConsent(userId, processingPurpose);
      if (!consentValid) {
        throw new Error('Valid consent required for data processing');
      }
      // Keep only the fields the declared purpose actually needs.
      const minimizedData = this.minimizeData(data, processingPurpose);
      const result = await this.processWithPrivacyProtection(
        userId,
        minimizedData,
        processingPurpose
      );
      this.logProcessingActivity(userId, processingPurpose, minimizedData);
      return result;
    } catch (error) {
      this.logProcessingError(userId, error);
      throw error;
    }
  }

  /**
   * A consent record is valid when it covers the purpose, is younger than
   * one year, and has not been withdrawn.
   */
  async verifyConsent(userId, purpose) {
    const consent = this.consentRecords.get(userId);
    if (!consent) {
      return false;
    }
    const isValid = consent.purposes.includes(purpose) &&
      consent.timestamp > Date.now() - (365 * 24 * 60 * 60 * 1000) && // 1 year
      !consent.withdrawn;
    return isValid;
  }

  /** Drop every field that the declared purpose does not require. */
  minimizeData(data, purpose) {
    const minimizationRules = {
      'analytics': ['user_id', 'session_id', 'timestamp'],
      'personalization': ['user_id', 'preferences', 'history'],
      'support': ['user_id', 'issue_type', 'communication']
    };
    const allowedFields = minimizationRules[purpose] || [];
    const minimized = {};
    allowedFields.forEach(field => {
      if (data[field] !== undefined) {
        minimized[field] = data[field];
      }
    });
    return minimized;
  }

  /**
   * Placeholder for the real privacy-protected processing step.
   * TODO: replace with the actual PromptGuard-backed processing call.
   * (The original called this method without ever defining it.)
   */
  async processWithPrivacyProtection(userId, data, purpose) {
    return {
      processed: true,
      purpose: purpose,
      fields: Object.keys(data)
    };
  }

  /** Record a processing activity in the register (field names only). */
  logProcessingActivity(userId, purpose, data) {
    this.processingActivities.push({
      timestamp: new Date().toISOString(),
      user_id: userId,
      purpose: purpose,
      fields: Object.keys(data) // never log field values
    });
  }

  /** Record a failed processing attempt alongside normal activities. */
  logProcessingError(userId, error) {
    this.processingActivities.push({
      timestamp: new Date().toISOString(),
      user_id: userId,
      status: 'error',
      error: error.message
    });
  }

  generateRequestId() {
    return 'dsr_' + Date.now() + '_' + Math.random().toString(36).substring(2, 9);
  }

  /** GDPR allows one month to answer a data-subject request. */
  calculateDeadline(requestType) {
    const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;
    return new Date(Date.now() + THIRTY_DAYS_MS).toISOString();
  }

  /**
   * Register and dispatch a data-subject request.
   * @throws {Error} for unknown request types.
   */
  async processDataSubjectRequest(requestType, userId, details) {
    const requestId = this.generateRequestId();
    const request = {
      id: requestId,
      type: requestType,
      userId: userId,
      details: details,
      timestamp: new Date().toISOString(),
      status: 'pending',
      deadline: this.calculateDeadline(requestType)
    };
    this.dataSubjectRequests.push(request);
    switch (requestType) {
      case 'access':
        return await this.handleAccessRequest(request);
      case 'rectification':
        return await this.handleRectificationRequest(request);
      case 'erasure':
        return await this.handleErasureRequest(request);
      case 'portability':
        return await this.handlePortabilityRequest(request);
      default:
        throw new Error('Unknown request type');
    }
  }

  /** Compile everything held about the user into a structured export. */
  async handleAccessRequest(request) {
    const userData = await this.gatherUserData(request.userId);
    const dataExport = {
      personal_data: userData.personal,
      processing_activities: userData.activities,
      consent_records: userData.consents,
      automated_decisions: userData.automatedDecisions
    };
    request.status = 'completed';
    request.response = dataExport;
    return {
      requestId: request.id,
      data: dataExport,
      format: 'structured_json',
      completion_date: new Date().toISOString()
    };
  }

  /** TODO: apply the requested corrections to stored personal data. */
  async handleRectificationRequest(request) {
    request.status = 'completed';
    return {
      requestId: request.id,
      status: 'completed',
      completion_date: new Date().toISOString()
    };
  }

  /** Export the user's data in a machine-readable, portable format. */
  async handlePortabilityRequest(request) {
    const userData = await this.gatherUserData(request.userId);
    request.status = 'completed';
    return {
      requestId: request.id,
      data: userData,
      format: 'structured_json',
      completion_date: new Date().toISOString()
    };
  }

  /**
   * Placeholder data gathering across stores.
   * TODO: query every system that holds this user's personal data.
   */
  async gatherUserData(userId) {
    return {
      personal: {},
      activities: this.processingActivities.filter(a => a.user_id === userId),
      consents: this.consentRecords.get(userId) ?? null,
      automatedDecisions: []
    };
  }

  /** Deny erasure or proceed, depending on retention obligations. */
  async handleErasureRequest(request) {
    const canErase = await this.verifyErasureRight(request.userId);
    if (!canErase) {
      request.status = 'denied';
      request.denial_reason = 'Legal obligations prevent erasure';
      return {
        requestId: request.id,
        status: 'denied',
        reason: 'Data must be retained for legal compliance'
      };
    }
    await this.eraseUserData(request.userId);
    request.status = 'completed';
    return {
      requestId: request.id,
      status: 'completed',
      erasure_date: new Date().toISOString(),
      retained_data: 'Legal and security logs only'
    };
  }

  /**
   * TODO: check statutory retention obligations before approving erasure.
   * Defaults to allowing erasure in this example.
   */
  async verifyErasureRight(userId) {
    return true;
  }

  /** Remove the user's personal data held by this processor. */
  async eraseUserData(userId) {
    this.consentRecords.delete(userId);
    // Processing logs are kept (legal/security retention per the
    // handleErasureRequest response contract).
  }

  /** TODO: signal downstream processors to halt withdrawn-purpose work. */
  stopProcessingForWithdrawnConsent(userId, purpose) {
    // no-op placeholder
  }

  /**
   * Withdraw consent for one purpose, or all consent when no purpose is
   * given, then stop any affected processing.
   */
  recordConsentWithdrawal(userId, purpose) {
    const consent = this.consentRecords.get(userId);
    if (consent) {
      if (purpose) {
        // Withdraw the specific purpose only.
        consent.purposes = consent.purposes.filter(p => p !== purpose);
      } else {
        // Withdraw all consent.
        consent.withdrawn = true;
        consent.withdrawalDate = new Date().toISOString();
      }
      this.consentRecords.set(userId, consent);
    }
    this.stopProcessingForWithdrawnConsent(userId, purpose);
  }
}
Privacy-by-Design Implementation
Data Anonymization
Copy
/**
 * Dataset anonymization toolkit: generalization, suppression, perturbation,
 * and pseudonymization, gated by a k-anonymity check on the result.
 */
class DataAnonymizer {
  constructor() {
    // Bind each technique so it can be invoked through the lookup table
    // without losing `this` (the original stored unbound references).
    this.anonymizationTechniques = {
      'generalization': this.generalizeData.bind(this),
      'suppression': this.suppressData.bind(this),
      'perturbation': this.perturbData.bind(this),
      'pseudonymization': this.pseudonymizeData.bind(this)
    };
  }

  /**
   * Apply the technique chain configured for `anonymizationLevel`, then
   * verify the k-anonymity requirement.
   * @throws {Error} when the anonymized data misses the required k.
   */
  async anonymizeDataset(dataset, anonymizationLevel = 'standard') {
    const config = this.getAnonymizationConfig(anonymizationLevel);
    let anonymizedData = [...dataset];
    for (const technique of config.techniques) {
      anonymizedData = await this.applyTechnique(
        anonymizedData,
        technique.name,
        technique.parameters
      );
    }
    const kValue = this.calculateKAnonymity(anonymizedData);
    if (kValue < config.minKValue) {
      throw new Error(`Anonymization failed: k-anonymity = ${kValue}, required = ${config.minKValue}`);
    }
    return {
      data: anonymizedData,
      kAnonymity: kValue,
      techniques: config.techniques,
      privacyMetrics: this.calculatePrivacyMetrics(dataset, anonymizedData)
    };
  }

  /** Dispatch to a registered technique (was called but never defined). */
  async applyTechnique(data, name, parameters) {
    const technique = this.anonymizationTechniques[name];
    if (!technique) {
      throw new Error(`Unknown anonymization technique: ${name}`);
    }
    return technique(data, parameters);
  }

  /** Per-level technique chains and their minimum acceptable k. */
  getAnonymizationConfig(level) {
    const configs = {
      'minimal': {
        techniques: [
          { name: 'pseudonymization', parameters: { fields: ['user_id'] } }
        ],
        minKValue: 2
      },
      'standard': {
        techniques: [
          { name: 'generalization', parameters: { fields: ['age', 'location'], levels: 2 } },
          { name: 'suppression', parameters: { threshold: 0.05 } },
          { name: 'pseudonymization', parameters: { fields: ['user_id', 'email'] } }
        ],
        minKValue: 5
      },
      'strict': {
        techniques: [
          { name: 'generalization', parameters: { fields: ['age', 'location', 'income'], levels: 3 } },
          { name: 'suppression', parameters: { threshold: 0.02 } },
          { name: 'perturbation', parameters: { fields: ['numerical_data'], noise: 0.1 } },
          { name: 'pseudonymization', parameters: { fields: ['all_identifiers'] } }
        ],
        minKValue: 10
      }
    };
    return configs[level] || configs.standard;
  }

  /** Generalize the configured fields on every record (non-mutating). */
  generalizeData(data, parameters) {
    const { fields, levels } = parameters;
    return data.map(record => {
      const newRecord = { ...record };
      fields.forEach(field => {
        if (newRecord[field]) {
          newRecord[field] = this.generalizeValue(newRecord[field], field, levels);
        }
      });
      return newRecord;
    });
  }

  generalizeValue(value, field, levels) {
    switch (field) {
      case 'age':
        return this.generalizeAge(value, levels);
      case 'location':
        return this.generalizeLocation(value, levels);
      case 'income':
        return this.generalizeIncome(value, levels);
      default:
        return value;
    }
  }

  /** Map an age to a range; higher levels give coarser ranges. */
  generalizeAge(age, levels) {
    const ranges = [
      [0, 18, '0-18'],
      [19, 30, '19-30'],
      [31, 50, '31-50'],
      [51, 70, '51-70'],
      [71, 120, '70+']
    ];
    if (levels >= 2) {
      // Coarser, three-bucket generalization.
      if (age < 30) return '18-30';
      if (age < 60) return '30-60';
      return '60+';
    }
    for (const [min, max, range] of ranges) {
      if (age >= min && age <= max) {
        return range;
      }
    }
    return '18+';
  }

  /**
   * Keep only the broadest location components ("City, Region, Country").
   * NOTE(review): naive string-based generalization — adapt to your schema.
   */
  generalizeLocation(location, levels) {
    const parts = String(location).split(',').map(part => part.trim());
    const keep = Math.max(1, parts.length - levels);
    return parts.slice(-keep).join(', ');
  }

  /** Bucket income into ranges that widen with the generalization level. */
  generalizeIncome(income, levels) {
    const width = levels >= 3 ? 50000 : levels === 2 ? 25000 : 10000;
    const lower = Math.floor(income / width) * width;
    return `${lower}-${lower + width}`;
  }

  /**
   * Null out values too rare to hide in the crowd: any field value whose
   * frequency across the dataset is below `threshold` (fraction).
   */
  suppressData(data, parameters) {
    const { threshold = 0.05 } = parameters ?? {};
    const counts = new Map();
    for (const record of data) {
      for (const [field, value] of Object.entries(record)) {
        const key = `${field}:${String(value)}`;
        counts.set(key, (counts.get(key) ?? 0) + 1);
      }
    }
    const minCount = data.length * threshold;
    return data.map(record => {
      const out = { ...record };
      for (const [field, value] of Object.entries(out)) {
        if ((counts.get(`${field}:${String(value)}`) ?? 0) < minCount) {
          out[field] = null; // suppress rare (re-identifying) values
        }
      }
      return out;
    });
  }

  /** Add bounded multiplicative noise to the configured numeric fields. */
  perturbData(data, parameters) {
    const { fields = [], noise = 0.1 } = parameters ?? {};
    return data.map(record => {
      const out = { ...record };
      for (const field of fields) {
        if (typeof out[field] === 'number') {
          out[field] = out[field] * (1 + (Math.random() * 2 - 1) * noise);
        }
      }
      return out;
    });
  }

  /** Replace identifier values with stable per-value pseudonyms. */
  pseudonymizeData(data, parameters) {
    const { fields } = parameters;
    const pseudonymMap = new Map();
    return data.map(record => {
      const newRecord = { ...record };
      fields.forEach(field => {
        if (newRecord[field]) {
          if (!pseudonymMap.has(newRecord[field])) {
            pseudonymMap.set(newRecord[field], this.generatePseudonym());
          }
          newRecord[field] = pseudonymMap.get(newRecord[field]);
        }
      });
      return newRecord;
    });
  }

  generatePseudonym() {
    return 'pseudo_' + Math.random().toString(36).substring(2, 15);
  }

  /** k = size of the smallest quasi-identifier equivalence class. */
  calculateKAnonymity(data) {
    if (data.length === 0) {
      return 0; // guard: Math.min() of no values is Infinity
    }
    const groups = this.groupByQuasiIdentifiers(data);
    return Math.min(...Object.values(groups).map(group => group.length));
  }

  /**
   * NOTE(review): treats every field as a quasi-identifier; narrow this to
   * the real quasi-identifier set for production datasets.
   */
  groupByQuasiIdentifiers(data) {
    const groups = {};
    for (const record of data) {
      const key = JSON.stringify(record, Object.keys(record).sort());
      (groups[key] = groups[key] || []).push(record);
    }
    return groups;
  }

  calculatePrivacyMetrics(original, anonymized) {
    return {
      dataUtility: this.calculateDataUtility(original, anonymized),
      informationLoss: this.calculateInformationLoss(original, anonymized),
      reidentificationRisk: this.calculateReidentificationRisk(anonymized)
    };
  }

  /** Fraction of cell values left unchanged — a crude utility proxy. */
  calculateDataUtility(original, anonymized) {
    let total = 0;
    let kept = 0;
    original.forEach((record, i) => {
      for (const [field, value] of Object.entries(record)) {
        total += 1;
        if (anonymized[i] && anonymized[i][field] === value) {
          kept += 1;
        }
      }
    });
    return total === 0 ? 1 : kept / total;
  }

  calculateInformationLoss(original, anonymized) {
    return 1 - this.calculateDataUtility(original, anonymized);
  }

  /** Common approximation: re-identification risk = 1 / k. */
  calculateReidentificationRisk(anonymized) {
    const k = this.calculateKAnonymity(anonymized);
    return k > 0 ? 1 / k : 1;
  }
}
Privacy Compliance Framework
Copy
from datetime import datetime, timezone
from typing import Dict, List


class PrivacyComplianceFramework:
    """Rule catalogue and assessment helpers for GDPR/CCPA processing.

    Holds per-regulation requirements, builds privacy-policy skeletons from
    them, scores processing activities for compliance, and produces
    lightweight privacy impact assessments (PIAs).
    """

    def __init__(self):
        # Static requirement catalogue per supported regulation.
        self.regulations = {
            'GDPR': {
                'data_subject_rights': [
                    'access', 'rectification', 'erasure', 'portability',
                    'restriction', 'objection', 'automated_decision_making'
                ],
                'lawful_bases': [
                    'consent', 'contract', 'legal_obligation',
                    'vital_interests', 'public_task', 'legitimate_interests'
                ],
                'retention_periods': {
                    'default': 365 * 2,  # 2 years
                    'marketing': 365 * 3,  # 3 years
                    'financial': 365 * 7  # 7 years
                }
            },
            'CCPA': {
                'consumer_rights': [
                    'know', 'delete', 'opt_out', 'non_discrimination'
                ],
                'categories': [
                    'identifiers', 'personal_info', 'commercial',
                    'biometric', 'internet_activity', 'geolocation',
                    'sensory', 'professional', 'education', 'inferences'
                ]
            }
        }
        self.privacy_policies = {}   # policy name -> policy dict
        self.compliance_checks = []  # reserved for externally registered checks

    def create_privacy_policy(self, regulation: str, data_types: List[str],
                              processing_purposes: List[str]) -> Dict:
        """Create a privacy-policy skeleton for ``regulation``.

        Raises:
            ValueError: if the regulation is not in the catalogue.
        """
        if regulation not in self.regulations:
            raise ValueError(f"Unsupported regulation: {regulation}")
        reg_config = self.regulations[regulation]
        policy = {
            'regulation': regulation,
            # timezone-aware timestamp (datetime.utcnow is deprecated)
            'created_date': datetime.now(timezone.utc).isoformat(),
            'data_types': data_types,
            'processing_purposes': processing_purposes,
            'retention_schedule': self._calculate_retention_schedule(
                data_types, processing_purposes, reg_config
            ),
            'subject_rights': reg_config.get('data_subject_rights', []),
            'lawful_basis': self._determine_lawful_basis(
                processing_purposes, reg_config
            ),
            'security_measures': self._define_security_measures(data_types),
            'third_party_sharing': [],
            'international_transfers': []
        }
        self.privacy_policies[f"{regulation}_{len(self.privacy_policies)}"] = policy
        return policy

    def _calculate_retention_schedule(self, data_types: List[str],
                                      purposes: List[str],
                                      reg_config: Dict) -> Dict:
        """Map each purpose to its maximum retention period in days."""
        periods = reg_config.get('retention_periods', {})
        default = periods.get('default', 365)
        return {purpose: periods.get(purpose, default) for purpose in purposes}

    def _determine_lawful_basis(self, purposes: List[str], reg_config: Dict):
        """Pick a lawful basis where the regulation defines them (GDPR).

        TODO: derive the basis from the purposes rather than defaulting
        to consent.
        """
        bases = reg_config.get('lawful_bases', [])
        if not bases:
            return None
        return 'consent' if 'consent' in bases else bases[0]

    def _define_security_measures(self, data_types: List[str]) -> List[str]:
        """Baseline technical measures; add pseudonymization for sensitive types."""
        measures = [
            'encryption_at_rest', 'encryption_in_transit',
            'access_control', 'audit_logging'
        ]
        if any(t in ('biometric', 'health', 'identifiers') for t in data_types):
            measures.append('pseudonymization')
        return measures

    def assess_compliance(self, data_processing_activity: Dict) -> Dict:
        """Score an activity against every regulation in the catalogue.

        The score is the share of performed checks that found no violation.
        (Previously this divided by the always-empty ``compliance_checks``
        list, pinning the score at 0 regardless of violations.)
        """
        assessment = {
            'activity': data_processing_activity,
            'compliance_score': 0,
            'violations': [],
            'recommendations': [],
            'risk_level': 'low'
        }
        total_checks = 0
        for reg_name, reg_config in self.regulations.items():
            reg_assessment = self._assess_regulation_compliance(
                data_processing_activity, reg_name, reg_config
            )
            assessment[f'{reg_name}_compliance'] = reg_assessment
            assessment['violations'].extend(reg_assessment['violations'])
            assessment['recommendations'].extend(reg_assessment['recommendations'])
            total_checks += reg_assessment['checks_performed']
        passed_checks = total_checks - len(assessment['violations'])
        assessment['compliance_score'] = (
            (passed_checks / total_checks) * 100 if total_checks > 0 else 100.0
        )
        # Risk bands: <60 high, <80 medium, otherwise low.
        if assessment['compliance_score'] < 60:
            assessment['risk_level'] = 'high'
        elif assessment['compliance_score'] < 80:
            assessment['risk_level'] = 'medium'
        else:
            assessment['risk_level'] = 'low'
        return assessment

    def _assess_regulation_compliance(self, activity: Dict,
                                      regulation: str, config: Dict) -> Dict:
        """Run the per-regulation checks, counting them for scoring."""
        violations = []
        recommendations = []
        checks_performed = 0
        if regulation == 'GDPR':
            # Consent is required when it is the declared lawful basis.
            checks_performed += 1
            if not activity.get('consent_obtained') and \
                    activity.get('lawful_basis') == 'consent':
                violations.append({
                    'type': 'missing_consent',
                    'description': 'Consent required but not obtained',
                    'severity': 'high'
                })
            # Data minimization principle.
            checks_performed += 1
            if not activity.get('data_minimized'):
                violations.append({
                    'type': 'data_minimization',
                    'description': 'Data minimization principle not applied',
                    'severity': 'medium'
                })
        # Retention: compare against the purpose-specific cap.
        checks_performed += 1
        retention_period = activity.get('retention_period')
        max_retention = config.get('retention_periods', {}).get(
            activity.get('purpose'),
            config.get('retention_periods', {}).get('default', 365)
        )
        if retention_period and retention_period > max_retention:
            violations.append({
                'type': 'excessive_retention',
                'description': f'Retention period exceeds maximum allowed ({max_retention} days)',
                'severity': 'medium'
            })
        # Security is advisory: a recommendation rather than a violation.
        if not activity.get('encryption_enabled'):
            recommendations.append({
                'type': 'security_enhancement',
                'description': 'Enable encryption for data at rest and in transit',
                'priority': 'high'
            })
        return {
            'regulation': regulation,
            'violations': violations,
            'recommendations': recommendations,
            'checks_performed': checks_performed,
            'compliant': len(violations) == 0
        }

    def generate_privacy_impact_assessment(self, processing_activity: Dict) -> Dict:
        """Generate a lightweight Privacy Impact Assessment (PIA)."""
        pia = {
            'assessment_date': datetime.now(timezone.utc).isoformat(),
            'activity': processing_activity,
            'risk_assessment': self._assess_privacy_risks(processing_activity),
            'mitigation_measures': self._recommend_mitigation_measures(processing_activity),
            'compliance_status': self.assess_compliance(processing_activity),
            'approval_required': False
        }
        # DPO / authority involvement for any high-risk indicator.
        high_risk_indicators = [
            processing_activity.get('involves_sensitive_data', False),
            processing_activity.get('large_scale_processing', False),
            processing_activity.get('automated_decision_making', False),
            processing_activity.get('public_monitoring', False)
        ]
        if any(high_risk_indicators):
            pia['approval_required'] = True
            pia['recommended_actions'] = [
                'Consult with Data Protection Officer',
                'Consider regulatory consultation',
                'Implement additional safeguards'
            ]
        return pia

    def _recommend_mitigation_measures(self, activity: Dict) -> List[str]:
        """Suggest mitigations driven by the activity's risk flags."""
        measures = []
        if activity.get('involves_sensitive_data'):
            measures.append('Pseudonymize or anonymize data before processing')
        if activity.get('automated_decision_making'):
            measures.append('Provide human review of automated decisions')
        if activity.get('large_scale_processing'):
            measures.append('Minimize retention and review DPIAs regularly')
        if not activity.get('encryption_enabled'):
            measures.append('Enable encryption at rest and in transit')
        return measures

    def _assess_privacy_risks(self, activity: Dict) -> Dict:
        """Rate identification/discrimination/financial/reputational risk."""
        risks = {
            'identification_risk': 'low',
            'discrimination_risk': 'low',
            'financial_risk': 'low',
            'reputational_risk': 'low',
            'overall_risk': 'low'
        }
        sensitive_data = activity.get('involves_sensitive_data', False)
        large_scale = activity.get('large_scale_processing', False)
        automated_decisions = activity.get('automated_decision_making', False)
        if sensitive_data:
            risks['identification_risk'] = 'high'
            risks['discrimination_risk'] = 'medium'
        if large_scale:
            risks['reputational_risk'] = 'medium'
        if automated_decisions:
            risks['discrimination_risk'] = 'high'
            risks['financial_risk'] = 'medium'
        # Overall risk is the worst of the individual categories.
        risk_levels = [v for k, v in risks.items() if k != 'overall_risk']
        if 'high' in risk_levels:
            risks['overall_risk'] = 'high'
        elif 'medium' in risk_levels:
            risks['overall_risk'] = 'medium'
        return risks
Privacy Testing and Validation
PII Detection Testing
Copy
/**
 * Exercises a PII-detection endpoint against a fixed suite of labelled
 * examples and summarises pass/fail rates per category.
 */
class PIIDetectionTester {
  constructor(piiDetectionEndpoint) {
    this.endpoint = piiDetectionEndpoint;
    this.testCases = this.loadTestCases();
  }

  // Labelled fixtures: `expected` marks whether the text contains real PII.
  loadTestCases() {
    return {
      emails: [
        { text: "Contact me at [email protected]", expected: true },
        { text: "My email is [email protected]", expected: true },
        { text: "Email at domain dot com", expected: false }
      ],
      phones: [
        { text: "Call me at (555) 123-4567", expected: true },
        { text: "Phone: 555.123.4567", expected: true },
        { text: "Five five five one two three four", expected: false }
      ],
      ssn: [
        { text: "My SSN is 123-45-6789", expected: true },
        { text: "Social Security: 987654321", expected: true },
        { text: "ID number one two three", expected: false }
      ],
      credit_cards: [
        { text: "My card is 4532-1234-5678-9012", expected: true },
        { text: "Credit card: 4532123456789012", expected: true },
        { text: "Card ending in 9012", expected: false }
      ]
    };
  }

  // Run every category sequentially and return the aggregated report.
  async runAllTests() {
    const byCategory = {};
    for (const [category, cases] of Object.entries(this.testCases)) {
      byCategory[category] = await this.runCategoryTests(category, cases);
    }
    return this.generateTestReport(byCategory);
  }

  // Execute one category's cases; API errors count as failures.
  async runCategoryTests(category, cases) {
    const outcomes = [];
    for (const testCase of cases) {
      try {
        const details = await this.testPIIDetection(testCase.text);
        const wasDetected =
          details.pii_detected && details.detected_types.includes(category);
        outcomes.push({
          ...testCase,
          detected: wasDetected,
          passed: wasDetected === testCase.expected,
          details: details
        });
      } catch (err) {
        outcomes.push({
          ...testCase,
          detected: false,
          passed: false,
          error: err.message
        });
      }
    }
    return outcomes;
  }

  // POST one text sample to the detection endpoint and return its JSON.
  async testPIIDetection(text) {
    const payload = JSON.stringify({ content: text });
    const response = await fetch(this.endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: payload
    });
    if (!response.ok) {
      throw new Error(`API request failed: ${response.status}`);
    }
    return response.json();
  }

  // Fold per-category outcomes into totals, rates, and a failure list.
  generateTestReport(results) {
    const report = { summary: {}, by_category: {}, failed_tests: [] };
    let total = 0;
    let passed = 0;
    for (const [category, outcomes] of Object.entries(results)) {
      const passCount = outcomes.filter(o => o.passed).length;
      total += outcomes.length;
      passed += passCount;
      report.by_category[category] = {
        total: outcomes.length,
        passed: passCount,
        failed: outcomes.length - passCount,
        pass_rate: (passCount / outcomes.length) * 100
      };
      for (const failure of outcomes.filter(o => !o.passed)) {
        report.failed_tests.push({ ...failure, category });
      }
    }
    report.summary = {
      total_tests: total,
      total_passed: passed,
      total_failed: total - passed,
      overall_pass_rate: (passed / total) * 100
    };
    return report;
  }
}
// Usage: run the full suite and log the report. The .catch prevents an
// unhandled promise rejection if the endpoint is unreachable (the original
// left the promise chain without a rejection handler).
const tester = new PIIDetectionTester('/api/detect-pii');
tester
  .runAllTests()
  .then(report => {
    console.log('PII Detection Test Report:', report);
  })
  .catch(error => {
    console.error('PII detection tests failed:', error);
  });
Next Steps
Enterprise Setup
Configure PromptGuard for enterprise environments
Content Moderation
Implement comprehensive content filtering
Chatbot Protection
Secure conversational AI applications
Security Overview
Complete security configuration guide