0% found this document useful (0 votes)
11 views36 pages

allcodes

The document contains Python code for compliance rules and file tracking, focusing on data protection regulations such as GDPR, HIPAA, PCI-DSS, and NIST. It includes functions to check file encryption, access control, PHI protection, and password complexity, as well as a FileTracker class for monitoring file modifications. Additionally, it provides utilities for handling sensitive information and determining file types based on extensions and content.

Uploaded by

sawet61245
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
11 views36 pages

allcodes

The document contains Python code for compliance rules and file tracking, focusing on data protection regulations such as GDPR, HIPAA, PCI-DSS, and NIST. It includes functions to check file encryption, access control, PHI protection, and password complexity, as well as a FileTracker class for monitoring file modifications. Additionally, it provides utilities for handling sensitive information and determining file types based on extensions and content.

Uploaded by

sawet61245
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 36

#compliance_rules.py
import os
import re
from typing import Callable, Dict, Any, Optional
import logging

logger = logging.getLogger(__name__)

def safe_read_file(path: str, read_bytes: bool = False, max_size: int = 1024) -> Optional[Any]:
    """Safely read up to max_size bytes/characters from a file.

    Args:
        path: Path of the file to read.
        read_bytes: If True, return raw bytes; otherwise return text.
        max_size: Maximum number of bytes/characters to read.

    Returns:
        The file content (bytes or str), or None if the file cannot be read.
    """
    try:
        if read_bytes:
            with open(path, 'rb') as f:
                return f.read(max_size)
        # Bug fix: text-mode reads of non-UTF-8 data raised
        # UnicodeDecodeError, which the except clause below does not catch.
        # Decode with errors ignored so the caller always gets best-effort text.
        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
            return f.read(max_size)
    except (IOError, OSError) as e:
        logger.debug(f"Cannot read file {path}: {str(e)}")
        return None

def check_data_encryption(path: str) -> bool:
    """Check whether a file appears to be encrypted.

    A file passes if it carries the ``.encrypted`` suffix, or if its
    leading bytes contain a well-known encryption marker.
    """
    # Files explicitly marked by extension are trusted as encrypted.
    if path.endswith(".encrypted"):
        return True

    data = safe_read_file(path, read_bytes=True)
    if not data:
        # Unreadable or empty files cannot be confirmed as encrypted.
        return False

    # Look for common encryption headers/markers in the sampled bytes.
    markers = (b"ENCRYPTED", b"AES", b"-----BEGIN PGP MESSAGE-----")
    for marker in markers:
        if marker in data:
            return True
    return False

def check_access_control(path: str) -> bool:
    """Check if file has secure (owner-only) permissions.

    Returns True when no group/other permission bit is set, so 0o600
    passes, as do stricter modes such as 0o400. The original test
    required exactly 0o600 and rejected more restrictive modes.
    """
    try:
        permissions = os.stat(path).st_mode & 0o777
        # Any group/other access bit is a violation; owner bits are fine.
        return permissions & 0o077 == 0
    except (IOError, OSError):
        logger.debug(f"Cannot check permissions for {path}")
        return False

def check_phi_protection(path: str) -> bool:
    """Check that a file does not expose PHI (protected health information).

    Returns True when no PHI indicator is found or the file is
    unreadable; False when a PHI marker appears in the sampled content.
    """
    content = safe_read_file(path, read_bytes=True)
    if not content:
        return True  # Can't read file, assume it's protected

    # The content is lowered once, so every pattern must be lowercase.
    # Bug fix: the original list contained b"PHI" (uppercase), which can
    # never occur in content.lower(), silently disabling that indicator.
    # Note the lowercase b"phi" is broader (matches e.g. "graphic"); kept
    # intentionally since false positives are safer for a compliance scan.
    phi_patterns = [b"phi", b"patient", b"health record", b"medical record"]
    lowered = content.lower()
    return not any(pattern in lowered for pattern in phi_patterns)

def check_cardholder_data(path: str) -> bool:
    """Check that a file does not contain cardholder-data indicators.

    Returns True when no indicator is found or the file is unreadable;
    False when a cardholder-data marker appears in the sampled content.
    """
    content = safe_read_file(path, read_bytes=True)
    if not content:
        return True  # Can't read file, assume it's protected

    # The content is lowered once, so every pattern must be lowercase.
    # Bug fix: the original b"CVV"/b"CVC" (uppercase) could never match
    # content.lower(), so those indicators were silently ignored.
    card_patterns = [b"cardholder", b"credit card", b"card number", b"cvv", b"cvc"]
    lowered = content.lower()
    return not any(pattern in lowered for pattern in card_patterns)

def check_password_complexity(path: str) -> bool:
    """Check that a file's content satisfies password-complexity rules.

    The content must contain at least one digit, one uppercase letter,
    one lowercase letter, and one special character somewhere.
    """
    content = safe_read_file(path)
    if not content:
        return True  # Can't read file, assume it's compliant

    # Every character class must be represented in the content.
    required_classes = (
        r'\d',                      # digit
        r'[A-Z]',                   # uppercase letter
        r'[a-z]',                   # lowercase letter
        r'[!@#$%^&*(),.?":{}|<>]',  # special character
    )
    return all(re.search(pattern, content) for pattern in required_classes)

# Rule definitions
# Each framework maps rule IDs to a rule record with:
#   description    – human-readable issue summary reported on failure
#   check          – callable(path) -> bool; False means the rule failed
#   recommendation – remediation advice shown in reports
#   severity       – report severity label
GDPR_RULES = {
    "DATA_ENCRYPTION": {
        "description": "Unencrypted sensitive data detected",
        "check": check_data_encryption,
        "recommendation": "Encrypt using AES-256",
        "severity": "High"
    },
    "ACCESS_CONTROL": {
        "description": "Insecure file permissions",
        "check": check_access_control,
        "recommendation": "Set permissions to 600",
        "severity": "Medium"
    }
}

# HIPAA: protected health information (PHI) exposure checks.
HIPAA_RULES = {
    "PHI_PROTECTION": {
        "description": "PHI data exposure",
        "check": check_phi_protection,
        "recommendation": "Remove/encrypt PHI",
        "severity": "Critical"
    }
}

# PCI-DSS: cardholder data exposure checks.
PCI_DSS_RULES = {
    "CARDHOLDER_DATA": {
        "description": "Cardholder data in plaintext",
        "check": check_cardholder_data,
        "recommendation": "Encrypt data",
        "severity": "High"
    }
}

# NIST: password policy checks.
NIST_RULES = {
    "PASSWORD_COMPLEXITY": {
        "description": "Weak password policy",
        "check": check_password_complexity,
        "recommendation": "Enforce NIST guidelines",
        "severity": "Medium"
    }
}

# Combined rules dictionary for easy access; keys are the framework
# names accepted by the scanner (see FileScanner.__init__).
ALL_RULES = {
    "GDPR": GDPR_RULES,
    "HIPAA": HIPAA_RULES,
    "PCI-DSS": PCI_DSS_RULES,
    "NIST": NIST_RULES
}

#file_tracker.py
import os
import json
import time
from typing import List, Dict

class FileTracker:
    """Cache of file modification times supporting incremental scans.

    The cache is persisted as a JSON file inside ``cache_dir``.
    """

    def __init__(self, cache_dir: str):
        self.cache_dir = cache_dir
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir, exist_ok=True)
        self.cache_file = os.path.join(self.cache_dir, "file_cache.json")
        self.file_cache: Dict[str, float] = self._load_cache()

    def _load_cache(self) -> Dict[str, float]:
        """Load the persisted cache, falling back to an empty one."""
        if not os.path.exists(self.cache_file):
            return {}
        try:
            with open(self.cache_file, "r") as handle:
                return json.load(handle)
        except Exception:
            # A corrupt or unreadable cache simply means a full rescan.
            return {}

    def get_modified_files(self, scan_path: str) -> List[str]:
        """Return files under scan_path modified since their cached mtime."""
        changed: List[str] = []
        for directory, _, filenames in os.walk(scan_path):
            for name in filenames:
                full_path = os.path.join(directory, name)
                try:
                    mtime = os.path.getmtime(full_path)
                except Exception:
                    # File vanished or is unreadable; skip it.
                    continue
                if mtime > self.file_cache.get(full_path, 0):
                    changed.append(full_path)
        return changed

    def update_file_cache(self, scan_path: str) -> None:
        """Record current modification times for every file in scan_path."""
        for directory, _, filenames in os.walk(scan_path):
            for name in filenames:
                full_path = os.path.join(directory, name)
                try:
                    self.file_cache[full_path] = os.path.getmtime(full_path)
                except Exception:
                    continue
        self._save_cache()

    def _save_cache(self) -> None:
        """Persist the in-memory cache to disk as JSON."""
        try:
            with open(self.cache_file, "w") as handle:
                json.dump(self.file_cache, handle, indent=2)
        except Exception as exc:
            print(f"Error saving file cache: {exc}")

#helper_utils.py
import os
import sys
import platform
import re
import json
import logging
from typing import Dict, List, Optional, Any, Tuple

logger = logging.getLogger(__name__)

def get_os_info() -> Dict[str, str]:
    """Collect basic operating-system information.

    On Linux, fields from /etc/os-release are merged in as well
    (keys lowercased, surrounding quotes stripped from values).
    """
    info = {
        "system": platform.system(),
        "release": platform.release(),
        "version": platform.version(),
        "machine": platform.machine(),
        "processor": platform.processor(),
    }

    # Enrich with distribution details when running on Linux.
    if info["system"] == "Linux":
        try:
            with open("/etc/os-release", "r") as handle:
                for raw_line in handle:
                    if "=" not in raw_line:
                        continue
                    key, value = raw_line.strip().split("=", 1)
                    info[key.lower()] = value.strip('"')
        except Exception as e:
            logger.debug(f"Could not read /etc/os-release: {str(e)}")
    return info

def sanitize_output(text: str) -> str:
    """Redact common credential assignments (passwords, API keys, tokens, secrets).

    Each matching ``name = 'value'`` assignment is rewritten with its value
    replaced by ``[REDACTED]``; all other text is returned unchanged.
    """
    # (pattern, replacement) pairs applied in order, case-insensitively.
    redactions = (
        (r"password\s*=\s*['\"](.*?)['\"]", "password = '[REDACTED]'"),
        (r"api[-_]?key\s*=\s*['\"](.*?)['\"]", "api_key = '[REDACTED]'"),
        (r"token\s*=\s*['\"](.*?)['\"]", "token = '[REDACTED]'"),
        (r"secret\s*=\s*['\"](.*?)['\"]", "secret = '[REDACTED]'"),
    )
    for pattern, replacement in redactions:
        text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
    return text

def parse_requirements(requirements_file: str) -> Dict[str, str]:
    """Parse a requirements.txt file into ``{package_name: version}``.

    Pinned requirements (``pkg==1.2``) map to their version; any other
    specifier (``>=``, ``<=``, ``~=``, ``>``, ``<``, ``!=``) or a bare
    name maps to "latest". Blank lines and comments are skipped.
    """
    packages: Dict[str, str] = {}

    try:
        with open(requirements_file, "r") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                if "==" in line:
                    # Exact pin: record the version.
                    name, version = line.split("==", 1)
                    name = name.split("[")[0].strip()        # drop extras
                    version = version.split("#")[0].strip()  # drop comments
                    packages[name] = version
                else:
                    # Bug fix: ranged specifiers (">=", "~=", ...) used to
                    # be kept as part of the package name; strip them off.
                    name = re.split(r"[><~!=]", line, 1)[0]
                    name = name.split("[")[0].strip()
                    if name:
                        packages[name] = "latest"
    except Exception as e:
        logger.error(f"Error parsing requirements file: {str(e)}")

    return packages

def get_config(config_file: str) -> Dict[str, Any]:
    """Load configuration from a JSON file; returns {} on any failure."""
    config: Dict[str, Any] = {}
    try:
        if not os.path.exists(config_file):
            logger.warning(f"Config file not found: {config_file}")
        else:
            with open(config_file, "r") as handle:
                config = json.load(handle)
    except Exception as e:
        logger.error(f"Error loading config file: {str(e)}")
    return config

def save_config(config: Dict[str, Any], config_file: str) -> bool:
    """Save configuration to a JSON file.

    Returns:
        True on success, False on any failure (logged).
    """
    try:
        # Create the parent directory if needed. Bug fix: for a bare
        # filename os.path.dirname() returns "" and os.makedirs("")
        # raises, so saving into the current directory always failed.
        parent = os.path.dirname(config_file)
        if parent:
            os.makedirs(parent, exist_ok=True)

        with open(config_file, "w") as f:
            json.dump(config, f, indent=4)
        return True
    except Exception as e:
        logger.error(f"Error saving config file: {str(e)}")
        return False

# Extension -> human-readable file type. Built once at module level so the
# (formerly 40-branch elif chain) lookup is a single dict access.
_EXTENSION_TYPES: Dict[str, str] = {
    '.py': 'Python', '.pyw': 'Python',
    '.js': 'JavaScript', '.jsx': 'JavaScript', '.ts': 'JavaScript', '.tsx': 'JavaScript',
    '.html': 'HTML', '.htm': 'HTML',
    '.css': 'CSS',
    '.json': 'JSON',
    '.xml': 'XML',
    '.md': 'Markdown', '.markdown': 'Markdown',
    '.txt': 'Text',
    '.sh': 'Shell', '.bash': 'Shell',
    '.bat': 'Batch', '.cmd': 'Batch',
    '.php': 'PHP',
    '.java': 'Java',
    '.rb': 'Ruby',
    '.go': 'Go',
    '.rs': 'Rust',
    '.c': 'C/C++', '.cpp': 'C/C++', '.cc': 'C/C++', '.h': 'C/C++', '.hpp': 'C/C++',
    '.cs': 'C#',
    '.sql': 'SQL',
    '.yaml': 'YAML', '.yml': 'YAML',
    '.toml': 'TOML',
    '.ini': 'INI', '.cfg': 'INI',
    '.log': 'Log',
    '.env': 'Env',
    '.csv': 'CSV',
    '.doc': 'Word', '.docx': 'Word',
    '.xls': 'Excel', '.xlsx': 'Excel',
    '.ppt': 'PowerPoint', '.pptx': 'PowerPoint',
    '.pdf': 'PDF',
    '.zip': 'Archive', '.tar': 'Archive', '.gz': 'Archive', '.bz2': 'Archive',
    '.7z': 'Archive', '.rar': 'Archive',
    '.exe': 'Binary', '.dll': 'Binary', '.so': 'Binary', '.dylib': 'Binary',
    '.jpg': 'Image', '.jpeg': 'Image', '.png': 'Image', '.gif': 'Image',
    '.bmp': 'Image', '.svg': 'Image',
    '.mp3': 'Audio', '.wav': 'Audio', '.ogg': 'Audio', '.flac': 'Audio',
    '.mp4': 'Video', '.avi': 'Video', '.mov': 'Video', '.wmv': 'Video',
    '.ttf': 'Font', '.otf': 'Font', '.woff': 'Font', '.woff2': 'Font',
}


def get_file_type(file_path: str) -> str:
    """Determine a file's type from its extension, falling back to content.

    Returns a human-readable type name ('Python', 'Archive', ...) or
    'Unknown' when neither the extension nor the content is recognized.
    """
    _, ext = os.path.splitext(file_path)
    ext = ext.lower()

    # Bug fix: for dotfiles such as ".env", splitext() puts the whole name
    # in the root and returns an empty extension; use the name itself.
    if not ext:
        basename = os.path.basename(file_path).lower()
        if basename.startswith('.'):
            ext = basename

    file_type = _EXTENSION_TYPES.get(ext)
    if file_type:
        return file_type

    # Unknown extension: sniff the first 4KB of text content.
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read(4096)

        if '#!/bin/bash' in content or '#!/bin/sh' in content:
            return 'Shell'
        elif '<?php' in content:
            return 'PHP'
        elif '<html' in content or '<!DOCTYPE html' in content:
            return 'HTML'
        elif 'import ' in content and 'from ' in content and 'def ' in content:
            return 'Python'
        elif '{' in content and '}' in content and (
                'function(' in content or 'var ' in content or 'const ' in content):
            return 'JavaScript'
    except Exception:
        # Unreadable as text; fall through to the binary probe.
        pass

    # Last resort: a NUL byte in the first 1KB implies a binary file.
    try:
        with open(file_path, 'rb') as f:
            if b'\0' in f.read(1024):
                return 'Binary'
    except Exception:
        pass

    return 'Unknown'

#password_checker.py
import os
import re
import logging
from typing import List, Dict, Any

logger = logging.getLogger(__name__)

class PasswordChecker:
    """Scans a directory tree for weak or exposed credentials."""

    # Extensions of code/config files worth sampling for embedded credentials.
    _CODE_EXTENSIONS = ('.py', '.js', '.php', '.java', '.xml', '.yaml', '.json', '.ini', '.conf')

    def __init__(self, directory: str):
        self.directory = directory

        # Password policy requirements.
        self.min_length = 8
        self.require_uppercase = True
        self.require_lowercase = True
        self.require_digits = True
        self.require_special = True

        # Regex patterns used to detect sensitive credentials in file content.
        self.patterns = {
            "password": r"(?:password|passwd|pwd)\s*=\s*['\"]([^'\"]{1,64})['\"]",
            "api_key": r"(?:api_key|api[-_]token|secret[-_]key|access[-_]token)\s*=\s*['\"]([^'\"]{1,64})['\"]",
            "database": r"(?:db_password|mysql_pwd|postgres_pwd|oracle_pwd)\s*=\s*['\"]([^'\"]{1,64})['\"]",
            "aws": r"(?:aws_secret|aws_key|aws_token)\s*=\s*['\"]([^'\"]{1,64})['\"]",
        }

        # Filename patterns that typically indicate credential files.
        self.password_file_patterns = [
            r"\.env$", r"config\.ini$", r"\.config$", r"credentials\.json$", r"secrets\.yaml$",
            r"password", r"apikey", r"\.key$", r"\.pem$", r"\.htpasswd$"
        ]

        logger.info(f"Initialized password checker for directory: {directory}")

    def _is_password_file(self, file_path: str) -> bool:
        """Check if file is likely to contain passwords (by filename)."""
        filename = os.path.basename(file_path).lower()
        return any(re.search(pattern, filename) for pattern in self.password_file_patterns)

    def _is_strong_password(self, password: str) -> bool:
        """Check if password meets the configured strength requirements."""
        if len(password) < self.min_length:
            return False

        has_upper = bool(re.search(r'[A-Z]', password)) if self.require_uppercase else True
        has_lower = bool(re.search(r'[a-z]', password)) if self.require_lowercase else True
        has_digit = bool(re.search(r'\d', password)) if self.require_digits else True
        has_special = bool(re.search(r'[!@#$%^&*(),.?":{}|<>]', password)) if self.require_special else True

        return has_upper and has_lower and has_digit and has_special

    def _detect_sensitive_info(self, file_path: str) -> List[Dict[str, Any]]:
        """Detect weak credentials in a file; returns one entry per finding."""
        sensitive_info = []

        try:
            # Skip large files (>10MB) to bound scan time.
            if os.path.getsize(file_path) > 10 * 1024 * 1024:
                logger.debug(f"Skipping large file: {file_path}")
                return []

            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()

            for credential_type, pattern in self.patterns.items():
                matches = re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE)
                for match in matches:
                    password = match.group(1)
                    # Line number: count newlines before the match start.
                    line_num = content[:match.start()].count('\n') + 1

                    if not self._is_strong_password(password):
                        sensitive_info.append({
                            "file": file_path,
                            "line": line_num,
                            "type": credential_type,
                            # Mask most of the credential in the report.
                            "value": password[:3] + "***" + password[-2:] if len(password) > 5 else "***",
                            "is_strong": False
                        })
        except Exception as e:
            logger.error(f"Error processing file {file_path}: {str(e)}")

        return sensitive_info

    @staticmethod
    def _make_issue(entry: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a detection entry into a report issue.

        This was duplicated verbatim in both passes of check_passwords.
        """
        return {
            "file": entry["file"],
            "issue": f"Weak {entry['type']} detected",
            "rule": "Password Policy",
            "severity": "High",
            "recommendation": "Replace with a stronger credential",
            "details": f"Line {entry['line']}: {entry['value']} - fails security requirements"
        }

    def check_passwords(self) -> List[Dict[str, Any]]:
        """Check all files for sensitive credentials and weak passwords.

        Likely credential files (by name) and common code/config files are
        both scanned; the original walked the directory tree twice with
        identical issue-building code — one walk now covers both cases.
        """
        issues = []
        logger.info(f"Scanning directory for password issues: {self.directory}")

        if not os.path.exists(self.directory):
            logger.error(f"Directory not found: {self.directory}")
            return [{"error": f"Directory '{self.directory}' not found"}]

        try:
            for root, _, files in os.walk(self.directory):
                for file in files:
                    file_path = os.path.join(root, file)
                    if self._is_password_file(file_path):
                        logger.debug(f"Checking password file: {file_path}")
                        issues.extend(self._make_issue(e) for e in self._detect_sensitive_info(file_path))
                    else:
                        ext = os.path.splitext(file_path)[1].lower()
                        if ext in self._CODE_EXTENSIONS:
                            logger.debug(f"Checking potential credential file: {file_path}")
                            issues.extend(self._make_issue(e) for e in self._detect_sensitive_info(file_path))
        except Exception as e:
            logger.error(f"Error during password check: {str(e)}")
            issues.append({"error": f"Error during password check: {str(e)}"})

        logger.info(f"Password check completed. Found {len(issues)} weak credentials.")
        return issues

#permission_checker.py
import os
import re
import logging
from typing import List, Dict, Any

logger = logging.getLogger(__name__)

# NOTE(review): this module is labeled permission_checker.py but contains a
# byte-for-byte duplicate of password_checker.py's PasswordChecker — no
# permission checking is implemented. Confirm whether this file should
# instead define a PermissionChecker or simply import the shared class.
class PasswordChecker:
    """Scans a directory tree for weak or exposed credentials."""

    # Extensions of code/config files worth sampling for embedded credentials.
    _CODE_EXTENSIONS = ('.py', '.js', '.php', '.java', '.xml', '.yaml', '.json', '.ini', '.conf')

    def __init__(self, directory: str):
        self.directory = directory

        # Password policy requirements.
        self.min_length = 8
        self.require_uppercase = True
        self.require_lowercase = True
        self.require_digits = True
        self.require_special = True

        # Regex patterns used to detect sensitive credentials in file content.
        self.patterns = {
            "password": r"(?:password|passwd|pwd)\s*=\s*['\"]([^'\"]{1,64})['\"]",
            "api_key": r"(?:api_key|api[-_]token|secret[-_]key|access[-_]token)\s*=\s*['\"]([^'\"]{1,64})['\"]",
            "database": r"(?:db_password|mysql_pwd|postgres_pwd|oracle_pwd)\s*=\s*['\"]([^'\"]{1,64})['\"]",
            "aws": r"(?:aws_secret|aws_key|aws_token)\s*=\s*['\"]([^'\"]{1,64})['\"]",
        }

        # Filename patterns that typically indicate credential files.
        self.password_file_patterns = [
            r"\.env$", r"config\.ini$", r"\.config$", r"credentials\.json$", r"secrets\.yaml$",
            r"password", r"apikey", r"\.key$", r"\.pem$", r"\.htpasswd$"
        ]

        logger.info(f"Initialized password checker for directory: {directory}")

    def _is_password_file(self, file_path: str) -> bool:
        """Check if file is likely to contain passwords (by filename)."""
        filename = os.path.basename(file_path).lower()
        return any(re.search(pattern, filename) for pattern in self.password_file_patterns)

    def _is_strong_password(self, password: str) -> bool:
        """Check if password meets the configured strength requirements."""
        if len(password) < self.min_length:
            return False

        has_upper = bool(re.search(r'[A-Z]', password)) if self.require_uppercase else True
        has_lower = bool(re.search(r'[a-z]', password)) if self.require_lowercase else True
        has_digit = bool(re.search(r'\d', password)) if self.require_digits else True
        has_special = bool(re.search(r'[!@#$%^&*(),.?":{}|<>]', password)) if self.require_special else True

        return has_upper and has_lower and has_digit and has_special

    def _detect_sensitive_info(self, file_path: str) -> List[Dict[str, Any]]:
        """Detect weak credentials in a file; returns one entry per finding."""
        sensitive_info = []

        try:
            # Skip large files (>10MB) to bound scan time.
            if os.path.getsize(file_path) > 10 * 1024 * 1024:
                logger.debug(f"Skipping large file: {file_path}")
                return []

            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()

            for credential_type, pattern in self.patterns.items():
                matches = re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE)
                for match in matches:
                    password = match.group(1)
                    # Line number: count newlines before the match start.
                    line_num = content[:match.start()].count('\n') + 1

                    if not self._is_strong_password(password):
                        sensitive_info.append({
                            "file": file_path,
                            "line": line_num,
                            "type": credential_type,
                            # Mask most of the credential in the report.
                            "value": password[:3] + "***" + password[-2:] if len(password) > 5 else "***",
                            "is_strong": False
                        })
        except Exception as e:
            logger.error(f"Error processing file {file_path}: {str(e)}")

        return sensitive_info

    @staticmethod
    def _make_issue(entry: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a detection entry into a report issue.

        This was duplicated verbatim in both passes of check_passwords.
        """
        return {
            "file": entry["file"],
            "issue": f"Weak {entry['type']} detected",
            "rule": "Password Policy",
            "severity": "High",
            "recommendation": "Replace with a stronger credential",
            "details": f"Line {entry['line']}: {entry['value']} - fails security requirements"
        }

    def check_passwords(self) -> List[Dict[str, Any]]:
        """Check all files for sensitive credentials and weak passwords.

        Likely credential files (by name) and common code/config files are
        both scanned; the original walked the directory tree twice with
        identical issue-building code — one walk now covers both cases.
        """
        issues = []
        logger.info(f"Scanning directory for password issues: {self.directory}")

        if not os.path.exists(self.directory):
            logger.error(f"Directory not found: {self.directory}")
            return [{"error": f"Directory '{self.directory}' not found"}]

        try:
            for root, _, files in os.walk(self.directory):
                for file in files:
                    file_path = os.path.join(root, file)
                    if self._is_password_file(file_path):
                        logger.debug(f"Checking password file: {file_path}")
                        issues.extend(self._make_issue(e) for e in self._detect_sensitive_info(file_path))
                    else:
                        ext = os.path.splitext(file_path)[1].lower()
                        if ext in self._CODE_EXTENSIONS:
                            logger.debug(f"Checking potential credential file: {file_path}")
                            issues.extend(self._make_issue(e) for e in self._detect_sensitive_info(file_path))
        except Exception as e:
            logger.error(f"Error during password check: {str(e)}")
            issues.append({"error": f"Error during password check: {str(e)}"})

        logger.info(f"Password check completed. Found {len(issues)} weak credentials.")
        return issues

#report_generator.py
import csv
import json
import os
from typing import Dict, List, Optional
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph
from reportlab.lib.styles import getSampleStyleSheet
from colorama import Fore, Style
from helper_utils import sanitize_output
class ReportGenerator:
    """Renders compliance issues as CSV, JSON, PDF, or colored text reports."""

    @staticmethod
    def _ensure_dir(output_file: str):
        """Ensure the output file's parent directory exists.

        Bug fix: for a bare filename os.path.dirname() returns "" and
        os.makedirs("") raises, so writing into the CWD always failed.
        """
        if output_file:
            parent = os.path.dirname(output_file)
            if parent:
                os.makedirs(parent, exist_ok=True)

    @staticmethod
    def _get_rule(item: Dict) -> str:
        """Get the rule name from an issue item."""
        return item.get("rule", "Unknown Rule")

    @staticmethod
    def _get_severity(issue: str) -> str:
        """Categorize issue severity by its issue name."""
        critical_issues = ["Encryption Failure", "Weak Password", "Unpatched Vulnerability"]
        moderate_issues = ["Access Control Violation", "Outdated Software"]
        if issue in critical_issues:
            return "Critical"
        return "Moderate" if issue in moderate_issues else "Low"

    @staticmethod
    def generate_csv(issues: List[Dict], output_file: str):
        """Generate a CSV report file."""
        ReportGenerator._ensure_dir(output_file)
        with open(output_file, "w", newline="", encoding="utf-8") as file:
            file.write(ReportGenerator.generate_csv_string(issues))

    @staticmethod
    def generate_csv_string(issues: List[Dict]) -> str:
        """Generate CSV content as a string."""
        from io import StringIO

        output = StringIO()
        writer = csv.writer(output)
        writer.writerow(["Type", "Item", "Issue", "Severity", "Rule", "Recommendation", "Details"])
        for item in issues:
            details = item.get("details", "N/A")
            severity = ReportGenerator._get_severity(item.get("issue", "Unknown Issue"))
            writer.writerow([
                "Software" if "package" in item else "File",
                item.get("package", item.get("file", "N/A")),
                item.get("issue", "Unknown Issue"),
                severity,
                ReportGenerator._get_rule(item),
                item.get("recommendation", "N/A"),
                details
            ])
        return output.getvalue()

    @staticmethod
    def generate_json(issues: List[Dict], output_file: str):
        """Generate a JSON report file."""
        ReportGenerator._ensure_dir(output_file)
        with open(output_file, "w", encoding="utf-8") as file:
            file.write(ReportGenerator.generate_json_string(issues))

    @staticmethod
    def generate_json_string(issues: List[Dict]) -> str:
        """Generate JSON content (issues plus a severity summary) as a string."""
        report_data = {
            "issues": issues,
            "summary": {
                "total_issues": len(issues),
                "critical_issues": sum(
                    1 for i in issues if ReportGenerator._get_severity(i.get("issue")) == "Critical"),
                "moderate_issues": sum(
                    1 for i in issues if ReportGenerator._get_severity(i.get("issue")) == "Moderate"),
                "low_issues": sum(
                    1 for i in issues if ReportGenerator._get_severity(i.get("issue")) == "Low")
            }
        }
        return json.dumps(report_data, indent=4)

    @staticmethod
    def generate_pdf(issues: List[Dict], output_file: str):
        """Generate a PDF report using reportlab."""
        ReportGenerator._ensure_dir(output_file)
        doc = SimpleDocTemplate(output_file, pagesize=letter)
        styles = getSampleStyleSheet()
        story = []

        # Title
        story.append(Paragraph("Compliance Report", styles["Title"]))

        # Summary
        summary_text = f"Total Issues Found: {len(issues)}"
        story.append(Paragraph(summary_text, styles["BodyText"]))

        # Table data
        table_data = [["Type", "Item", "Issue", "Severity", "Rule", "Recommendation", "Details"]]
        for item in issues:
            details = item.get("details", "N/A")
            severity = ReportGenerator._get_severity(item.get("issue", "Unknown Issue"))
            table_data.append([
                "Software" if "package" in item else "File",
                item.get("package", item.get("file", "N/A")),
                item.get("issue", "Unknown Issue"),
                severity,
                ReportGenerator._get_rule(item),
                item.get("recommendation", "N/A"),
                details
            ])

        # Table style
        table = Table(table_data)
        table.setStyle(TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#003366")),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.whitesmoke),
            ("ALIGN", (0, 0), (-1, -1), "CENTER"),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("FONTSIZE", (0, 0), (-1, 0), 10),
            ("BOTTOMPADDING", (0, 0), (-1, 0), 12),
            ("BACKGROUND", (0, 1), (-1, -1), colors.HexColor("#F0F8FF")),
            ("GRID", (0, 0), (-1, -1), 1, colors.black),
            ("WORDWRAP", (0, 0), (-1, -1)),
        ]))
        story.append(table)

        doc.build(story)

    @staticmethod
    def generate_text(issues: List[Dict], output_file: Optional[str] = None, **kwargs) -> str:
        """Generate a colorized text report; optionally write a plain copy to file.

        Fixes: the decorator was doubled (@staticmethod twice), and the file
        copy stripped only Fore.CYAN/Style.RESET_ALL — every other colorama
        code ended up verbatim in the saved report. All ANSI escapes are now
        removed before writing.
        """
        report = [
            f"{Fore.CYAN}=== Compliance Report ==={Style.RESET_ALL}",
            f"{Fore.YELLOW}Total Issues: {len(issues)}{Style.RESET_ALL}\n"
        ]

        current_file = None
        for item in issues:
            if "file" in item and item["file"] != current_file:
                report.append(f"\n{Fore.GREEN}File: {item['file']}{Style.RESET_ALL}")
                current_file = item["file"]
            report.extend([
                f"{Fore.RED}• Issue: {item.get('issue', 'Unknown')}",
                f"  Severity: {item.get('severity', 'Low')}",
                f"  Rule: {item.get('rule', 'Unknown')}",
                f"  Recommendation: {item.get('recommendation', 'N/A')}",
                "-" * 50
            ])

        report_str = "\n".join(report)
        if output_file:
            import re as _re
            # Strip ALL ANSI escape sequences for the on-disk copy.
            plain = _re.sub(r"\x1b\[[0-9;]*m", "", report_str)
            with open(output_file, "w") as f:
                f.write(plain)
            return f"Report saved to {output_file}"
        return sanitize_output(report_str)

    @staticmethod
    def generate_report(issues: List[Dict], output_file: str, report_format: str = "text"):
        """Unified report generation interface (csv/json/pdf/text)."""
        report_format = report_format.lower()
        if report_format == "csv":
            ReportGenerator.generate_csv(issues, output_file)
        elif report_format == "json":
            ReportGenerator.generate_json(issues, output_file)
        elif report_format == "pdf":
            ReportGenerator.generate_pdf(issues, output_file)
        elif report_format == "text":
            ReportGenerator.generate_text(issues, output_file)
        else:
            raise ValueError(f"Unsupported format: {report_format}")

#scanner.py
import os
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
from typing import Dict, List, Any
from compliance_rules import ALL_RULES

logger = logging.getLogger(__name__)

class FileScanner:
def __init__(self, directory: str, framework: str, max_workers: int = 4,
max_file_size: int =10 * 1024 * 1024):
self.directory = directory
self.framework = framework.upper()
self.max_workers = max(1, min(max_workers, 16)) # Limit between 1 and 16
self.max_file_size = max_file_size # Default: 10MB
self.file_queue = []

# Validate framework
if self.framework not in ALL_RULES:
raise ValueError(
f"Unsupported framework: {self.framework}. Available frameworks:
{', '.join(ALL_RULES.keys())}")

self.rules = ALL_RULES[self.framework]
logger.info(f"Initialized scanner for {self.framework} framework with
{self.max_workers} workers")

def set_max_file_size(self, size_in_bytes: int) -> None:


self.max_file_size = size_in_bytes
logger.info(f"Max file size to scan (in bytes).")

def scan_for_issues(self) -> List[Dict[str, Any]]:


"""Scan directory for compliance issues."""
issues = []

# Validate directory
if not os.path.exists(self.directory):
logger.error(f"Directory not found: {self.directory}")
return [{"error": f"Directory '{self.directory}' not found"}]

if not os.path.isdir(self.directory):
logger.error(f"Not a directory: {self.directory}")
return [{"error": f"'{self.directory}' is not a directory"}]

# Collect files to scan


try:
self._collect_files()
except Exception as e:
logger.error(f"Error collecting files: {str(e)}")
return [{"error": f"Error collecting files: {str(e)}"}]

if not self.file_queue:
logger.warning(f"No files found in {self.directory}")
return [{"warning": "No files found to scan"}]

logger.info(f"Found {len(self.file_queue)} files to scan")


# Scan files in parallel
with tqdm(total=len(self.file_queue), desc="Scanning", unit="file") as
pbar:
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
futures = [executor.submit(self._process_file, file_path) for
file_path in self.file_queue]

for future in futures:


try:
file_issues = future.result()
if file_issues:
issues.extend(file_issues)
except Exception as e:
logger.error(f"Error processing file: {str(e)}")
issues.append({"error": f"Error processing file:
{str(e)}"})
finally:
pbar.update(1)

logger.info(f"Scan completed. Found {len(issues)} issues.")


return issues

def _collect_files(self) -> None:


"""Collect files to scan from the directory."""
self.file_queue = []
for root, _, files in os.walk(self.directory):
for file in files:
file_path = os.path.join(root, file)
# Skip very large files and binary files
try:
if os.path.getsize(file_path) > self.max_file_size: # 10MB
logger.debug(f"Skipping large file: {file_path}")
continue
self.file_queue.append(file_path)
except OSError as e:
logger.debug(f"Cannot access file {file_path}: {str(e)}")

def _process_file(self, file_path: str) -> List[Dict[str, Any]]:


"""Process a single file against all rules."""
file_issues = []

for rule_name, rule in self.rules.items():


try:
if not rule["check"](file_path):
file_issues.append({
"file": file_path,
"issue": rule["description"],
"severity": rule["severity"],
"rule": rule_name,
"recommendation": rule["recommendation"],
"details": f"Failed {rule_name} check"
})
except Exception as e:
logger.error(f"Error checking {rule_name} for {file_path}:
{str(e)}")
file_issues.append({
"file": file_path,
"issue": f"Error checking {rule_name}",
"severity": "Error",
"rule": rule_name,
"recommendation": "Check logs for details",
"details": str(e)
})

return file_issues

#software_checker.py
import subprocess
import platform
import json
import logging
import re
from typing import Dict, List, Any, Optional, Tuple
from packaging.version import parse as parse_version, Version, InvalidVersion
from tqdm import tqdm

logger = logging.getLogger(__name__)

class SoftwareChecker:
    """Inventories installed software and flags outdated packages.

    Installed packages are discovered via the native package manager of the
    host OS (apt/dpkg/rpm/pacman/pip on Linux, PowerShell WMI on Windows,
    Homebrew/system_profiler on macOS).  Latest versions are looked up on
    PyPI, GitHub and NPM, in that order, and memoized in ``self.cache``.
    """

    def __init__(self, verbose: bool = False):
        """Initialize the checker.

        Args:
            verbose: When True, enable DEBUG logging and the progress bar.
        """
        self.verbose = verbose
        self.cache = {}  # package name -> (latest_version, source) memo

        # Match this module's log level to the requested verbosity.
        if verbose:
            logger.setLevel(logging.DEBUG)
        else:
            logger.setLevel(logging.INFO)

        logger.info("Initialized software checker")

    def check_outdated_software(self) -> List[Dict[str, Any]]:
        """Compare installed package versions against the newest available.

        Returns:
            Issue dicts for every outdated package; a single warning entry
            when no packages were found, or a single error entry if the
            check itself failed.
        """
        outdated_packages = []

        try:
            packages = self.get_installed_software()
            total_packages = len(packages)

            if not packages:
                logger.warning("No software packages found to check")
                return [{
                    "warning": "No software packages found to check",
                    "rule": "Software Updates",
                    "recommendation": "Ensure package managers are properly installed"
                }]

            logger.info(f"Checking {total_packages} installed packages")

            # Progress bar is hidden unless verbose mode is on.
            with tqdm(total=total_packages, desc="Checking packages", unit="pkg",
                      disable=not self.verbose) as pbar:
                for name, version in packages.items():
                    try:
                        # Skip packages with missing/unparseable versions.
                        # BUG FIX: the old code also called pbar.update(1)
                        # here before `continue`, which — combined with the
                        # `finally` below — double-counted these packages.
                        if not version or not self._is_valid_version(version):
                            continue

                        latest_version, source = self._get_latest_version(name)

                        if latest_version and self._is_outdated(version, latest_version):
                            severity = self._determine_severity(version, latest_version)
                            outdated_packages.append({
                                "package": name,
                                "issue": "Outdated software",
                                "severity": severity,
                                "rule": "Software Updates",
                                "recommendation": f"Update {name} from {version} to {latest_version}",
                                "details": f"Current: {version}, Latest: {latest_version}, Source: {source}"
                            })

                    except Exception as e:
                        # One bad package must not abort the whole sweep.
                        logger.debug(f"Error checking {name}: {str(e)}")
                    finally:
                        pbar.update(1)

            logger.info(f"Software check completed. Found {len(outdated_packages)} outdated packages.")

        except Exception as e:
            logger.error(f"Error during software check: {str(e)}")
            return [{
                "error": f"Error during software check: {str(e)}",
                "rule": "Software Updates",
                "recommendation": "Check logs for more details"
            }]

        return outdated_packages

    def get_installed_software(self) -> Dict[str, str]:
        """Return ``{package_name: version}`` for the current OS.

        Unsupported platforms yield an empty dict (and a warning).
        """
        os_type = platform.system()

        if os_type == "Linux":
            return self._get_linux_packages()
        elif os_type == "Windows":
            return self._get_windows_packages()
        elif os_type == "Darwin":  # macOS
            return self._get_macos_packages()
        else:
            logger.warning(f"Unsupported OS: {os_type}")
            return {}

    def _get_linux_packages(self) -> Dict[str, str]:
        """Get installed packages on Linux.

        Tries apt, dpkg, rpm and pacman in turn, returning on the first
        manager that yields any packages; falls back to pip last.
        """
        packages = {}

        # Try apt (Debian/Ubuntu)
        if self._command_exists("apt"):
            try:
                output = subprocess.check_output(
                    ["apt", "list", "--installed"],
                    text=True,
                    encoding="utf-8",
                    errors="replace"
                )
                # Lines look like "name/suite,now version arch [installed]".
                for line in output.split("\n"):
                    if "/" in line:
                        parts = line.split("/")[0].split()
                        if len(parts) >= 2:
                            packages[parts[0]] = parts[1]

                if packages:
                    return packages
            except Exception as e:
                logger.debug(f"Error getting apt packages: {str(e)}")

        # Try dpkg (Debian/Ubuntu)
        if self._command_exists("dpkg"):
            try:
                output = subprocess.check_output(
                    ["dpkg", "-l"],
                    text=True,
                    encoding="utf-8",
                    errors="replace"
                )
                # "ii " marks installed packages: "ii name version arch desc".
                for line in output.split("\n"):
                    if line.startswith("ii "):
                        parts = line.split()
                        if len(parts) >= 3:
                            packages[parts[1]] = parts[2]

                if packages:
                    return packages
            except Exception as e:
                logger.debug(f"Error getting dpkg packages: {str(e)}")

        # Try rpm (Red Hat/CentOS/Fedora)
        if self._command_exists("rpm"):
            try:
                output = subprocess.check_output(
                    ["rpm", "-qa", "--queryformat", "%{NAME} %{VERSION}\n"],
                    text=True,
                    encoding="utf-8",
                    errors="replace"
                )
                for line in output.split("\n"):
                    if line.strip():
                        parts = line.split()
                        if len(parts) >= 2:
                            packages[parts[0]] = parts[1]

                if packages:
                    return packages
            except Exception as e:
                logger.debug(f"Error getting rpm packages: {str(e)}")

        # Try pacman (Arch Linux)
        if self._command_exists("pacman"):
            try:
                output = subprocess.check_output(
                    ["pacman", "-Q"],
                    text=True,
                    encoding="utf-8",
                    errors="replace"
                )
                for line in output.split("\n"):
                    if line.strip():
                        parts = line.split()
                        if len(parts) >= 2:
                            packages[parts[0]] = parts[1]

                if packages:
                    return packages
            except Exception as e:
                logger.debug(f"Error getting pacman packages: {str(e)}")

        # Try pip (Python) as a last resort.
        if self._command_exists("pip"):
            try:
                output = subprocess.check_output(
                    ["pip", "list", "--format=json"],
                    text=True,
                    encoding="utf-8",
                    errors="replace"
                )
                pip_packages = json.loads(output)
                for pkg in pip_packages:
                    packages[pkg["name"]] = pkg["version"]

                if packages:
                    return packages
            except Exception as e:
                logger.debug(f"Error getting pip packages: {str(e)}")

        return packages

    def _get_windows_packages(self) -> Dict[str, str]:
        """Get installed packages on Windows via WMI.

        NOTE(review): Win32_Product enumeration is known to be slow and can
        trigger MSI reconfiguration; consider the registry Uninstall keys
        instead — confirm before changing behavior.
        """
        packages = {}

        try:
            # Use PowerShell to get installed applications
            output = subprocess.check_output(
                ["powershell", "-Command",
                 "Get-WmiObject -Class Win32_Product | Select-Object Name, Version | ConvertTo-Json"],
                text=True,
                encoding="utf-8",
                errors="replace"
            )
            if output.strip():
                try:
                    apps = json.loads(output)
                    # ConvertTo-Json emits a bare object (not a list) when
                    # there is exactly one application.
                    if isinstance(apps, dict):  # Single app
                        packages[apps["Name"]] = apps["Version"]
                    else:  # Multiple apps
                        for app in apps:
                            if app["Name"] and app["Version"]:
                                packages[app["Name"]] = app["Version"]
                except Exception as e:
                    logger.debug(f"Error parsing Windows packages: {str(e)}")

        except Exception as e:
            logger.debug(f"Error getting Windows packages: {str(e)}")

        return packages

    def _get_macos_packages(self) -> Dict[str, str]:
        """Get installed packages on macOS (Homebrew, then system_profiler)."""
        packages = {}

        # Try brew (Homebrew)
        if self._command_exists("brew"):
            try:
                output = subprocess.check_output(
                    ["brew", "list", "--versions"],
                    text=True,
                    encoding="utf-8",
                    errors="replace"
                )
                # Each line: "name version [older versions...]".
                for line in output.split("\n"):
                    if line.strip():
                        parts = line.split()
                        if len(parts) >= 2:
                            packages[parts[0]] = parts[1]

                if packages:
                    return packages
            except Exception as e:
                logger.debug(f"Error getting brew packages: {str(e)}")

        # Fall back to system_profiler for GUI applications.
        try:
            output = subprocess.check_output(
                ["system_profiler", "SPApplicationsDataType", "-json"],
                text=True,
                encoding="utf-8",
                errors="replace"
            )
            apps_data = json.loads(output)
            if "SPApplicationsDataType" in apps_data:
                for app in apps_data["SPApplicationsDataType"]:
                    if "_name" in app and "version" in app:
                        packages[app["_name"]] = app["version"]

        except Exception as e:
            logger.debug(f"Error getting macOS apps: {str(e)}")

        return packages

    def _get_latest_version(self, package_name: str) -> Tuple[Optional[str], str]:
        """Return ``(latest_version, source)`` for a package, or ``(None, reason)``.

        Results (positive hits only) are cached per package name.
        """
        # Check cache first
        if package_name in self.cache:
            return self.cache[package_name]

        # Reject names that could not be safely passed to subcommands.
        if not re.match(r"^[a-zA-Z0-9_-]+$", package_name):
            logger.debug(f"Skipping invalid package name: {package_name}")
            return None, "Invalid package name"

        # Skip known non-Python packages
        non_python_packages = ["Microsoft Visual C++", "Office", "HP", "Oracle VirtualBox"]
        if any(npp in package_name for npp in non_python_packages):
            logger.debug(f"Skipping non-Python package: {package_name}")
            return None, "Non-Python package"

        # Try each registry in priority order; first hit wins.
        sources = [
            self._check_pypi,
            self._check_github,
            self._check_npm
        ]

        for source_check in sources:
            try:
                result = source_check(package_name)
                if result[0]:
                    # Cache the result
                    self.cache[package_name] = result
                    return result
            except Exception as e:
                logger.debug(f"Error checking {source_check.__name__} for {package_name}: {str(e)}")

        # If no source found the package
        return None, "Unknown"

    def _check_pypi(self, package_name: str) -> Tuple[Optional[str], str]:
        """Check PyPI for the newest version via pip's error output.

        ``pip install <name>==`` deliberately fails and pip's error message
        lists every available version.
        """
        try:
            output = subprocess.check_output(
                ["pip", "install", f"{package_name}==", "--dry-run"],
                stderr=subprocess.STDOUT,
                text=True,
                encoding="utf-8",
                errors="replace"
            )
        except subprocess.CalledProcessError as e:
            # BUG FIX: pip exits non-zero for "<name>==", so check_output
            # raises — the old code swallowed that and always returned None.
            # The version listing we want is in the captured error output.
            output = e.output or ""
        except Exception as e:
            logger.debug(f"PyPI check failed for {package_name}: {str(e)}")
            return None, "PyPI"

        match = re.search(
            r"Could not find a version that satisfies the requirement.*\(from versions: (.*)\)",
            output)
        if match:
            versions = match.group(1).split(", ")
            if versions:
                # Assumes pip lists versions in ascending order — the last
                # entry is taken as the latest (TODO confirm for pre-releases).
                return versions[-1], "PyPI"

        return None, "PyPI"

    def _check_github(self, package_name: str) -> Tuple[Optional[str], str]:
        """Check GitHub's latest-release endpoint for a version tag.

        NOTE(review): _get_latest_version's name regex rejects "/", so an
        "owner/repo" path can never reach this method — confirm whether the
        GitHub source was ever expected to work.
        """
        try:
            # Use curl to check GitHub API
            output = subprocess.check_output(
                ["curl", "-s",
                 f"https://api.github.com/repos/{package_name}/releases/latest"],
                text=True,
                encoding="utf-8",
                errors="replace"
            )
            data = json.loads(output)
            if "tag_name" in data:
                # Release tags are conventionally "vX.Y.Z"; strip the prefix.
                version = data["tag_name"].lstrip("v")
                return version, "GitHub"

        except Exception as e:
            logger.debug(f"GitHub check failed for {package_name}: {str(e)}")

        return None, "GitHub"

    def _check_npm(self, package_name: str) -> Tuple[Optional[str], str]:
        """Check the NPM registry for the package's published version."""
        try:
            output = subprocess.check_output(
                ["npm", "view", package_name, "version"],
                text=True,
                encoding="utf-8",
                errors="replace"
            )
            if output.strip():
                return output.strip(), "NPM"

        except Exception as e:
            logger.debug(f"NPM check failed for {package_name}: {str(e)}")

        return None, "NPM"

    def _is_valid_version(self, version: str) -> bool:
        """Return True if *version* parses as a PEP 440 version string."""
        try:
            parse_version(version)
            return True
        except InvalidVersion:
            return False

    def _is_outdated(self, current: str, latest: str) -> bool:
        """Return True if *current* is older than *latest*."""
        try:
            current_v = parse_version(current)
            latest_v = parse_version(latest)
            return current_v < latest_v
        except InvalidVersion:
            # If version parsing fails, fall back to string inequality.
            return current != latest

    def _determine_severity(self, current: str, latest: str) -> str:
        """Map the size of the version gap to High/Medium/Low severity."""
        try:
            current_v = parse_version(current)
            latest_v = parse_version(latest)

            # Major version difference
            if latest_v.major > current_v.major:
                return "High"

            # Minor version difference
            if latest_v.minor > current_v.minor:
                return "Medium"

            # Patch version difference
            if latest_v.micro > current_v.micro:
                return "Low"

            return "Low"
        except (InvalidVersion, AttributeError):
            # If version parsing fails, assume medium severity
            return "Medium"

    def _command_exists(self, cmd: str) -> bool:
        """Return True if *cmd* is resolvable on PATH.

        BUG FIX: uses shutil.which instead of spawning ``which`` — the old
        approach did not work on Windows and raised an uncaught
        FileNotFoundError when the ``which`` binary itself was missing.
        """
        return shutil.which(cmd) is not None

#main.py
import argparse
import os
import time
import sys
import logging
from typing import List, Dict, Any
from concurrent.futures import ThreadPoolExecutor
from abc import ABC, abstractmethod
from colorama import Fore, Style, init

# Initialize colorama so ANSI colors work cross-platform; autoreset returns
# the terminal to its default style after every print call.
init(autoreset=True)

# Tool version reported by --version and embedded in report contexts.
VERSION = "2.2.0"
# Default location for persisted CLI settings (see --save-config).
CONFIG_FILE = "config/compliance_settings.json"

# Import helper functions


from helper_utils import (
sanitize_output,
get_os_info,
parse_requirements,
get_config,
save_config,
get_file_type
)

# Define base checker class


class ComplianceChecker(ABC):
    """Abstract contract shared by every compliance checker.

    Subclasses implement :meth:`check` and return a list of issue dicts.
    """

    def __init__(self, scan_path: str, framework: str, verbose: bool = False):
        # Target directory, compliance framework name, and verbose flag.
        self.scan_path = scan_path
        self.framework = framework
        self.verbose = verbose

    @abstractmethod
    def check(self) -> List[Dict[str, Any]]:
        """Run the compliance check and return list of issues."""
        ...

# Custom logging filter to sanitize log messages


class SanitizationFilter(logging.Filter):
    """Logging filter that scrubs sensitive data from each record's message."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Replace the record's message with its sanitized rendering.

        Always returns True so the record is still emitted.
        """
        # Render (msg % args) once, sanitize, and store the final string.
        record.msg = sanitize_output(record.getMessage())
        # BUG FIX: getMessage() already merged the args. Clear them so
        # handlers do not attempt a second %-formatting pass on the
        # pre-formatted (and possibly %-containing) message.
        record.args = None
        return True

# Configure logging
def setup_logging(log_file: str = 'compliance.log', verbose: bool = False) -> None:
    """Set up logging configuration.

    Args:
        log_file: Destination log file; parent directories are created.
        verbose: When True, log at DEBUG and mirror records to the console.
    """
    log_level = logging.DEBUG if verbose else logging.INFO

    # Ensure the log directory exists. exist_ok=True makes the separate
    # os.path.exists() pre-check the old code did redundant and race-free.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    logging.basicConfig(
        filename=log_file,
        level=log_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Mirror log output to the console when running verbosely.
    if verbose:
        console = logging.StreamHandler()
        console.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
        console.setFormatter(formatter)
        logging.getLogger().addHandler(console)

    # Scrub sensitive data from everything logged via the root logger.
    # NOTE(review): calling setup_logging() twice stacks duplicate
    # handlers/filters; acceptable for a CLI entry point invoked once.
    logging.getLogger().addFilter(SanitizationFilter())
def print_status(message: str, color: str = Fore.WHITE) -> None:
    """Print a sanitized, timestamped status line in the given color."""
    stamp = time.strftime('%H:%M:%S')
    print(f"{color}[{stamp}] {sanitize_output(message)}")

def parse_arguments() -> argparse.Namespace:
    """Parse command line arguments with improved structure.

    Returns:
        argparse.Namespace carrying all scan/check/output/advanced options.
    """
    parser = argparse.ArgumentParser(
        description=f"{Fore.CYAN}Compliance Automation Tool {VERSION}{Style.RESET_ALL}",
        formatter_class=argparse.RawTextHelpFormatter,
        # BUG FIX: the epilog was a raw, non-f string (r"""\...), so the
        # "{Fore.YELLOW}" placeholders and a stray backslash were printed
        # literally in --help output. Interpolate it as an f-string.
        epilog=f"""
{Fore.YELLOW}Examples:{Style.RESET_ALL}
  Basic scan:    {Fore.GREEN}python src/main.py --scan data --framework GDPR{Style.RESET_ALL}
  Full checks:   {Fore.GREEN}python src/main.py --scan data --check-all{Style.RESET_ALL}
  PDF report:    {Fore.GREEN}python src/main.py --scan data --output report.pdf --output-format pdf{Style.RESET_ALL}
  Save settings: {Fore.GREEN}python src/main.py --scan data --framework HIPAA --save-config{Style.RESET_ALL}"""
    )

    # Scan options
    scan_group = parser.add_argument_group("Scan options")
    scan_group.add_argument("-s", "--scan", type=str, metavar="PATH",
                            help="Directory to scan")
    scan_group.add_argument("-f", "--framework", type=str,
                            choices=["GDPR", "HIPAA", "PCI-DSS", "NIST", "SOC2", "ISO27001"],
                            default="GDPR",
                            help="Compliance framework (default: GDPR)")
    scan_group.add_argument("-t", "--threads", type=int, default=4,
                            help="Number of threads for scanning (default: 4)")
    scan_group.add_argument("--exclude", type=str, metavar="PATTERNS",
                            help="Comma-separated patterns to exclude from scanning")
    scan_group.add_argument("--max-file-size", type=int, default=10,
                            help="Maximum file size to scan in MB (default: 10)")
    scan_group.add_argument("--incremental", action="store_true",
                            help="Only scan files modified since last scan")

    # Check options
    check_group = parser.add_argument_group("Check options")
    check_group.add_argument("-p", "--check-passwords", action="store_true",
                             help="Enable password checks")
    check_group.add_argument("-u", "--check-outdated", action="store_true",
                             help="Check for outdated software packages")
    check_group.add_argument("--check-permissions", action="store_true",
                             help="Check file and directory permissions")
    check_group.add_argument("--check-all", action="store_true",
                             help="Enable all checks")
    check_group.add_argument("--check-dependencies", action="store_true",
                             help="Check project dependencies against requirements file")
    check_group.add_argument("--min-severity", type=str,
                             choices=["info", "low", "medium", "high", "critical"],
                             default="low",
                             help="Minimum severity level to report (default: low)")

    # Output options
    output_group = parser.add_argument_group("Output options")
    output_group.add_argument("-o", "--output", type=str, metavar="FILE",
                              help="Output report file")
    output_group.add_argument("-F", "--output-format", type=str,
                              choices=["csv", "json", "pdf", "text", "html", "xml"],
                              default="text",
                              help="Report format (default: text)")
    output_group.add_argument("--output-all", action="store_true",
                              help="Generate reports in all available formats")
    output_group.add_argument("-v", "--verbose", action="store_true",
                              help="Show detailed processing information")
    output_group.add_argument("--log-file", type=str,
                              default="logs/compliance.log",
                              help="Log file location (default: logs/compliance.log)")
    output_group.add_argument("--version", action="version",
                              version=f"%(prog)s {VERSION}",
                              help="Show version information")
    output_group.add_argument("--save-config", action="store_true",
                              help="Save current settings as default configuration")
    output_group.add_argument("--profile", type=str, default="default",
                              help="Configuration profile to use or save (default: default)")
    output_group.add_argument("--progress", action="store_true",
                              help="Show progress bar for long operations")

    # Advanced options
    advanced_group = parser.add_argument_group("Advanced options")
    advanced_group.add_argument("--interactive", action="store_true",
                                help="Run in interactive mode")
    advanced_group.add_argument("--debug", action="store_true",
                                help="Enable debug mode")
    advanced_group.add_argument("--cache-dir", type=str, default=".cache",
                                help="Cache directory for incremental scans")

    return parser.parse_args()

def load_checker(checker_type: str, scan_path: str, framework: str, verbose: bool = False) -> ComplianceChecker:
    """Dynamically load and instantiate the appropriate checker class.

    Raises:
        ValueError: If *checker_type* is not one of the known kinds.
    """
    if checker_type == "file":
        # Ensure FileScanner is a subclass of ComplianceChecker.
        from scanner import FileScanner
        return FileScanner(scan_path, framework, threads=4)
    if checker_type == "password":
        from password_checker import PasswordChecker
        return PasswordChecker(scan_path)
    if checker_type == "permission":
        from permission_checker import PermissionChecker
        return PermissionChecker(scan_path, framework)
    if checker_type == "software":
        from software_checker import SoftwareChecker
        return SoftwareChecker(verbose)
    raise ValueError(f"Unknown checker type: {checker_type}")

def check_project_dependencies(requirements_file: str = "requirements.txt",
                               verbose: bool = False) -> List[Dict[str, Any]]:
    """Check project dependencies against a requirements file.

    Returns a list of issue dicts; empty when the file is missing or no
    problems are detected.
    """
    issues: List[Dict[str, Any]] = []

    try:
        if not os.path.exists(requirements_file):
            print_status(f"Requirements file not found: {requirements_file}", Fore.YELLOW)
            return issues

        print_status("Checking project dependencies...", Fore.BLUE)
        packages = parse_requirements(requirements_file)

        if verbose:
            print_status(f"Found {len(packages)} required packages", Fore.BLUE)

        # Placeholder detection: real version comparison is still TODO,
        # so only a fixed demo set of packages is ever flagged.
        demo_targets = ("requests", "django", "flask")
        for package, required_version in packages.items():
            if required_version == "latest":
                continue
            if package in demo_targets:  # Just for demo
                issues.append({
                    "type": "dependency",
                    "severity": "medium",
                    "description": f"Package {package} has required version {required_version} but may need updating",
                    "location": requirements_file,
                    "framework": "security-best-practices"
                })

    except Exception as e:
        logging.error(f"Error checking dependencies: {str(e)}")
        print_status(f"Error checking dependencies: {str(e)}", Fore.RED)

    return issues

def analyze_file_types(scan_path: str) -> Dict[str, int]:
    """Count files per detected type under *scan_path*.

    Files whose type detection raises are logged and excluded from counts.
    """
    counts: Dict[str, int] = {}

    for directory, _, names in os.walk(scan_path):
        for name in names:
            full_path = os.path.join(directory, name)
            try:
                kind = get_file_type(full_path)
            except Exception as e:
                logging.debug(f"Error detecting file type for {full_path}: {str(e)}")
                continue
            counts[kind] = counts.get(kind, 0) + 1

    return counts

def run_checks(args: argparse.Namespace) -> List[Dict[str, Any]]:
    """Run all requested compliance checks.

    Validates the scan path, fans the enabled checks out across a thread
    pool, collects every checker's issues into one list, and applies the
    --min-severity filter before returning.

    Args:
        args: Parsed CLI arguments (scan, framework, threads, check flags...).

    Returns:
        Combined list of issue dicts from all checkers.

    Exits:
        Via sys.exit(1) on missing/invalid scan path, user interrupt, or an
        unrecoverable error.
    """
    all_issues = []
    scan_path = args.scan

    # A scan path is mandatory for this entry point.
    if not scan_path:
        print_status("No scan path specified. Use --scan to specify a directory.", Fore.RED)
        sys.exit(1)

    if not os.path.exists(scan_path):
        print_status(f"Path not found: {scan_path}", Fore.RED)
        sys.exit(1)

    # Normalize path
    scan_path = os.path.abspath(scan_path)

    # Process exclude patterns (comma-separated on the command line).
    exclude_patterns = []
    if args.exclude:
        exclude_patterns = [pattern.strip() for pattern in args.exclude.split(',')]
        print_status(f"Excluding patterns: {', '.join(exclude_patterns)}", Fore.YELLOW)

    try:
        # Get OS info for log context only.
        os_info = get_os_info()
        logging.info(f"Running on {os_info['system']} {os_info['release']} {os_info['machine']}")

        # Analyze file types if verbose (most common type first).
        if args.verbose:
            file_types = analyze_file_types(scan_path)
            print_status("File types found in scanned directory:", Fore.BLUE)
            for file_type, count in sorted(file_types.items(), key=lambda x: x[1], reverse=True):
                print(f" {file_type}: {count}")

        # Incremental mode: bail out early when nothing changed since the
        # last scan.
        # NOTE(review): modified_files only gates whether the scan runs; it
        # is not passed to FileScanner, so a full rescan still occurs when
        # any file changed — confirm this is intended.
        if args.incremental:
            from file_tracker import FileTracker
            tracker = FileTracker(args.cache_dir)
            modified_files = tracker.get_modified_files(scan_path)
            if args.verbose:
                print_status(f"Incremental scan: {len(modified_files)} files modified since last scan", Fore.BLUE)
            if not modified_files:
                print_status("No modified files found. Skipping scan.", Fore.GREEN)
                return all_issues
        # Each enabled checker is submitted to the pool; imports are kept
        # local so optional checkers are only loaded when requested.
        with ThreadPoolExecutor(max_workers=args.threads) as executor:
            futures = []
            if args.verbose:
                print_status("Starting compliance scan...", Fore.BLUE)
            from scanner import FileScanner
            scanner = FileScanner(scan_path, args.framework, args.threads)
            if exclude_patterns:
                scanner.set_exclude_patterns(exclude_patterns)
            scanner.set_max_file_size(args.max_file_size * 1024 * 1024)
            futures.append(executor.submit(scanner.scan_for_issues))

            if args.check_passwords or args.check_all:
                if args.verbose:
                    print_status("Checking for password issues...", Fore.BLUE)
                from password_checker import PasswordChecker
                checker = PasswordChecker(scan_path)
                futures.append(executor.submit(checker.check_passwords))

            if args.check_permissions or args.check_all:
                if args.verbose:
                    print_status("Checking file and directory permissions...", Fore.BLUE)
                from permission_checker import PermissionChecker
                perm_checker = PermissionChecker(scan_path, args.framework)
                futures.append(executor.submit(perm_checker.check_permissions))
            if args.check_outdated or args.check_all:
                if args.verbose:
                    print_status("Checking for outdated software...", Fore.BLUE)
                from software_checker import SoftwareChecker
                sw_checker = SoftwareChecker(args.verbose)
                futures.append(executor.submit(sw_checker.check_outdated_software))

            if args.check_dependencies or args.check_all:
                futures.append(executor.submit(check_project_dependencies, "requirements.txt", args.verbose))

            # Collect results; a failed checker logs an error but does not
            # abort the remaining checks.
            for future in futures:
                try:
                    result = future.result()
                    if result:
                        all_issues.extend(result)
                except Exception as e:
                    logging.error(f"Error in compliance check: {str(e)}")
                    print_status(f"Error in compliance check: {str(e)}", Fore.RED)

        # Drop issues below the requested severity threshold.
        # NOTE(review): the lookup keys are lowercase while some checkers
        # emit "High"/"Medium"/"Low"; those fall back to the default level
        # 1 ("low") — confirm severities are normalized upstream.
        if args.min_severity:
            severity_levels = {"info": 0, "low": 1, "medium": 2, "high": 3, "critical": 4}
            min_level = severity_levels.get(args.min_severity, 1)
            all_issues = [issue for issue in all_issues if severity_levels.get(issue.get("severity", "low"), 1) >= min_level]
        # Persist the file-state cache so the next incremental run can diff.
        if args.incremental:
            tracker.update_file_cache(scan_path)

    except KeyboardInterrupt:
        print_status("\nOperation cancelled by user", Fore.YELLOW)
        sys.exit(1)
    except Exception as e:
        logging.exception("Critical error during checks")
        print_status(f"Critical error: {str(e)}", Fore.RED)
        sys.exit(1)

    return all_issues
def generate_all_reports(issues: List[Dict[str, Any]], base_filename: str, context: Dict[str, Any]) -> None:
    """Generate reports in every available format.

    NOTE: *context* is accepted but not forwarded to generate_report here;
    each report is written as "<base_filename>.<fmt>".
    """
    from report_generator import ReportGenerator

    for fmt in ("text", "csv", "json", "pdf", "html", "xml"):
        output_file = f"{base_filename}.{fmt}"
        try:
            ReportGenerator.generate_report(issues, output_file, fmt)
            print_status(f"Report saved to {output_file}", Fore.GREEN)
        except Exception as e:
            logging.error(f"Error generating {fmt} report: {str(e)}")
            print_status(f"Error generating {fmt} report: {str(e)}", Fore.RED)

def run_interactive_mode(args: argparse.Namespace) -> None:
    """Run the tool in interactive mode.

    Walks the user through scan path, framework, check selection and output
    choices via stdin prompts, then writes the answers back onto *args* so
    the normal non-interactive flow can proceed unchanged.
    """
    print(f"{Fore.CYAN}Welcome to Compliance Automation Tool v{VERSION} - Interactive Mode{Style.RESET_ALL}")
    print("This mode will guide you through the compliance checking process.")
    # Scan directory: blank input falls back to --scan, then to ".".
    scan_path = input(f"{Fore.YELLOW}Enter directory to scan [{args.scan or '.'}]: {Style.RESET_ALL}")
    scan_path = scan_path.strip() or args.scan or '.'
    # Framework selection by 1-based menu index; invalid input -> GDPR.
    frameworks = ["GDPR", "HIPAA", "PCI-DSS", "NIST", "SOC2", "ISO27001"]
    print(f"{Fore.YELLOW}Available frameworks:{Style.RESET_ALL}")
    for i, framework in enumerate(frameworks, 1):
        print(f" {i}. {framework}")
    framework_choice = input(f"{Fore.YELLOW}Select framework [1-{len(frameworks)}] (default: 1): {Style.RESET_ALL}")
    try:
        framework_idx = int(framework_choice) - 1 if framework_choice.strip() else 0
        framework = frameworks[framework_idx]
    except (ValueError, IndexError):
        framework = "GDPR"
        print(f"{Fore.YELLOW}Invalid choice. Using default: {framework}{Style.RESET_ALL}")
    # Check selection: substring matching on the raw input, so "2,4" (or
    # even "24") enables the corresponding checks; blank or "6" means all.
    checks = []
    print(f"{Fore.YELLOW}Select checks to perform:{Style.RESET_ALL}")
    print(" 1. File scan (always enabled)")
    print(" 2. Password checks")
    print(" 3. Permission checks")
    print(" 4. Outdated software checks")
    print(" 5. Dependency checks")
    print(" 6. All checks")
    check_choice = input(f"{Fore.YELLOW}Enter check numbers (comma-separated) or 6 for all: {Style.RESET_ALL}")

    check_all = False
    if "6" in check_choice or not check_choice.strip():
        check_all = True
    else:
        if "2" in check_choice:
            checks.append("password")
        if "3" in check_choice:
            checks.append("permission")
        if "4" in check_choice:
            checks.append("software")
        if "5" in check_choice:
            checks.append("dependency")
    # Output format by menu index; invalid input -> "text".
    formats = ["text", "csv", "json", "pdf", "html", "xml"]
    print(f"{Fore.YELLOW}Available output formats:{Style.RESET_ALL}")
    for i, fmt in enumerate(formats, 1):
        print(f" {i}. {fmt}")
    format_choice = input(f"{Fore.YELLOW}Select output format [1-{len(formats)}] (default: 1): {Style.RESET_ALL}")
    try:
        format_idx = int(format_choice) - 1 if format_choice.strip() else 0
        output_format = formats[format_idx]
    except (ValueError, IndexError):
        output_format = "text"
        print(f"{Fore.YELLOW}Invalid choice. Using default: {output_format}{Style.RESET_ALL}")
    output_file = input(f"{Fore.YELLOW}Enter output file (leave empty for console output): {Style.RESET_ALL}")
    # Write the interactive selections back onto the parsed args namespace.
    args.scan = scan_path
    args.framework = framework
    args.check_all = check_all
    args.check_passwords = check_all or "password" in checks
    args.check_permissions = check_all or "permission" in checks
    args.check_outdated = check_all or "software" in checks
    args.check_dependencies = check_all or "dependency" in checks
    args.output_format = output_format
    args.output = output_file if output_file.strip() else None

    print(f"{Fore.GREEN}Starting compliance checks...{Style.RESET_ALL}")

def main() -> None:
    """Main function to run the compliance tool.

    Flow: parse args -> configure logging -> (optionally) interactive mode
    -> (optionally) persist config -> run checks -> emit report(s).
    Exits with status 1 on user interrupt or unrecoverable error.
    """
    # Parse command line arguments
    args = parse_arguments()
    setup_logging(args.log_file, args.verbose)
    logging.info(f"Starting Compliance Automation Tool v{VERSION}")
    config = get_config(CONFIG_FILE)
    # Interactive mode rewrites args in place before the checks run.
    if args.interactive:
        run_interactive_mode(args)
    # Persist the current CLI settings as the new defaults.
    if args.save_config:
        config.update({
            "framework": args.framework,
            "threads": args.threads,
            "check_passwords": args.check_passwords,
            "check_outdated": args.check_outdated,
            "check_permissions": args.check_permissions,
            "check_dependencies": args.check_dependencies,
            "output_format": args.output_format,
            "verbose": args.verbose,
            "max_file_size": args.max_file_size,
            "min_severity": args.min_severity,
            "incremental": args.incremental
        })
        if args.profile != "default":
            # NOTE(review): config.copy() here includes the "profiles" key
            # itself, so saved profiles nest previously saved profiles —
            # confirm whether the profile snapshot should exclude it.
            config["profiles"] = config.get("profiles", {})
            config["profiles"][args.profile] = config.copy()
        if save_config(config, CONFIG_FILE):
            print_status(f"Configuration saved to {CONFIG_FILE}", Fore.GREEN)
        else:
            print_status(f"Failed to save configuration to {CONFIG_FILE}", Fore.RED)

    # Overlay profile values onto args, but only for attributes that are
    # missing or None.
    # NOTE(review): argparse sets every destination attribute, so values
    # with non-None defaults are never overridden by a profile — confirm
    # intended precedence.
    if args.profile != "default" and config.get("profiles", {}).get(args.profile):
        profile_config = config["profiles"][args.profile]
        for key, value in profile_config.items():
            if not hasattr(args, key) or getattr(args, key) is None:
                setattr(args, key, value)

    start_time = time.time()

    try:
        # At least one action must be requested.
        if not (args.scan or args.check_outdated):
            print_status(
                "No actions specified. Use --scan, --check-passwords, --check-outdated, or --check-permissions",
                Fore.RED
            )
            sys.exit(1)

        all_issues = run_checks(args) if args.scan else []

        # Metadata embedded into every generated report.
        os_info = get_os_info()
        report_context = {
            "os_info": os_info,
            "tool_version": VERSION,
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "scan_duration": time.time() - start_time,
            "framework": args.framework,
            "scan_path": args.scan,
            "checks_performed": {
                "file_scan": True if args.scan else False,
                "password_check": args.check_passwords or args.check_all,
                "permission_check": args.check_permissions or args.check_all,
                "software_check": args.check_outdated or args.check_all,
                "dependency_check": args.check_dependencies or args.check_all
            }
        }
        # Route the results: file(s) when --output is given, else console.
        if all_issues:
            if args.output:
                if args.output_all:
                    base_filename = os.path.splitext(args.output)[0]
                    generate_all_reports(all_issues, base_filename, report_context)
                else:
                    from report_generator import ReportGenerator
                    ReportGenerator.generate_report(all_issues, args.output, args.output_format, context=report_context)
                    print_status(f"Report saved to {args.output}", Fore.GREEN)
            else:
                from report_generator import ReportGenerator
                print(ReportGenerator.generate_text(all_issues, context=report_context))
        else:
            print_status("No issues found. Compliance check passed!", Fore.GREEN)

        print(f"\n{Fore.CYAN}Scan duration: {time.time() - start_time:.2f}s{Style.RESET_ALL}")
        logging.info(f"Compliance check completed in {time.time() - start_time:.2f}s")

    except KeyboardInterrupt:
        print_status("\nOperation cancelled by user", Fore.YELLOW)
        logging.warning("Operation cancelled by user")
        sys.exit(1)
    except Exception as e:
        logging.exception("Critical error")
        print_status(f"Critical error: {str(e)}", Fore.RED)
        sys.exit(1)
if __name__ == "__main__":
main()

You might also like