# testCaseGenerator/model/generate.py
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import logging
import psutil
import re
import gc
import random
from typing import List, Dict, Any
# Initialize logger
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# Candidate models with approximate memory footprints
MEMORY_OPTIMIZED_MODELS = [
    "gpt2",                            # ~500MB
    "distilgpt2",                      # ~250MB
    "microsoft/DialoGPT-small",        # ~250MB
    "huggingface/CodeBERTa-small-v1",  # code tasks (RoBERTa-style masked LM; not picked by the selector below)
]
# Singleton state
_generator_instance = None
# Enhanced pattern matching for comprehensive test case generation
REQUIREMENT_PATTERNS = {
'authentication': {
'keywords': ['login', 'authentication', 'signin', 'sign in', 'password', 'username', 'credential', 'auth'],
'priority': 'High',
'category': 'Security'
},
'authorization': {
'keywords': ['permission', 'role', 'access', 'privilege', 'authorize', 'admin', 'user level'],
'priority': 'High',
'category': 'Security'
},
'data_validation': {
'keywords': ['validate', 'validation', 'input', 'format', 'check', 'verify', 'constraint'],
'priority': 'High',
'category': 'Functional'
},
'database': {
'keywords': ['database', 'db', 'store', 'save', 'persist', 'record', 'data storage', 'crud'],
'priority': 'Medium',
'category': 'Functional'
},
'performance': {
'keywords': ['performance', 'speed', 'time', 'response', 'load', 'concurrent', 'scalability'],
'priority': 'Medium',
'category': 'Performance'
},
'ui_interface': {
'keywords': ['interface', 'ui', 'user interface', 'display', 'screen', 'form', 'button', 'menu'],
'priority': 'Medium',
'category': 'UI/UX'
},
'api': {
'keywords': ['api', 'endpoint', 'service', 'request', 'response', 'rest', 'http'],
'priority': 'High',
'category': 'Integration'
},
'error_handling': {
'keywords': ['error', 'exception', 'failure', 'invalid', 'incorrect', 'wrong'],
'priority': 'High',
'category': 'Error Handling'
},
'reporting': {
'keywords': ['report', 'export', 'generate', 'analytics', 'dashboard', 'chart'],
'priority': 'Medium',
'category': 'Reporting'
},
'security': {
'keywords': ['security', 'encrypt', 'secure', 'ssl', 'https', 'token', 'session'],
'priority': 'High',
'category': 'Security'
}
}
def get_optimal_model_for_memory():
"""Select the best model based on available memory."""
available_memory = psutil.virtual_memory().available / (1024 * 1024) # MB
logger.info(f"Available memory: {available_memory:.1f}MB")
    if available_memory < 300:
        return None  # Too little headroom for any model; use template fallback
    elif available_memory < 600:
        return "microsoft/DialoGPT-small"
    elif available_memory < 1200:
        return "distilgpt2"
    else:
        return "gpt2"  # largest listed model; only chosen with ample free memory
def load_model_with_memory_optimization(model_name):
"""Load model with low memory settings."""
try:
logger.info(f"Loading {model_name} with memory optimizations...")
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left', use_fast=True)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            # float32 on CPU: half precision is only partially supported for
            # CPU inference and can fail at generation time with "Half" op errors
            torch_dtype=torch.float32,
            device_map="cpu",
            low_cpu_mem_usage=True,
            use_cache=False,
        )
        # Inference only: gradient checkpointing is a training-time optimization
        # and has no effect under torch.no_grad(), so it is not enabled here
        model.eval()
logger.info(f"✅ Model {model_name} loaded successfully")
return tokenizer, model
except Exception as e:
logger.error(f"❌ Failed to load model {model_name}: {e}")
return None, None
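# Hypothetical usage; the caller normally gets the model name from
# get_optimal_model_for_memory() and falls back to templates when this
# returns (None, None):
#   tokenizer, model = load_model_with_memory_optimization("distilgpt2")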
def analyze_requirements(text: str) -> Dict[str, Any]:
"""Analyze requirements text to identify patterns and generate appropriate test cases"""
text_lower = text.lower()
detected_patterns = {}
for pattern_name, pattern_info in REQUIREMENT_PATTERNS.items():
matches = []
for keyword in pattern_info['keywords']:
if keyword in text_lower:
                # Capture up to 50 characters of context on either side of the keyword
                pattern = rf'.{{0,50}}{re.escape(keyword)}.{{0,50}}'
                context_matches = re.findall(pattern, text_lower)
matches.extend(context_matches)
if matches:
detected_patterns[pattern_name] = {
'matches': matches[:3], # Limit to 3 matches
'priority': pattern_info['priority'],
'category': pattern_info['category']
}
return detected_patterns
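# Illustrative output shape (abridged; actual context strings depend on input):
#   analyze_requirements("Users must enter a password to log in.")
#   -> {'authentication': {'matches': ['users must enter a password to log in.'],
#                          'priority': 'High', 'category': 'Security'}}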
def generate_authentication_tests(matches: List[str]) -> List[Dict]:
"""Generate comprehensive authentication test cases"""
base_tests = [
{
"title": "Valid User Login",
"description": "Verify that users can successfully log in with valid credentials",
"preconditions": ["User account exists", "Application is accessible"],
"steps": [
"Navigate to login page",
"Enter valid username",
"Enter valid password",
"Click login button"
],
"expected": "User is successfully authenticated and redirected to dashboard/home page",
"postconditions": ["User session is created", "User is logged in"],
"test_data": "Valid username: testuser@example.com, Valid password: Test@123"
},
{
"title": "Invalid Username Login",
"description": "Verify that login fails with invalid username",
"preconditions": ["Application is accessible"],
"steps": [
"Navigate to login page",
"Enter invalid/non-existent username",
"Enter valid password format",
"Click login button"
],
"expected": "Login fails with appropriate error message 'Invalid credentials'",
"postconditions": ["User remains on login page", "Account security maintained"],
"test_data": "Valid username: testuser@example.com, Invalid password: WrongPass123"
},
{
"title": "Empty Fields Login Attempt",
"description": "Verify validation when login attempted with empty fields",
"preconditions": ["Application is accessible"],
"steps": [
"Navigate to login page",
"Leave username field empty",
"Leave password field empty",
"Click login button"
],
"expected": "Validation errors displayed for required fields",
"postconditions": ["User remains on login page", "Form validation active"],
"test_data": "Username: (empty), Password: (empty)"
},
{
"title": "SQL Injection Attack Prevention",
"description": "Verify that login form prevents SQL injection attacks",
"preconditions": ["Application is accessible"],
"steps": [
"Navigate to login page",
"Enter SQL injection payload in username field",
"Enter any password",
"Click login button"
],
"expected": "Login fails safely without database compromise or error exposure",
"postconditions": ["System security maintained", "No unauthorized access"],
"test_data": "Username: admin'; DROP TABLE users; --, Password: anypass"
}
]
return base_tests
def generate_data_validation_tests(matches: List[str]) -> List[Dict]:
"""Generate comprehensive data validation test cases"""
return [
{
"title": "Valid Data Input Validation",
"description": "Verify system accepts valid data formats correctly",
"preconditions": ["Form/API endpoint is accessible", "User has appropriate permissions"],
"steps": [
"Access the input form/endpoint",
"Enter data in valid format",
"Submit the form/request",
"Verify data is accepted"
],
"expected": "Data is accepted and processed successfully with confirmation message",
"postconditions": ["Data is stored correctly", "User receives success feedback"],
"test_data": "Valid email: user@domain.com, Valid phone: +1-234-567-8900"
},
{
"title": "Invalid Data Format Rejection",
"description": "Verify system rejects invalid data formats",
"preconditions": ["Form/API endpoint is accessible"],
"steps": [
"Access the input form/endpoint",
"Enter data in invalid format",
"Submit the form/request",
"Verify validation error is shown"
],
"expected": "System rejects invalid data with clear error message",
"postconditions": ["Invalid data is not stored", "User guided to correct format"],
"test_data": "Invalid email: notanemail, Invalid phone: 123-abc-defg"
},
{
"title": "Boundary Value Testing",
"description": "Test data validation at boundary values",
"preconditions": ["System has defined data length/value limits"],
"steps": [
"Test with minimum allowed value",
"Test with maximum allowed value",
"Test with value just below minimum",
"Test with value just above maximum"
],
"expected": "Min/max values accepted, out-of-range values rejected appropriately",
"postconditions": ["Boundary validation working correctly"],
"test_data": "Min: 1, Max: 100, Below: 0, Above: 101"
},
{
"title": "Special Characters Handling",
"description": "Verify proper handling of special characters in input",
"preconditions": ["Input fields accept text data"],
"steps": [
"Enter text with special characters (!@#$%^&*)",
"Enter text with unicode characters (émañ)",
"Enter text with HTML tags (<script>)",
"Submit and verify handling"
],
"expected": "Special characters handled safely without breaking functionality",
"postconditions": ["Data integrity maintained", "No XSS vulnerabilities"],
"test_data": "Special: Test!@#$, Unicode: Café, HTML: <b>test</b>"
}
]
def generate_performance_tests(matches: List[str]) -> List[Dict]:
"""Generate comprehensive performance test cases"""
return [
{
"title": "Response Time Under Normal Load",
"description": "Verify system response time meets requirements under normal usage",
"preconditions": ["System is running in production-like environment", "Normal user load"],
"steps": [
"Execute typical user operations",
"Measure response times for key functions",
"Record average response times",
"Compare against SLA requirements"
],
"expected": "All operations complete within specified time limits (e.g., <3 seconds)",
"postconditions": ["Performance baseline established"],
"test_data": "Target: <3 sec for page loads, <1 sec for API calls"
},
{
"title": "Load Testing with Multiple Users",
"description": "Test system performance with concurrent users",
"preconditions": ["Load testing tools configured", "Test environment ready"],
"steps": [
"Simulate 100 concurrent users",
"Execute common user workflows",
"Monitor system resources (CPU, memory)",
"Measure response times and error rates"
],
"expected": "System maintains acceptable performance with <5% error rate",
"postconditions": ["Load capacity documented", "Performance bottlenecks identified"],
"test_data": "Concurrent users: 100, Duration: 30 minutes"
},
{
"title": "Memory Usage Optimization",
"description": "Verify system memory usage remains within acceptable limits",
"preconditions": ["System monitoring tools available"],
"steps": [
"Monitor memory usage during normal operations",
"Execute memory-intensive operations",
"Check for memory leaks over extended periods",
"Verify garbage collection effectiveness"
],
"expected": "Memory usage stays within allocated limits, no memory leaks detected",
"postconditions": ["Memory optimization verified"],
"test_data": "Memory limit: 512MB, Test duration: 2 hours"
}
]
def generate_api_tests(matches: List[str]) -> List[Dict]:
"""Generate comprehensive API test cases"""
return [
{
"title": "Valid API Request Processing",
"description": "Verify API correctly processes valid requests",
"preconditions": ["API endpoint is accessible", "Valid authentication token available"],
"steps": [
"Send GET/POST request with valid parameters",
"Include proper authentication headers",
"Verify response status code",
"Validate response data structure"
],
"expected": "API returns 200 OK with expected data format",
"postconditions": ["Request logged", "Data processed correctly"],
"test_data": "Endpoint: /api/users, Method: GET, Auth: Bearer token123"
},
{
"title": "Invalid API Request Handling",
"description": "Verify API properly handles invalid requests",
"preconditions": ["API endpoint is accessible"],
"steps": [
"Send request with invalid parameters",
"Send request with missing required fields",
"Send malformed JSON in request body",
"Verify error responses"
],
"expected": "API returns appropriate error codes (400, 422) with descriptive messages",
"postconditions": ["Errors logged appropriately", "System remains stable"],
"test_data": "Invalid param: user_id='invalid', Missing: required field 'name'"
},
{
"title": "API Authentication and Authorization",
"description": "Test API security and access controls",
"preconditions": ["API requires authentication"],
"steps": [
"Send request without authentication token",
"Send request with invalid/expired token",
"Send request with valid token but insufficient permissions",
"Verify security responses"
],
"expected": "Unauthorized requests return 401/403 with security maintained",
"postconditions": ["Security audit trail created"],
"test_data": "Valid token: Bearer abc123, Invalid: Bearer expired456"
}
]
def generate_error_handling_tests(matches: List[str]) -> List[Dict]:
"""Generate comprehensive error handling test cases"""
return [
{
"title": "Graceful Error Message Display",
"description": "Verify system displays user-friendly error messages",
"preconditions": ["Error conditions can be triggered"],
"steps": [
"Trigger various error conditions",
"Verify error messages are displayed",
"Check that messages are user-friendly",
"Ensure no technical details exposed"
],
"expected": "Clear, helpful error messages shown without exposing system internals",
"postconditions": ["User experience maintained during errors"],
"test_data": "Error scenarios: network timeout, invalid input, server error"
},
{
"title": "System Recovery After Errors",
"description": "Test system's ability to recover from error states",
"preconditions": ["System can be put into error state"],
"steps": [
"Trigger system error condition",
"Verify error is handled gracefully",
"Attempt normal operations after error",
"Verify system functionality restored"
],
"expected": "System recovers fully and continues normal operation",
"postconditions": ["System stability maintained", "No data corruption"],
"test_data": "Recovery scenarios: database disconnect, memory overflow"
}
]
def generate_template_based_test_cases(srs_text: str) -> List[Dict]:
"""Generate comprehensive template-based test cases using pattern analysis"""
detected_patterns = analyze_requirements(srs_text)
all_test_cases = []
# Generate specific test cases based on detected patterns
for pattern_name, pattern_data in detected_patterns.items():
if pattern_name == 'authentication':
tests = generate_authentication_tests(pattern_data['matches'])
elif pattern_name == 'data_validation':
tests = generate_data_validation_tests(pattern_data['matches'])
elif pattern_name == 'performance':
tests = generate_performance_tests(pattern_data['matches'])
elif pattern_name == 'api':
tests = generate_api_tests(pattern_data['matches'])
elif pattern_name == 'error_handling':
tests = generate_error_handling_tests(pattern_data['matches'])
else:
# Generate generic tests for other patterns
tests = generate_generic_tests(pattern_name, pattern_data)
# Add pattern-specific metadata to each test
for i, test in enumerate(tests):
test['id'] = f"TC_{pattern_name.upper()}_{i+1:03d}"
test['priority'] = pattern_data['priority']
test['category'] = pattern_data['category']
all_test_cases.extend(tests)
# If no specific patterns detected, generate generic functional tests
if not all_test_cases:
all_test_cases = generate_generic_functional_tests(srs_text)
# Limit to reasonable number of test cases
return all_test_cases[:12]
def generate_generic_tests(pattern_name: str, pattern_data: Dict) -> List[Dict]:
"""Generate generic test cases for unspecified patterns"""
return [
{
"title": f"{pattern_name.replace('_', ' ').title()} - Positive Test",
"description": f"Verify {pattern_name.replace('_', ' ')} functionality works correctly",
"preconditions": ["System is accessible", "User has required permissions"],
"steps": [
f"Access {pattern_name.replace('_', ' ')} feature",
"Perform valid operation",
"Verify expected behavior"
],
"expected": f"{pattern_name.replace('_', ' ').title()} functionality works as expected",
"postconditions": ["System state is valid"],
"test_data": "Valid test data as per requirements"
},
{
"title": f"{pattern_name.replace('_', ' ').title()} - Negative Test",
"description": f"Verify {pattern_name.replace('_', ' ')} handles invalid scenarios",
"preconditions": ["System is accessible"],
"steps": [
f"Access {pattern_name.replace('_', ' ')} feature",
"Perform invalid operation",
"Verify error handling"
],
"expected": f"Invalid {pattern_name.replace('_', ' ')} operation handled gracefully",
"postconditions": ["System remains stable"],
"test_data": "Invalid test data to trigger error conditions"
}
]
def generate_generic_functional_tests(srs_text: str) -> List[Dict]:
"""Generate generic functional test cases when no specific patterns are detected"""
return [
{
"id": "TC_FUNC_001",
"title": "Basic System Functionality",
"priority": "High",
"category": "Functional",
"description": "Verify core system functionality works as specified",
"preconditions": ["System is deployed and accessible", "Test environment is configured"],
"steps": [
"Access the system/application",
"Navigate through main features",
"Execute primary use cases",
"Verify all functions work correctly"
],
"expected": "All core functionality operates according to requirements",
"postconditions": ["System demonstrates full functionality"],
"test_data": "Standard test data set as defined in requirements"
},
{
"id": "TC_FUNC_002",
"title": "Input Validation and Processing",
"priority": "High",
"category": "Functional",
"description": "Test system's ability to validate and process various inputs",
"preconditions": ["System accepts user input"],
"steps": [
"Enter valid data in all input fields",
"Submit data and verify processing",
"Enter invalid data and verify rejection",
"Test boundary conditions"
],
"expected": "Valid data processed correctly, invalid data rejected with appropriate messages",
"postconditions": ["Data integrity maintained"],
"test_data": "Mix of valid, invalid, and boundary test data"
},
{
"id": "TC_FUNC_003",
"title": "System Integration and Workflow",
"priority": "Medium",
"category": "Integration",
"description": "Verify end-to-end workflow and system integration",
"preconditions": ["All system components are integrated"],
"steps": [
"Execute complete business workflow",
"Verify data flow between components",
"Test system integration points",
"Validate end-to-end functionality"
],
"expected": "Complete workflow executes successfully with proper data flow",
"postconditions": ["Workflow completion confirmed"],
"test_data": "Complete dataset for end-to-end testing"
}
]
def parse_generated_test_cases(generated_text: str) -> List[Dict]:
"""Parse AI-generated text into structured test cases"""
lines = generated_text.split('\n')
test_cases = []
current_case = {}
case_counter = 1
    for line in lines:
        line = line.strip()
        # New test case on any numbered item ("1.", "12.", ...) or a "TC"/"Test" prefix
        if re.match(r'^(\d+\.|TC|Test)', line):
if current_case:
test_cases.append(current_case)
current_case = {
"id": f"TC_AI_{case_counter:03d}",
"title": line,
"priority": "Medium",
"category": "Functional",
"description": line,
"preconditions": ["System is accessible"],
"steps": ["Execute the test procedure"],
"expected": "Test should pass according to requirements",
"postconditions": ["System state verified"],
"test_data": "As specified in requirements"
}
case_counter += 1
if current_case:
test_cases.append(current_case)
if not test_cases:
return [{
"id": "TC_AI_001",
"title": "AI Generated Test Case",
"priority": "Medium",
"category": "Functional",
"description": "Auto-generated test case based on AI analysis",
"preconditions": ["System meets specified requirements"],
"steps": ["Review requirements", "Execute test procedure", "Verify results"],
"expected": "Requirements should be met as specified",
"postconditions": ["Test completion verified"],
"test_data": "Test data as defined in requirements"
}]
return test_cases
def generate_with_ai_model(srs_text: str, tokenizer, model) -> List[Dict]:
"""Generate test cases using AI model"""
    # Truncate by characters (not tokens) to keep the CPU prompt small
    max_input_length = 300
    if len(srs_text) > max_input_length:
        srs_text = srs_text[:max_input_length]
prompt = f"""Generate comprehensive test cases for this software requirement:
{srs_text}
Test Cases:
1."""
try:
inputs = tokenizer.encode(
prompt,
return_tensors="pt",
max_length=200,
truncation=True
)
with torch.no_grad():
outputs = model.generate(
inputs,
max_new_tokens=150,
num_return_sequences=1,
temperature=0.7,
do_sample=True,
pad_token_id=tokenizer.eos_token_id,
use_cache=False,
)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        del inputs, outputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
return parse_generated_test_cases(generated_text)
except Exception as e:
logger.error(f"❌ AI generation failed: {e}")
raise
def generate_with_fallback(srs_text: str):
"""Generate test cases with AI model fallback to enhanced templates"""
model_name = get_optimal_model_for_memory()
if model_name:
tokenizer, model = load_model_with_memory_optimization(model_name)
if tokenizer and model:
try:
test_cases = generate_with_ai_model(srs_text, tokenizer, model)
reason = get_algorithm_reason(model_name)
return test_cases, model_name, "transformer (causal LM)", reason
except Exception as e:
logger.warning(f"AI generation failed: {e}, falling back to enhanced templates")
logger.info("⚠️ Using enhanced template-based generation")
test_cases = generate_template_based_test_cases(srs_text)
return test_cases, "Enhanced Template-Based Generator", "pattern-matching + rule-based", "Enhanced template generation with comprehensive pattern analysis and structured test case creation"
# ✅ Function exposed to app.py
def generate_test_cases(srs_text: str) -> List[Dict]:
"""Main function to generate test cases"""
return generate_with_fallback(srs_text)[0]
def get_generator():
"""Get generator instance"""
global _generator_instance
if _generator_instance is None:
class Generator:
def __init__(self):
self.model_name = get_optimal_model_for_memory()
self.tokenizer = None
self.model = None
if self.model_name:
self.tokenizer, self.model = load_model_with_memory_optimization(self.model_name)
def get_model_info(self):
mem = psutil.Process().memory_info().rss / 1024 / 1024
return {
"model_name": self.model_name if self.model_name else "Enhanced Template-Based Generator",
"status": "loaded" if self.model else "enhanced_template_mode",
"memory_usage": f"{mem:.1f}MB",
"optimization": "low_memory_enhanced"
}
_generator_instance = Generator()
return _generator_instance
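# Hypothetical usage: inspect which backend will serve generation requests.
#   info = get_generator().get_model_info()
#   logger.info(f"Backend: {info['model_name']} ({info['status']})")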
def monitor_memory():
"""Monitor and manage memory usage"""
mem = psutil.Process().memory_info().rss / 1024 / 1024
logger.info(f"Memory usage: {mem:.1f}MB")
    if mem > 450:  # MB; trigger cleanup well before typical container limits
gc.collect()
logger.info("Memory cleanup triggered")
def generate_test_cases_and_info(input_text: str) -> Dict[str, Any]:
"""Generate test cases with full information"""
test_cases, model_name, algorithm_used, reason = generate_with_fallback(input_text)
return {
"model": model_name,
"algorithm": algorithm_used,
"reason": reason,
"test_cases": test_cases
}
def get_algorithm_reason(model_name: str) -> str:
"""Get explanation for algorithm selection"""
if model_name == "microsoft/DialoGPT-small":
return ("Selected due to low memory availability; DialoGPT-small provides "
"conversational understanding in limited memory environments with enhanced context processing.")
elif model_name == "distilgpt2":
return ("Selected for its balance between performance and low memory usage. "
"Ideal for small environments needing causal language modeling with good text generation quality.")
elif model_name == "gpt2":
return ("Chosen for general-purpose text generation with moderate memory headroom "
"and superior language understanding capabilities.")
elif model_name is None:
return ("Enhanced template-based generation selected due to memory constraints. "
"Uses comprehensive pattern matching, requirement analysis, and structured test case templates for robust test coverage.")
else:
return ("Model selected based on optimal tradeoff between memory usage, language generation capability, "
"and test case quality requirements.")
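# Minimal smoke test when the module is run directly. The SRS snippet below is a
# made-up example; real callers (e.g. app.py) supply their own requirements text.
if __name__ == "__main__":
    sample_srs = (
        "The system shall allow users to log in with a username and password. "
        "All API endpoints must respond within 3 seconds and validate input data."
    )
    result = generate_test_cases_and_info(sample_srs)
    logger.info(f"Model: {result['model']} | Algorithm: {result['algorithm']}")
    logger.info(f"Reason: {result['reason']}")
    for case in result["test_cases"]:
        logger.info(f"{case['id']}: {case['title']} [{case['priority']}]")
    monitor_memory()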