# generate_sample_files.py
import pandas as pd
import json
from datetime import datetime
import os

def create_sample_data():
    """Build the ten-item sample product catalogue used by the generators.

    Returns:
        list[dict]: A new list of new dicts on every call. Each dict has the
        keys ``product_id``, ``product_name``, ``manufacturer``,
        ``model_number``, ``upc``, ``category``, ``purchase_date`` and
        ``quantity`` (in that order). All values are strings except
        ``quantity``, which is an int. Some rows use the literal string
        ``"N/A"`` for ``upc`` or ``model_number``.
    """
    # Column order matters: it becomes the column order of the exported files.
    field_names = (
        "product_id",
        "product_name",
        "manufacturer",
        "model_number",
        "upc",
        "category",
        "purchase_date",
        "quantity",
    )

    # One tuple per product, positionally aligned with ``field_names``.
    rows = (
        ("PRD-001", "Baby Rattle Set", "Fisher-Price", "FP-123",
         "887961234567", "Toys", "2024-01-15", 50),
        ("PRD-002", "Infant Sleep Positioner", "Summer Infant", "SI-456",
         "012914567890", "Baby Products", "2023-11-20", 25),
        ("PRD-003", "Graco Pack 'n Play Playard", "Graco", "1812345",
         "047406123456", "Baby Furniture", "2024-03-10", 15),
        ("PRD-004", "Toyota Camry", "Toyota", "Camry 2023",
         "N/A", "Vehicles", "2023-06-01", 1),
        ("PRD-005", "Romaine Lettuce", "Dole", "N/A",
         "071430000000", "Food", "2024-10-15", 100),
        ("PRD-006", "Philips CPAP Machine", "Philips", "DreamStation",
         "075020123456", "Medical Devices", "2021-05-12", 10),
        ("PRD-007", "Samsung Galaxy Note 7", "Samsung", "SM-N930",
         "887276123456", "Electronics", "2016-09-01", 5),
        ("PRD-008", "Infant Swing", "Bright Starts", "BS-789",
         "074451234567", "Baby Products", "2024-02-20", 30),
        ("PRD-009", "Peanut Butter", "Jif", "N/A",
         "051500255001", "Food", "2022-03-15", 200),
        ("PRD-010", "Takata Airbag", "Takata", "TK-456",
         "N/A", "Auto Parts", "2015-08-10", 50),
    )

    return [dict(zip(field_names, row)) for row in rows]

def generate_csv_file():
    """Write the full sample catalogue to ``sample_products.csv``.

    The file is created in the current working directory, without an index
    column.

    Returns:
        str: The name of the file that was written.
    """
    filename = "sample_products.csv"

    catalogue = pd.DataFrame(create_sample_data())
    catalogue.to_csv(filename, index=False)

    print(f"✓ Created {filename}")
    return filename

def generate_excel_file():
    """Write the sample catalogue to ``sample_products.xlsx``.

    The workbook is written with the ``openpyxl`` engine into the current
    working directory and contains two sheets:

    * ``Products`` - one row per sample product.
    * ``Metadata`` - ``Field``/``Value`` rows giving the generation time,
      the product count, the purpose of the file and the product categories.

    Returns:
        str: The name of the file that was written.
    """
    products = create_sample_data()
    df = pd.DataFrame(products)

    filename = "sample_products.xlsx"

    # Derive the category list from the data (first-seen order, de-duplicated)
    # instead of hard-coding it; the previous literal omitted "Baby Furniture".
    categories = ", ".join(
        dict.fromkeys(product["category"] for product in products)
    )

    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name='Products', index=False)

        # Add a second sheet with metadata
        metadata = pd.DataFrame({
            "Field": ["Generated Date", "Total Products", "Purpose", "Categories"],
            "Value": [
                datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                len(products),
                "Product Recall Scanner Testing",
                categories,
            ]
        })
        metadata.to_excel(writer, sheet_name='Metadata', index=False)

    print(f"✓ Created {filename}")
    return filename

def generate_json_file():
    """Write the sample catalogue to ``sample_products.json``.

    The document is a JSON object with two members: ``metadata`` (generation
    timestamp, product count, purpose and version) and ``products`` (the list
    of sample products). It is indented by two spaces and written to the
    current working directory.

    Returns:
        str: The name of the file that was written.
    """
    catalogue = create_sample_data()
    filename = "sample_products.json"

    header = {
        "generated_date": datetime.now().isoformat(),
        "total_products": len(catalogue),
        "purpose": "Product Recall Scanner Testing",
        "version": "1.0",
    }
    document = {"metadata": header, "products": catalogue}

    with open(filename, 'w') as out:
        json.dump(document, out, indent=2)

    print(f"✓ Created {filename}")
    return filename

def generate_small_csv_file():
    """Write the first five sample products to ``sample_products_small.csv``.

    The file is created in the current working directory, without an index
    column.

    Returns:
        str: The name of the file that was written.
    """
    filename = "sample_products_small.csv"

    catalogue = create_sample_data()
    subset = catalogue[:5]  # keep only the leading five entries

    pd.DataFrame(subset).to_csv(filename, index=False)
    print(f"✓ Created {filename}")
    return filename

def generate_problem_products_file():
    """Write ``known_recalled_products.csv`` with five recalled products.

    Each row has the usual product columns plus a ``notes`` column holding a
    short description of the recall. The file is created in the current
    working directory, without an index column.

    Returns:
        str: The name of the file that was written.
    """
    filename = "known_recalled_products.csv"

    # Column order here is the column order of the CSV.
    columns = (
        "product_id",
        "product_name",
        "manufacturer",
        "model_number",
        "upc",
        "category",
        "purchase_date",
        "quantity",
        "notes",
    )

    # One tuple per product, positionally aligned with ``columns``.
    records = (
        ("RCL-001", "Philips DreamStation CPAP", "Philips", "DreamStation",
         "075020123456", "Medical Devices", "2021-03-15", 25,
         "Known recall - foam degradation issue"),
        ("RCL-002", "Takata Airbag Inflator", "Takata", "Various",
         "N/A", "Auto Parts", "2015-06-20", 100,
         "Known recall - rupture risk"),
        ("RCL-003", "Fisher-Price Rock 'n Play Sleeper", "Fisher-Price", "CBV60",
         "887961567984", "Baby Products", "2018-05-10", 30,
         "Known recall - safety hazard"),
        ("RCL-004", "Samsung Galaxy Note 7", "Samsung", "SM-N930",
         "887276123456", "Electronics", "2016-09-01", 10,
         "Known recall - battery fire risk"),
        ("RCL-005", "Jif Peanut Butter", "J.M. Smucker", "Various sizes",
         "051500255001", "Food", "2022-05-15", 500,
         "Known recall - salmonella contamination 2022"),
    )

    recalled = [dict(zip(columns, record)) for record in records]
    pd.DataFrame(recalled).to_csv(filename, index=False)

    print(f"✓ Created {filename}")
    return filename

def generate_safe_products_file():
    """Write ``safe_products.csv`` with three non-recalled sample products.

    The file is created in the current working directory, without an index
    column.

    Returns:
        str: The name of the file that was written.
    """
    filename = "safe_products.csv"

    # Column order here is the column order of the CSV.
    columns = (
        "product_id",
        "product_name",
        "manufacturer",
        "model_number",
        "upc",
        "category",
        "purchase_date",
        "quantity",
    )

    # One tuple per product, positionally aligned with ``columns``.
    records = (
        ("SAFE-001", "Generic Office Desk", "OfficeMax", "OM-2024",
         "012345678901", "Furniture", "2024-06-01", 10),
        ("SAFE-002", "Wireless Mouse", "Logitech", "M720",
         "097855134646", "Electronics", "2024-08-15", 50),
        ("SAFE-003", "Cotton T-Shirt", "Hanes", "5250",
         "078715013417", "Apparel", "2024-09-20", 100),
    )

    safe = [dict(zip(columns, record)) for record in records]
    pd.DataFrame(safe).to_csv(filename, index=False)

    print(f"✓ Created {filename}")
    return filename

def create_readme():
    """Write ``README_TEST_FILES.md`` describing the generated test files.

    The README is created in the current working directory as UTF-8 text.
    Its final line records the local generation time formatted as
    ``YYYY-MM-DD HH:MM:SS``.

    Returns:
        str: The name of the file that was written, matching the other
        generators in this module.
    """
    filename = "README_TEST_FILES.md"

    # NOTE: the template is rendered with str.format, so the only braces it
    # may contain are the ``{date}`` placeholder.
    readme_content = """# Product Recall Scanner - Test Files

This directory contains sample product files for testing the Product Recall Scanner agent.

## Files Included

### 1. sample_products.csv
- Standard CSV format
- 10 diverse products across multiple categories
- Mix of potentially recalled and safe products

### 2. sample_products.xlsx
- Excel format with two sheets
- Products sheet: Same 10 products as CSV
- Metadata sheet: File information

### 3. sample_products.json
- JSON format with nested structure
- Includes metadata and products array
- Same 10 products as other files

### 4. sample_products_small.csv
- Smaller CSV with only 5 products
- Useful for quick testing

### 5. known_recalled_products.csv
- Contains products with known recalls
- Useful for validating detection accuracy
- Includes notes about recall reasons

### 6. safe_products.csv
- Contains products unlikely to be recalled
- Generic office and consumer items
- Useful for testing false positive rates

## Product Categories Included

- Baby Products
- Baby Furniture
- Toys
- Vehicles
- Food Products
- Medical Devices
- Electronics
- Auto Parts
- Furniture
- Apparel

## Known Recalls in Test Data

Several products in the sample files have had real recalls:
- Philips DreamStation CPAP (2021 recall)
- Takata Airbags (ongoing recalls since 2013)
- Fisher-Price Rock 'n Play (2019 recall)
- Samsung Galaxy Note 7 (2016 recall)
- Jif Peanut Butter (2022 recall)

## Usage

Upload any of these files to the Product Recall Scanner agent to test:
1. File parsing functionality
2. Recall detection accuracy
3. Result formatting and display
4. Export functionality

## Testing Scenarios

1. **Basic Functionality**: Use sample_products_small.csv
2. **Recall Detection**: Use known_recalled_products.csv
3. **False Positive Check**: Use safe_products.csv
4. **Format Support**: Test CSV, Excel, and JSON versions
5. **Full Dataset**: Use sample_products.csv or .xlsx

Generated: {date}
"""

    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Explicit encoding keeps the output independent of the platform default.
    with open(filename, 'w', encoding="utf-8") as f:
        f.write(readme_content.format(date=generated_at))

    print("✓ Created README_TEST_FILES.md")
    return filename

def main():
    """Generate every sample file inside ``sample_product_files``.

    The output directory is created relative to the current working
    directory if it does not already exist. The generators write to the
    working directory, so this function switches into the output directory
    while they run and restores the caller's working directory afterwards,
    even if a generator raises. Finally it prints the generated files with
    their sizes in bytes.
    """
    print("Generating sample test files for Product Recall Scanner...\n")

    # Create output directory
    output_dir = "sample_product_files"
    already_present = os.path.isdir(output_dir)
    # exist_ok avoids a race between the existence check and the creation.
    os.makedirs(output_dir, exist_ok=True)
    if not already_present:
        print(f"Created directory: {output_dir}\n")

    original_cwd = os.getcwd()
    os.chdir(output_dir)
    try:
        # Generate all files
        generate_csv_file()
        generate_excel_file()
        generate_json_file()
        generate_small_csv_file()
        generate_problem_products_file()
        generate_safe_products_file()
        create_readme()

        print(f"\n✅ All sample files created successfully in '{output_dir}' directory!")
        print("\nFiles ready for testing:")
        # os.listdir() order is arbitrary; sort for a stable, readable listing.
        for entry in sorted(os.listdir()):
            if entry.endswith(('.csv', '.xlsx', '.json', '.md')):
                size = os.path.getsize(entry)
                print(f"  - {entry} ({size} bytes)")
    finally:
        # Do not leak the directory change to code that imports and calls main().
        os.chdir(original_cwd)

# Run the generators only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()