From 971cdc2f8efb4d1848f9a8e185fb1802eb291bdf Mon Sep 17 00:00:00 2001 From: Esteban Serna Date: Wed, 20 Aug 2025 19:02:02 -0400 Subject: [PATCH] feat: migrate PrintDistinctPKs from Docker to Maven Remove Docker dependencies and create Maven multi-module project for DynamoDB scripts. Addresses security concerns with public Docker images by providing direct executable JARs. - Remove docker-compose.yml and Dockerfiles - Add Maven parent/child module structure under java/ - Update README with Maven build instructions --- .../LoadMaxValues/java/Dockerfile | 19 - .../LoadMaxValues/java/README.md | 14 - .../LoadMaxValues/java/pom.xml | 88 --- .../PrintDistinctPKs/Printer/java/Dockerfile | 19 - .../PrintDistinctPKs/Printer/java/README.md | 18 - scripts/PrintDistinctPKs/Printer/java/pom.xml | 88 --- .../Printer/nodejs/package.json | 11 - scripts/PrintDistinctPKs/README.md | 628 +++++++++--------- scripts/PrintDistinctPKs/java/.gitignore | 15 + scripts/PrintDistinctPKs/java/loader/pom.xml | 33 + .../main/java/org/example/LoadMaxValues.java | 2 +- scripts/PrintDistinctPKs/java/pom.xml | 105 +++ scripts/PrintDistinctPKs/java/printer/pom.xml | 33 + .../java/org/example/PrintDistinctPKs.java | 0 14 files changed, 502 insertions(+), 571 deletions(-) delete mode 100644 scripts/PrintDistinctPKs/LoadMaxValues/java/Dockerfile delete mode 100644 scripts/PrintDistinctPKs/LoadMaxValues/java/README.md delete mode 100644 scripts/PrintDistinctPKs/LoadMaxValues/java/pom.xml delete mode 100644 scripts/PrintDistinctPKs/Printer/java/Dockerfile delete mode 100644 scripts/PrintDistinctPKs/Printer/java/README.md delete mode 100644 scripts/PrintDistinctPKs/Printer/java/pom.xml delete mode 100644 scripts/PrintDistinctPKs/Printer/nodejs/package.json create mode 100644 scripts/PrintDistinctPKs/java/.gitignore create mode 100644 scripts/PrintDistinctPKs/java/loader/pom.xml rename scripts/PrintDistinctPKs/{LoadMaxValues/java => java/loader}/src/main/java/org/example/LoadMaxValues.java (99%) create 
mode 100644 scripts/PrintDistinctPKs/java/pom.xml create mode 100644 scripts/PrintDistinctPKs/java/printer/pom.xml rename scripts/PrintDistinctPKs/{Printer/java => java/printer}/src/main/java/org/example/PrintDistinctPKs.java (100%) diff --git a/scripts/PrintDistinctPKs/LoadMaxValues/java/Dockerfile b/scripts/PrintDistinctPKs/LoadMaxValues/java/Dockerfile deleted file mode 100644 index f6c03b3..0000000 --- a/scripts/PrintDistinctPKs/LoadMaxValues/java/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -# Use an official Java base image -FROM openjdk:11 - -# Set the working directory -WORKDIR /app - -# Copy your pom.xml and LoadMaxValues.java file into the container -COPY pom.xml . -COPY src/ ./src/ - -# Install Maven -RUN apt-get update && \ - apt-get install -y maven - -# Build the Java application -RUN mvn clean install - -# Set the entry point for the container -ENTRYPOINT ["sh", "-c", "java -jar target/LoadMaxValues-1.0-SNAPSHOT.jar"] diff --git a/scripts/PrintDistinctPKs/LoadMaxValues/java/README.md b/scripts/PrintDistinctPKs/LoadMaxValues/java/README.md deleted file mode 100644 index e8db238..0000000 --- a/scripts/PrintDistinctPKs/LoadMaxValues/java/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# CLI Parameters - -The LaodMaxValues class accepts --region as a required parameter, or you can -pass in environment variables for the AWS_DEFAULT_REGION as is done with docker below. - -# How to build and run using docker - -docker build -t load-max-values . 
- -docker run --rm -it \ - -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ - -e AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION \ - load-max-values diff --git a/scripts/PrintDistinctPKs/LoadMaxValues/java/pom.xml b/scripts/PrintDistinctPKs/LoadMaxValues/java/pom.xml deleted file mode 100644 index 3b92910..0000000 --- a/scripts/PrintDistinctPKs/LoadMaxValues/java/pom.xml +++ /dev/null @@ -1,88 +0,0 @@ - - - 4.0.0 - - org.example - LoadMaxValues - 1.0-SNAPSHOT - - - 11 - 11 - UTF-8 - - - - - software.amazon.awssdk - bom - 2.20.48 - pom - import - - - - - - software.amazon.awssdk - sts - - - software.amazon.awssdk - dynamodb - - - software.amazon.awssdk - aws-core - - - - - - org.apache.maven.plugins - maven-jar-plugin - 2.4 - - - - org.example.LoadMaxValues - - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.2.4 - - - package - - shade - - - - - org.example.LoadMaxValues - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - - diff --git a/scripts/PrintDistinctPKs/Printer/java/Dockerfile b/scripts/PrintDistinctPKs/Printer/java/Dockerfile deleted file mode 100644 index 528174c..0000000 --- a/scripts/PrintDistinctPKs/Printer/java/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -# Use an official Java base image -FROM openjdk:11 - -# Set the working directory -WORKDIR /app - -# Copy your pom.xml and PrintDistinctPKs.java file into the container -COPY pom.xml . 
-COPY src/ ./src/ - -# Install Maven -RUN apt-get update && \ - apt-get install -y maven - -# Build the Java application -RUN mvn clean install - -# Set the entry point for the container -ENTRYPOINT ["sh", "-c", "java -jar target/PrintDistinctPKs-1.0-SNAPSHOT.jar"] diff --git a/scripts/PrintDistinctPKs/Printer/java/README.md b/scripts/PrintDistinctPKs/Printer/java/README.md deleted file mode 100644 index 39b5e37..0000000 --- a/scripts/PrintDistinctPKs/Printer/java/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# CLI Parameters - -The LaodMaxValues class accepts --region and ---table-name as a parameter, or you can pass in environment -variables for the AWS_DEFAULT_REGION as is done with docker below. - -# How to build and run using docker - -docker build -t print-distinct-pks . - -docker run --rm -it \ - -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ - -e AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION \ - -e DYNAMODB_TABLE_NAME=my-table-name \ - print-distinct-pks - -docker rmi -f print-distinct-pks diff --git a/scripts/PrintDistinctPKs/Printer/java/pom.xml b/scripts/PrintDistinctPKs/Printer/java/pom.xml deleted file mode 100644 index 145548b..0000000 --- a/scripts/PrintDistinctPKs/Printer/java/pom.xml +++ /dev/null @@ -1,88 +0,0 @@ - - - 4.0.0 - - org.example - PrintDistinctPKs - 1.0-SNAPSHOT - - - 11 - 11 - UTF-8 - - - - - software.amazon.awssdk - bom - 2.20.48 - pom - import - - - - - - software.amazon.awssdk - sts - - - software.amazon.awssdk - dynamodb - - - software.amazon.awssdk - aws-core - - - - - - org.apache.maven.plugins - maven-jar-plugin - 2.4 - - - - org.example.PrintDistinctPKs - - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.2.4 - - - package - - shade - - - - - org.example.PrintDistinctPKs - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - - diff --git a/scripts/PrintDistinctPKs/Printer/nodejs/package.json b/scripts/PrintDistinctPKs/Printer/nodejs/package.json 
deleted file mode 100644 index 813778d..0000000 --- a/scripts/PrintDistinctPKs/Printer/nodejs/package.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "name": "dynamodb-print-distinct-pks", - "version": "1.0.0", - "description": "A simple script to show an optimal method for printing a distinct list of Primary Keys in a DynamoDB Table", - "main": "print_distinct_pks.js", - "dependencies": { - "@aws-sdk/client-dynamodb": "^3.293.0", - "commander": "^10.0.0" - } -} - diff --git a/scripts/PrintDistinctPKs/README.md b/scripts/PrintDistinctPKs/README.md index 01d2d27..abc9f48 100644 --- a/scripts/PrintDistinctPKs/README.md +++ b/scripts/PrintDistinctPKs/README.md @@ -1,327 +1,329 @@ -# Print Distinct Partition Keys +# Print Distinct Primary Keys -This directory contains tools for working with partition keys in DynamoDB tables, including utilities to print distinct partition keys, load random test data, and test maximum values for different attribute types. +This repository contains tools for analyzing and testing DynamoDB tables by working with partition keys and loading test data with maximum-sized attributes. Multiple language implementations are provided for flexibility. -## Directory Structure +## Overview -### 1. [Printer](./Printer) -Scripts in multiple programming languages to scan a DynamoDB table and print distinct partition keys. +This project provides three main utilities: +1. **Distinct PK Printer**: Prints all distinct partition keys from a DynamoDB table +2. **Max Values Test Data Loader**: Loads test data with maximum DynamoDB attribute sizes for testing and validation +3. **Random Data Loader**: Loads random test data into DynamoDB tables -- **Java**: Implementation in Java -- **Node.js**: Implementation in JavaScript for Node.js -- **Python**: Implementation in Python +## Project Structure -These scripts help you analyze the distribution of data across partition keys, which is useful for identifying potential hot partitions and optimizing table design. 
+``` +scripts/PrintDistinctPKs/ +├── README.md # This file +├── java/ # Maven multi-module project (recommended) +│ ├── pom.xml # Parent POM +│ ├── printer/ # DynamoDB Distinct PK Printer module +│ │ ├── pom.xml +│ │ └── src/main/java/org/example/PrintDistinctPKs.java +│ └── loader/ # DynamoDB Max Values Test Data Loader module +│ ├── pom.xml +│ └── src/main/java/org/example/LoadMaxValues.java +├── Printer/ # Alternative implementations +│ ├── nodejs/ +│ │ ├── print_distinct_pks.js +│ │ └── package.json +│ └── python/ +│ └── print_distinct_pks.py +├── LoadMaxValues/ # Alternative implementations +│ ├── nodejs/ +│ │ ├── loadMaxValues.js +│ │ └── package.json +│ └── python/ +│ └── load_max_values.py +└── RandomLoader/ # Random data loader + └── load_random_data.py +``` + +## Prerequisites -#### Table Data Model for Printer Scripts +- Java 11 or higher +- Maven 3.6 or higher +- AWS CLI configured with appropriate credentials +- DynamoDB table access permissions + +## Building the Applications -The Printer scripts are designed to work with any DynamoDB table that has a composite key (partition key and sort key). 
The scripts dynamically determine the key structure from the table's schema: +Build all modules from the java directory: +```bash +cd scripts/PrintDistinctPKs/java +mvn clean package ``` -TableName: -KeySchema: - - AttributeName: pk - KeyType: HASH - - AttributeName: sk - KeyType: RANGE -AttributeDefinitions: - - AttributeName: pk - AttributeType: S - - AttributeName: sk - AttributeType: S + +This creates two executable JAR files: +- `printer/target/DynamoDBDistinctPKPrinter-1.0-SNAPSHOT.jar` +- `loader/target/DynamoDBMaxValuesTestDataLoader-1.0-SNAPSHOT.jar` + +## Usage + +### Java Implementation (Recommended) + +Build the Java applications first: +```bash +cd java +mvn clean package ``` -The scripts support tables with sort keys of any of the three supported DynamoDB key types: -- String (S) -- Number (N) -- Binary (B) +#### Distinct PK Printer -The Printer scripts: -1. Determines the partition key and sort key names from the table's key schema -2. Identifies the sort key's data type -3. Uses the appropriate maximum value for the sort key type when scanning -4. Efficiently retrieves only distinct partition key values +Print all distinct partition keys from a DynamoDB table: -### Using the Printer Scripts +```bash +java -jar printer/target/DynamoDBDistinctPKPrinter-1.0-SNAPSHOT.jar <table-name> [region] +``` -### Prerequisites -- AWS CLI configured with appropriate credentials -- Language-specific dependencies (Java, Node.js, or Python) depending on which scripts you want to use - - -Each language implementation provides the same functionality but with language-specific setup and execution steps: - -#### Java Implementation - -1. Navigate to the Java directory: - ``` - cd Printer/java - ``` - -2. Build the project using Maven: - ``` - mvn clean package - ``` - -3. Run the application: - ``` - java -jar target/PrintDistinctPKs-1.0-SNAPSHOT.jar --table-name --region - ``` - -4. Alternatively, use Docker: - ``` - docker build -t print-distinct-pks . 
- - docker run --rm -it \ - -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ - -e AWS_DEFAULT_REGION= \ - -e DYNAMODB_TABLE_NAME= \ - print-distinct-pks - ``` - -#### Node.js Implementation - -1. Navigate to the Node.js directory: - ``` - cd Printer/nodejs - ``` - -2. Install dependencies: - ``` - npm install - ``` - -3. Run the script: - ``` - node print_distinct_pks.js --region --table-name - ``` - -#### Python Implementation - -1. Navigate to the Python directory: - ``` - cd Printer/python - ``` - -2. Run the script: - ``` - python print_distinct_pks.py --region --table-name - ``` - -### 2. [RandomLoader](./RandomLoader) -A Python script (`load_random_data.py`) that generates and loads random test data into DynamoDB tables. - -Key features: -- Creates tables with different sort key types (string, number, binary) -- Generates random partition keys and sort keys -- Configurable number of items per partition key -- Useful for testing and benchmarking DynamoDB performance - -#### Table Data Models for RandomLoader - -The RandomLoader script creates three tables with different sort key types: - -1. **String Sort Key Table (`sk-str-test-data`)** - ``` - TableName: sk-str-test-data - KeySchema: - - AttributeName: pk - KeyType: HASH - - AttributeName: sk - KeyType: RANGE - AttributeDefinitions: - - AttributeName: pk - AttributeType: S - - AttributeName: sk - AttributeType: S - BillingMode: PAY_PER_REQUEST - ``` - -2. **Number Sort Key Table (`sk-num-test-data`)** - ``` - TableName: sk-num-test-data - KeySchema: - - AttributeName: pk - KeyType: HASH - - AttributeName: sk - KeyType: RANGE - AttributeDefinitions: - - AttributeName: pk - AttributeType: S - - AttributeName: sk - AttributeType: N - BillingMode: PAY_PER_REQUEST - ``` - -3. 
**Binary Sort Key Table (`sk-bin-test-data`)** - ``` - TableName: sk-bin-test-data - KeySchema: - - AttributeName: pk - KeyType: HASH - - AttributeName: sk - KeyType: RANGE - AttributeDefinitions: - - AttributeName: pk - AttributeType: S - - AttributeName: sk - AttributeType: B - BillingMode: PAY_PER_REQUEST - ``` - -Each table is populated with random data: -- Random string partition keys (10 characters) -- Between 1 and 10 items per partition key -- Sort keys appropriate for each table type (string, number, or binary) -- Total of approximately 5,000 items per table - - -### Using the RandomLoader -1. Navigate to the RandomLoader directory -2. Review and modify the configuration variables at the top of `load_random_data.py` as needed -3. Run the script: `python load_random_data.py --region ` - - -### 3. [LoadMaxValues](./LoadMaxValues) -Scripts to test the maximum values for different attribute types in DynamoDB. - -- **Java**: Implementation in Java -- **Node.js**: Implementation in JavaScript for Node.js -- **Python**: Implementation in Python - -These scripts are useful for understanding the limits of DynamoDB's data types and ensuring your application handles edge cases correctly. - -#### Table Data Models for LoadMaxValues - -The LoadMaxValues scripts create three tables to test maximum values for different sort key types: - -1. **Maximum String Sort Key Table (`max-str-sk-test-python`)** - ``` - TableName: max-str-sk-test-python - KeySchema: - - AttributeName: pk - KeyType: HASH - - AttributeName: sk - KeyType: RANGE - AttributeDefinitions: - - AttributeName: pk - AttributeType: S - - AttributeName: sk - AttributeType: S - BillingMode: PAY_PER_REQUEST - ``` - - Tests with maximum string value: 256 repetitions of the maximum Unicode code point - -2. 
**Maximum Number Sort Key Table (`max-num-sk-test-python`)** - ``` - TableName: max-num-sk-test-python - KeySchema: - - AttributeName: pk - KeyType: HASH - - AttributeName: sk - KeyType: RANGE - AttributeDefinitions: - - AttributeName: pk - AttributeType: S - - AttributeName: sk - AttributeType: N - BillingMode: PAY_PER_REQUEST - ``` - - Tests with maximum number value: 9.9999999999999999999999999999999999999E+125 - -3. **Maximum Binary Sort Key Table (`max-bin-sk-test-python`)** - ``` - TableName: max-bin-sk-test-python - KeySchema: - - AttributeName: pk - KeyType: HASH - - AttributeName: sk - KeyType: RANGE - AttributeDefinitions: - - AttributeName: pk - AttributeType: S - - AttributeName: sk - AttributeType: B - BillingMode: PAY_PER_REQUEST - ``` - - Tests with maximum binary value: 1024 bytes of 0xFF - -Each table contains a single item with a fixed partition key ("sample-pk-value") and a sort key set to the maximum value for its data type. - -## Use Cases - -1. **Analyze Partition Key Distribution** - - Identify potential hot partitions - - Verify that your partition key design distributes data evenly - -2. **Generate Test Data** - - Create test tables with specific characteristics - - Populate tables with random data for performance testing - -3. **Test DynamoDB Limits** - - Verify how your application handles maximum values - - Understand the practical limits of different DynamoDB data types - -### Using the LoadMaxValues Scripts - -The LoadMaxValues scripts create tables and test maximum values for different attribute types in DynamoDB. Here are instructions for running the implementations in different languages: - -#### Java Implementation - -1. Navigate to the Java directory: - ``` - cd LoadMaxValues/java - ``` - -2. Build the project using Maven: - ``` - mvn clean package - ``` - -3. Run the application: - ``` - java -jar target/load-max-values-1.0.jar --region - ``` - -4. Alternatively, use Docker: - ``` - docker build -t load-max-values . 
- - docker run --rm -it \ - -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ - -e AWS_DEFAULT_REGION= \ - load-max-values - ``` - -#### Python Implementation - -1. Navigate to the Python directory: - ``` - cd LoadMaxValues/python - ``` - -2. Run the script: - ``` - python load_max_values.py --region - ``` - -#### Node.js Implementation - -1. Navigate to the Node.js directory: - ``` - cd LoadMaxValues/nodejs - ``` - -2. Install dependencies: - ``` - npm install - ``` - -3. Run the script: - ``` - node load_max_values.js --region - ``` - -The scripts will create three tables with different sort key types (string, number, binary) and insert items with maximum values for each type. +**Parameters:** +- `table-name`: Name of the DynamoDB table to analyze +- `region`: (Optional) AWS region (defaults to us-east-1) + +**Example:** +```bash +java -jar printer/target/DynamoDBDistinctPKPrinter-1.0-SNAPSHOT.jar MyTable us-west-2 +``` + +#### Max Values Test Data Loader + +Load test data with maximum DynamoDB attribute sizes: + +```bash +java -jar loader/target/DynamoDBMaxValuesTestDataLoader-1.0-SNAPSHOT.jar <table-name> [region] +``` + +**Parameters:** +- `table-name`: Name of the DynamoDB table to load test data into +- `region`: (Optional) AWS region (defaults to us-east-1) + +**Example:** +```bash +java -jar loader/target/DynamoDBMaxValuesTestDataLoader-1.0-SNAPSHOT.jar TestTable us-west-2 +``` + +### Node.js Implementation + +Prerequisites: Node.js 14+ and npm installed. + +#### Distinct PK Printer + +```bash +cd Printer/nodejs +npm install +node print_distinct_pks.js <table-name> [region] +``` + +**Example:** +```bash +node print_distinct_pks.js MyTable us-west-2 +``` + +#### Max Values Test Data Loader + +```bash +cd LoadMaxValues/nodejs +npm install +node loadMaxValues.js <table-name> [region] +``` + +**Example:** +```bash +node loadMaxValues.js TestTable us-west-2 +``` + +### Python Implementation + +Prerequisites: Python 3.7+ and boto3 installed. 
+ +Install dependencies: +```bash +pip install boto3 +``` + +#### Distinct PK Printer + +```bash +cd Printer/python +python print_distinct_pks.py <table-name> [region] +``` + +**Example:** +```bash +python print_distinct_pks.py MyTable us-west-2 +``` + +#### Max Values Test Data Loader + +```bash +cd LoadMaxValues/python +python load_max_values.py <table-name> [region] +``` + +**Example:** +```bash +python load_max_values.py TestTable us-west-2 +``` + +#### Random Data Loader + +```bash +cd RandomLoader +python load_random_data.py <table-name> [region] [number-of-items] +``` + +**Parameters:** +- `table-name`: Name of the DynamoDB table to load random data into +- `region`: (Optional) AWS region (defaults to us-east-1) +- `number-of-items`: (Optional) Number of random items to create (defaults to 100) + +**Example:** +```bash +python load_random_data.py TestTable us-west-2 500 +``` + +## Authentication + +The applications use the AWS SDK's default credential provider chain, which checks for credentials in the following order: +1. Environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`) +2. AWS credentials file (`~/.aws/credentials`) +3. IAM roles for EC2 instances +4. 
AWS CLI configuration + +## Required IAM Permissions + +### For Distinct PK Printer: +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "dynamodb:Scan", + "dynamodb:DescribeTable" + ], + "Resource": "arn:aws:dynamodb:*:*:table/YourTableName" + } + ] +} +``` + +### For Max Values Test Data Loader: +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "dynamodb:PutItem", + "dynamodb:CreateTable", + "dynamodb:DescribeTable", + "sts:GetCallerIdentity" + ], + "Resource": [ + "arn:aws:dynamodb:*:*:table/YourTableName", + "*" + ] + } + ] +} +``` + +## Features + +### Distinct PK Printer +- Scans entire DynamoDB table to find unique partition keys +- Handles all DynamoDB data types (String, Number, Binary) +- Displays partition key values in human-readable format +- Provides scan progress and timing information +- Supports consistent read operations + +### Max Values Test Data Loader +- Creates test data with maximum DynamoDB attribute sizes +- Tests various attribute types: + - Strings (up to 400KB) + - Numbers (large numeric values) + - Binary data (up to 400KB) + - Boolean values + - Lists and Maps +- Generates unique test records using account ID and timestamp +- Provides detailed logging of operations + +## Development + +### Building Individual Modules + +Build only the printer: +```bash +cd printer +mvn clean package +``` + +Build only the loader: +```bash +cd loader +mvn clean package +``` + +### Code Structure + +Both applications are built using: +- **AWS SDK for Java v2**: Modern, async-capable AWS SDK +- **Maven Shade Plugin**: Creates executable JARs with all dependencies +- **Standard Maven project structure**: Easy to maintain and extend + +## Troubleshooting + +### Common Issues + +1. **"Unable to load AWS credentials"** + - Ensure AWS CLI is configured: `aws configure` + - Or set environment variables: `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` + +2. 
**"Table not found"** + - Verify table name and region + - Ensure the table exists in the specified region + +3. **"Access denied"** + - Check IAM permissions for the required DynamoDB actions + - Verify the credentials have access to the specific table + +4. **OutOfMemoryError with large tables** + - Increase JVM heap size: `java -Xmx2g -jar ...` + - Consider processing tables in smaller segments + +### Debugging + +Enable detailed AWS SDK logging by adding JVM arguments: +```bash +java -Dorg.slf4j.simpleLogger.defaultLogLevel=debug -jar <path-to-jar> <table-name> [region] +``` + +## Migration from Docker + +This project has been migrated from Docker-based execution to direct Maven builds for improved security and simplified deployment. The previous Docker setup used public Docker images, which didn't align with security requirements. The new Maven-based approach: + +- ✅ Eliminates dependency on public Docker images +- ✅ Provides direct executable JARs +- ✅ Simplifies CI/CD integration +- ✅ Reduces security attack surface +- ✅ Improves performance (no container overhead) + +If you previously used Docker commands, here are the equivalent Maven commands: + +**Old Docker approach:** +```bash +docker-compose run printer MyTable us-west-2 +``` + +**New Maven approach:** +```bash +mvn clean package +java -jar printer/target/DynamoDBDistinctPKPrinter-1.0-SNAPSHOT.jar MyTable us-west-2 +``` + +## License + +This project is licensed under the same terms as the aws-dynamodb-examples repository. 
diff --git a/scripts/PrintDistinctPKs/java/.gitignore b/scripts/PrintDistinctPKs/java/.gitignore new file mode 100644 index 0000000..a6afa05 --- /dev/null +++ b/scripts/PrintDistinctPKs/java/.gitignore @@ -0,0 +1,15 @@ +# Maven build artifacts +target/ +dependency-reduced-pom.xml + +# IDE files +.idea/ +*.iml +.vscode/ +.classpath +.project +.settings/ + +# OS files +.DS_Store +Thumbs.db diff --git a/scripts/PrintDistinctPKs/java/loader/pom.xml b/scripts/PrintDistinctPKs/java/loader/pom.xml new file mode 100644 index 0000000..e64f949 --- /dev/null +++ b/scripts/PrintDistinctPKs/java/loader/pom.xml @@ -0,0 +1,33 @@ + + + 4.0.0 + + + com.amazon.dynamodb.examples + print-distinct-pks-parent + 1.0-SNAPSHOT + + + dynamodb-max-values-test-data-loader + DynamoDB Max Values Test Data Loader + Tool to load test data with maximum DynamoDB attribute sizes for testing and validation + + + + + org.apache.maven.plugins + maven-shade-plugin + + DynamoDBMaxValuesTestDataLoader-${project.version} + + + org.example.LoadMaxValues + + + + + + + diff --git a/scripts/PrintDistinctPKs/LoadMaxValues/java/src/main/java/org/example/LoadMaxValues.java b/scripts/PrintDistinctPKs/java/loader/src/main/java/org/example/LoadMaxValues.java similarity index 99% rename from scripts/PrintDistinctPKs/LoadMaxValues/java/src/main/java/org/example/LoadMaxValues.java rename to scripts/PrintDistinctPKs/java/loader/src/main/java/org/example/LoadMaxValues.java index 2bf1c3b..4fe3114 100644 --- a/scripts/PrintDistinctPKs/LoadMaxValues/java/src/main/java/org/example/LoadMaxValues.java +++ b/scripts/PrintDistinctPKs/java/loader/src/main/java/org/example/LoadMaxValues.java @@ -63,7 +63,7 @@ public static void main(String[] args) { System.exit(1); } - Initialize DynamoDB client. + // Initialize DynamoDB client. 
dynamoDb = DynamoDbClient.builder().region(awsRegion).build(); // We need to create a string that is encoded in UTF-8 to 1024 bytes of the highest diff --git a/scripts/PrintDistinctPKs/java/pom.xml b/scripts/PrintDistinctPKs/java/pom.xml new file mode 100644 index 0000000..ef2087a --- /dev/null +++ b/scripts/PrintDistinctPKs/java/pom.xml @@ -0,0 +1,105 @@ + + + 4.0.0 + + com.amazon.dynamodb.examples + print-distinct-pks-parent + 1.0-SNAPSHOT + pom + + DynamoDB Print Distinct PKs - Parent + Parent project for DynamoDB tools: distinct partition key printer and max values test data loader + + + printer + loader + + + + 11 + 11 + UTF-8 + 2.20.48 + 3.13.0 + 2.4 + 3.2.4 + 3.2.5 + + + + + + software.amazon.awssdk + bom + ${aws.sdk.version} + pom + import + + + + + + + + software.amazon.awssdk + sts + + + software.amazon.awssdk + dynamodb + + + software.amazon.awssdk + aws-core + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven.compiler.plugin.version} + + + org.apache.maven.plugins + maven-jar-plugin + ${maven.jar.plugin.version} + + + org.apache.maven.plugins + maven-shade-plugin + ${maven.shade.plugin.version} + + + package + + shade + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + ${maven.surefire.plugin.version} + + + + + diff --git a/scripts/PrintDistinctPKs/java/printer/pom.xml b/scripts/PrintDistinctPKs/java/printer/pom.xml new file mode 100644 index 0000000..33ac080 --- /dev/null +++ b/scripts/PrintDistinctPKs/java/printer/pom.xml @@ -0,0 +1,33 @@ + + + 4.0.0 + + + com.amazon.dynamodb.examples + print-distinct-pks-parent + 1.0-SNAPSHOT + + + dynamodb-distinct-pk-printer + DynamoDB Distinct PK Printer + Tool to print distinct partition keys from DynamoDB tables for analysis and monitoring + + + + + org.apache.maven.plugins + maven-shade-plugin + + DynamoDBDistinctPKPrinter-${project.version} + + + org.example.PrintDistinctPKs + + + + + + + diff --git 
a/scripts/PrintDistinctPKs/Printer/java/src/main/java/org/example/PrintDistinctPKs.java b/scripts/PrintDistinctPKs/java/printer/src/main/java/org/example/PrintDistinctPKs.java similarity index 100% rename from scripts/PrintDistinctPKs/Printer/java/src/main/java/org/example/PrintDistinctPKs.java rename to scripts/PrintDistinctPKs/java/printer/src/main/java/org/example/PrintDistinctPKs.java