diff --git a/examples/gen-ai-fastchat/README.md b/examples/gen-ai-fastchat/README.md new file mode 100644 index 0000000..158acf0 --- /dev/null +++ b/examples/gen-ai-fastchat/README.md @@ -0,0 +1,116 @@ +

+ Intel Logo +

+ +# Intel Cloud Optimization Modules for Terraform + +© Copyright 2022, Intel Corporation + +## AWS M7i EC2 Instance with 4th Gen Xeon (code-named Sapphire Rapids) & Intel® Cloud Optimized Recipe for FastChat + +This demo will showcase Large Language Model (LLM) CPU inference using 4th Gen Xeon Scalable Processors on AWS using FastChat. + +## Usage + +**See examples folder ./examples/gen-ai-fastchat** + +variables.tf + +```hcl +variable "region" { + description = "Target AWS region to deploy EC2 in." + type = string + default = "us-east-1" +} +``` +main.tf +```hcl +## Get latest Ubuntu 22.04 AMI in AWS for x86 +data "aws_ami" "ubuntu-linux-2204" { + most_recent = true + owners = ["099720109477"] # Canonical + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"] + } + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +module "ec2-vm" { + source = "intel/aws-vm/intel" + key_name = aws_key_pair.TF_key.key_name + instance_type = "m7i.4xlarge" + availability_zone = "us-east-1a" + ami = data.aws_ami.ubuntu-linux-2204.id + user_data = data.cloudinit_config.ansible.rendered + + root_block_device = [{ + volume_size = "100" + }] + + tags = { + Name = "my-test-vm-${random_id.rid.dec}" + Owner = "OwnerName-${random_id.rid.dec}", + Duration = "2" + } +} +``` + + + +Run Terraform +Replace the line below with your own IPv4 CIDR range before running the example. + +```hcl +cidr_blocks = ["a.b.c.d/x"] +``` + +Run the following terraform commands +```hcl +terraform init +terraform plan +terraform apply +``` + +After **terraform apply** completes, wait about 10 minutes. During this time, the Ansible recipe will download/install FastChat and the LLM model. + +## Running the Demo +1. As mentioned above, **wait ~10 minutes** for the Recipe to download/install FastChat and the LLM model before continuing +2. SSH into the newly created AWS EC2 instance. +3. 
The Terraform module creates a key pair and adds the public key to the EC2 instance. It saves the private key in the folder from which **terraform apply** was run. +4. Open a command prompt on your computer. Navigate to the folder from which you ran the **terraform apply** command. +5. Run the ssh command below, replacing `<EC2_PUBLIC_IP>` with the `public_ip` value from the Terraform output: +```hcl +ssh ubuntu@<EC2_PUBLIC_IP> -i tfkey.private +``` +6. Once you are logged into the EC2 instance, **run `source /usr/local/bin/run_demo.sh`** +7. Your app will be proxied through Gradio. Look for the https://xxxxxxx.gradio.live URL that is generated while the run_demo.sh script executes. +8. Open a browser and go to the Gradio URL from the prior step. + +

+ Gradio_Output +

+ +## Known Issues + +The demo may initially fail. In this case, run + +```hcl +pip install gradio==3.10 +``` +```hcl +pip install gradio==3.35.2 +``` + +Then, run the command below in the terminal of the EC2 instance after you have connected to it over SSH: +```hcl +source /usr/local/bin/run_demo.sh +``` + +And navigate again using your browser. + +## Considerations +- The AWS region where this example is run should have a default VPC \ No newline at end of file diff --git a/examples/gen-ai-fastchat/cloud_init.yml b/examples/gen-ai-fastchat/cloud_init.yml new file mode 100644 index 0000000..c203173 --- /dev/null +++ b/examples/gen-ai-fastchat/cloud_init.yml @@ -0,0 +1,15 @@ +#cloud-config +package_update: true +package_upgrade: true + +packages: + - git + +ansible: + install_method: distro + package_name: ansible + pull: + url: "https://github.com/intel/optimized-cloud-recipes.git" + playbook_name: "recipes/ai-fastchat-amx-ubuntu/recipe.yml" + + diff --git a/examples/gen-ai-fastchat/main.tf b/examples/gen-ai-fastchat/main.tf new file mode 100644 index 0000000..cfb29ca --- /dev/null +++ b/examples/gen-ai-fastchat/main.tf @@ -0,0 +1,92 @@ +# Provision an M7i (4th Gen Xeon, Sapphire Rapids) EC2 instance running Ubuntu 22.04 in the default VPC. It is configured to create the EC2 in +# US-East-1 region. The region is provided in variables.tf in this example folder. + +# This example also creates an EC2 key pair. Associate the public key with the EC2 instance. Create the private key +# in the local system where terraform apply is done. 
Create a new security group to open up the SSH port +# 22 to a specific IP CIDR block + +######### PLEASE NOTE: CHANGE THE IP CIDR BLOCK TO ALLOW SSH FROM YOUR OWN IP ADDRESS ######### + +data "cloudinit_config" "ansible" { + gzip = true + base64_encode = true + + part { + filename = "cloud_init" + content_type = "text/cloud-config" + content = templatefile( + "cloud_init.yml", + {} + ) + } +} + +resource "random_id" "rid" { + byte_length = 5 +} + +# RSA key of size 4096 bits +resource "tls_private_key" "rsa" { + algorithm = "RSA" + rsa_bits = 4096 +} + +resource "aws_key_pair" "TF_key" { + key_name = "TF_key-${random_id.rid.dec}" + public_key = tls_private_key.rsa.public_key_openssh +} + +resource "local_sensitive_file" "TF_private_key" { # owner-only file mode by default, so ssh accepts the key + content = tls_private_key.rsa.private_key_pem + filename = "tfkey.private" +} + +resource "aws_security_group" "ssh_security_group" { + description = "security group to configure ports for ssh" + ingress { + from_port = 22 + to_port = 22 + protocol = "tcp" + + ## CHANGE THE IP CIDR BLOCK BELOW TO ALLOW SSH FROM YOUR OWN IP RANGE ## + cidr_blocks = ["a.b.c.d/x"] + } +} + +resource "aws_network_interface_sg_attachment" "sg_attachment" { + security_group_id = aws_security_group.ssh_security_group.id + network_interface_id = module.ec2-vm.primary_network_interface_id +} + +## Get latest Ubuntu 22.04 AMI in AWS for x86 +data "aws_ami" "ubuntu-linux-2204" { + most_recent = true + owners = ["099720109477"] # Canonical + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"] + } + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +module "ec2-vm" { + source = "intel/aws-vm/intel" + key_name = aws_key_pair.TF_key.key_name + instance_type = "m7i.4xlarge" + availability_zone = "us-east-1a" + ami = data.aws_ami.ubuntu-linux-2204.id + user_data = data.cloudinit_config.ansible.rendered + + root_block_device = [{ + volume_size = "100" + }] + + tags = { + Name = 
"my-test-vm-${random_id.rid.dec}" + Owner = "OwnerName-${random_id.rid.dec}", + Duration = "2" + } +} \ No newline at end of file diff --git a/examples/gen-ai-fastchat/outputs.tf b/examples/gen-ai-fastchat/outputs.tf new file mode 100644 index 0000000..938d951 --- /dev/null +++ b/examples/gen-ai-fastchat/outputs.tf @@ -0,0 +1,113 @@ +output "id" { + description = "The ID of the instance" + value = try(module.ec2-vm.id, "") +} + +output "arn" { + description = "The ARN of the instance" + value = try(module.ec2-vm.arn, "") +} + +output "capacity_reservation_specification" { + description = "Capacity reservation specification of the instance" + value = try(module.ec2-vm.capacity_reservation_specification, "") +} + +output "instance_state" { + description = "The state of the instance. One of: `pending`, `running`, `shutting-down`, `terminated`, `stopping`, `stopped`" + value = try(module.ec2-vm.instance_state, "") +} + +output "outpost_arn" { + description = "The ARN of the Outpost the instance is assigned to" + value = try(module.ec2-vm.outpost_arn, "") +} + +output "password_data" { + description = "Base-64 encoded encrypted password data for the instance. Useful for getting the administrator password for instances running Microsoft Windows. This attribute is only exported if `get_password_data` is true" + value = try(module.ec2-vm.password_data, "") +} + +output "primary_network_interface_id" { + description = "The ID of the instance's primary network interface" + value = try(module.ec2-vm.primary_network_interface_id, "") +} + +output "private_dns" { + description = "The private DNS name assigned to the instance. Can only be used inside the Amazon EC2, and only available if you've enabled DNS hostnames for your VPC" + value = try(module.ec2-vm.private_dns, "") +} + +output "public_dns" { + description = "The public DNS name assigned to the instance. 
For EC2-VPC, this is only available if you've enabled DNS hostnames for your VPC" + value = try(module.ec2-vm.public_dns, "") +} + +output "public_ip" { + description = "The public IP address assigned to the instance, if applicable. NOTE: If you are using an aws_eip with your instance, you should refer to the EIP's address directly and not use `public_ip` as this field will change after the EIP is attached" + value = try(module.ec2-vm.public_ip, "") +} + +output "private_ip" { + description = "The private IP address assigned to the instance." + value = try(module.ec2-vm.private_ip, "") +} + +output "ipv6_addresses" { + description = "The IPv6 address assigned to the instance, if applicable." + value = try(module.ec2-vm.ipv6_addresses, []) +} + +output "tags_all" { + description = "A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block" + value = try(module.ec2-vm.tags_all, {}) +} + +output "spot_bid_status" { + description = "The current bid status of the Spot Instance Request" + value = try(module.ec2-vm.spot_bid_status, "") +} + +output "spot_request_state" { + description = "The current request state of the Spot Instance Request" + value = try(module.ec2-vm.spot_request_state, "") +} + +output "spot_instance_id" { + description = "The Instance ID (if any) that is currently fulfilling the Spot Instance request" + value = try(module.ec2-vm.spot_instance_id, "") +} + +################################################################################ +# IAM Role / Instance Profile (each output falls back to null when the module attribute cannot be read) +################################################################################ + +output "iam_role_name" { + description = "The name of the IAM role" + value = try(module.ec2-vm.aws_iam_role.name, null) +} + +output "iam_role_arn" { + description = "The Amazon Resource Name (ARN) specifying the IAM role" + value = try(module.ec2-vm.aws_iam_role.arn, null) +} + +output "iam_role_unique_id" { + description = "Stable and unique 
string identifying the IAM role" + value = try(module.ec2-vm.aws_iam_role.unique_id, null) +} + +output "iam_instance_profile_arn" { + description = "ARN assigned by AWS to the instance profile" + value = try(module.ec2-vm.aws_iam_instance_profile.arn, null) +} + +output "iam_instance_profile_id" { + description = "Instance profile's ID" + value = try(module.ec2-vm.aws_iam_instance_profile.id, null) +} + +output "iam_instance_profile_unique" { + description = "Stable and unique string identifying the IAM instance profile" + value = try(module.ec2-vm.aws_iam_instance_profile.unique_id, null) +} \ No newline at end of file diff --git a/examples/gen-ai-fastchat/providers.tf b/examples/gen-ai-fastchat/providers.tf new file mode 100644 index 0000000..260a2e3 --- /dev/null +++ b/examples/gen-ai-fastchat/providers.tf @@ -0,0 +1,4 @@ +provider "aws" { + # Environment Variables used for Authentication + region = var.region +} \ No newline at end of file diff --git a/examples/gen-ai-fastchat/variables.tf b/examples/gen-ai-fastchat/variables.tf new file mode 100644 index 0000000..be91e69 --- /dev/null +++ b/examples/gen-ai-fastchat/variables.tf @@ -0,0 +1,5 @@ +variable "region" { + description = "Target AWS region to deploy EC2 in." + type = string + default = "us-east-1" +} \ No newline at end of file diff --git a/examples/gen-ai-fastchat/versions.tf b/examples/gen-ai-fastchat/versions.tf new file mode 100644 index 0000000..5814fe2 --- /dev/null +++ b/examples/gen-ai-fastchat/versions.tf @@ -0,0 +1,22 @@ +terraform { + required_version = ">=1.3.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.60.0" + } + cloudinit = { + source = "hashicorp/cloudinit" + version = ">=2.2.0" + } + random = { + source = "hashicorp/random" + } + tls = { + source = "hashicorp/tls" + } + local = { + source = "hashicorp/local" + } + } +} \ No newline at end of file diff --git a/images/gradio.png b/images/gradio.png new file mode 100644 index 0000000..0460d98 Binary files /dev/null and b/images/gradio.png differ