From a0838015f23934a8166f3b9d6104e63d8d33e5d9 Mon Sep 17 00:00:00 2001 From: Steve Fowler Date: Thu, 27 Jun 2024 18:36:57 -0500 Subject: [PATCH 1/2] update readme --- examples/gen-ai-xeon-opea-chatqna/README.md | 133 ++++++++++++++++++ .../gen-ai-xeon-opea-chatqna/cloud_init.yml | 16 +++ .../gen-ai-xeon-opea-chatqna/environment.txt | 17 +++ examples/gen-ai-xeon-opea-chatqna/main.tf | 99 +++++++++++++ examples/gen-ai-xeon-opea-chatqna/outputs.tf | 113 +++++++++++++++ .../gen-ai-xeon-opea-chatqna/providers.tf | 4 + .../gen-ai-xeon-opea-chatqna/variables.tf | 117 +++++++++++++++ examples/gen-ai-xeon-opea-chatqna/versions.tf | 13 ++ 8 files changed, 512 insertions(+) create mode 100644 examples/gen-ai-xeon-opea-chatqna/README.md create mode 100644 examples/gen-ai-xeon-opea-chatqna/cloud_init.yml create mode 100644 examples/gen-ai-xeon-opea-chatqna/environment.txt create mode 100644 examples/gen-ai-xeon-opea-chatqna/main.tf create mode 100644 examples/gen-ai-xeon-opea-chatqna/outputs.tf create mode 100644 examples/gen-ai-xeon-opea-chatqna/providers.tf create mode 100644 examples/gen-ai-xeon-opea-chatqna/variables.tf create mode 100644 examples/gen-ai-xeon-opea-chatqna/versions.tf diff --git a/examples/gen-ai-xeon-opea-chatqna/README.md b/examples/gen-ai-xeon-opea-chatqna/README.md new file mode 100644 index 0000000..e80d5de --- /dev/null +++ b/examples/gen-ai-xeon-opea-chatqna/README.md @@ -0,0 +1,133 @@ +

+ Intel Logo +

+ +# Intel® Optimized Cloud Modules for Terraform + +© Copyright 2024, Intel Corporation + +## AWS M7i EC2 Instance with 4th Generation Intel® Xeon® Scalable Processor (Sapphire Rapids) & Open Platform for Enterprise AI (OPEA) ChatQnA Example + +This demo showcases Retrieval Augmented Generation (RAG) CPU inference on 4th Gen Intel® Xeon® Scalable Processors on AWS, using the OPEA ChatQnA example. For more information about OPEA, go [here](https://opea.dev/). For more information on this specific example, go [here](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA). + +## Usage + +### variables.tf + +Modify the `region` variable to target a specific AWS Region. + +```hcl +variable "region" { + description = "Target AWS region to deploy EC2 in." + type = string + default = "us-east-1" +} +``` + +Set the `huggingface_token` variable to your own Hugging Face token. For information on creating a Hugging Face token, go [here](https://huggingface.co/docs/hub/en/security-tokens). + +```hcl +variable "huggingface_token" { + description = "Huggingface Token" + default = " " + type = string +} +``` + +### main.tf + +Modify the settings in this file to choose your AMI, the instance size, and other details of the instance that will be created. + +```hcl +## Get latest Ubuntu 22.04 AMI in AWS for x86 +data "aws_ami" "ubuntu-linux-2204" { + most_recent = true + owners = ["099720109477"] # Canonical + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"] + } + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +module "ec2-vm" { + source = "intel/aws-vm/intel" + key_name = aws_key_pair.TF_key.key_name + instance_type = "m7i.16xlarge" + availability_zone = "us-east-1d" + ami = data.aws_ami.ubuntu-linux-2204.id + user_data = data.cloudinit_config.ansible.rendered + + root_block_device = [{ + volume_size = "1000" + }] + + tags = { + Name = "my-test-vm-${random_id.rid.dec}" + Owner = "OwnerName-${random_id.rid.dec}", + 
Duration = "2" + } +} +``` + +Run the Terraform commands below to deploy the demo. + +```Shell +terraform init +terraform plan +terraform apply +``` + +## Running the Demo using AWS CloudShell + +Open your AWS account and click the CloudShell icon. +At the command prompt, enter the following commands to install Terraform in AWS CloudShell: + +```Shell +git clone https://github.com/tfutils/tfenv.git ~/.tfenv +mkdir ~/bin +ln -s ~/.tfenv/bin/* ~/bin/ +tfenv install 1.3.0 +tfenv use 1.3.0 +``` + +Download the [OPEA ChatQnA on Xeon](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) Terraform module by running this command: + +```Shell +git clone https://github.com/intel/terraform-intel-aws-vm.git +``` + +Change into the `examples/gen-ai-xeon-opea-chatqna` example folder: + +```Shell +cd terraform-intel-aws-vm/examples/gen-ai-xeon-opea-chatqna +``` + +Run the Terraform commands below to deploy the demo. + +```Shell +terraform init +terraform plan +terraform apply +``` + +After the Terraform module successfully creates the EC2 instance, **wait ~15 minutes** for the recipe to build and launch the containers before continuing. + +## Accessing the Demo + +You can access the demo using the following: + +- OPEA ChatQnA: `http://yourpublicip:5174` + +- Note: This module is created using the m7i.16xlarge instance size. You can change your instance type by modifying **instance_type = "m7i.16xlarge"** in main.tf, under the **ec2-vm module** section of the code. For example, if you change it to an 8xlarge and then run **terraform apply**, the module will destroy the old instance and rebuild it with the new instance size. + +## Deleting the Demo + +To delete the demo, run `terraform destroy` to delete all resources created. 
+ +## Considerations + +- The AWS region where this example is run should have a default VPC diff --git a/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml b/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml new file mode 100644 index 0000000..932bee0 --- /dev/null +++ b/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml @@ -0,0 +1,16 @@ +#cloud-config +package_update: true +package_upgrade: true + +packages: # plural: this is the key cloud-init reads for package installs + - git + +runcmd: + - apt install ansible -y + - git clone https://github.com/intel/optimized-cloud-recipes.git /tmp/optimized-cloud-recipes + - cd /tmp/optimized-cloud-recipes/recipes/ai-opea-chatqna-xeon + - cp opea.sh /etc/profile.d/opea.sh + - echo 'export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}' | sudo tee -a /etc/profile.d/opea.sh + - chmod +x /etc/profile.d/opea.sh + - source /etc/profile.d/opea.sh # NOTE(review): runcmd strings are run by sh; confirm that 'source' is available in that shell + - ansible-playbook recipe.yml \ No newline at end of file diff --git a/examples/gen-ai-xeon-opea-chatqna/environment.txt b/examples/gen-ai-xeon-opea-chatqna/environment.txt new file mode 100644 index 0000000..1056e88 --- /dev/null +++ b/examples/gen-ai-xeon-opea-chatqna/environment.txt @@ -0,0 +1,17 @@ + - export http_proxy= + - export https_proxy= + - export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + - export RERANK_MODEL_ID="BAAI/bge-reranker-large" + - export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" + - export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090" + - export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" + - export TGI_LLM_ENDPOINT="http://${host_ip}:8008" + - export REDIS_URL="redis://${host_ip}:6379" + - export INDEX_NAME="rag-redis" + - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + - export MEGA_SERVICE_HOST_IP=${host_ip} + - export EMBEDDING_SERVICE_HOST_IP=${host_ip} + - export RETRIEVER_SERVICE_HOST_IP=${host_ip} + - export RERANK_SERVICE_HOST_IP=${host_ip} + - export LLM_SERVICE_HOST_IP=${host_ip} + - export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna" \ No newline at end of file diff --git 
a/examples/gen-ai-xeon-opea-chatqna/main.tf b/examples/gen-ai-xeon-opea-chatqna/main.tf new file mode 100644 index 0000000..3ce5c7d --- /dev/null +++ b/examples/gen-ai-xeon-opea-chatqna/main.tf @@ -0,0 +1,100 @@ +# Provision an M7i EC2 instance (4th Gen Intel Xeon, Sapphire Rapids) running Ubuntu 22.04 in the default VPC. +# The region is provided in variables.tf in this example folder (default: us-east-1). + +# This example also creates an EC2 key pair, associates the public key with the EC2 instance, and writes the private key +# to the local system where terraform apply is run. It creates a new security group whose ingress rules (SSH port +# 22 and the demo ports) are taken from the `ingress_rules` variable. + +######### PLEASE NOTE TO CHANGE THE IP CIDR BLOCK TO ALLOW SSH FROM YOUR OWN ALLOWED IP ADDRESS FOR SSH ######### + +data "cloudinit_config" "ansible" { + gzip = true + base64_encode = true + + part { + filename = "cloud_init" + content_type = "text/cloud-config" + content = templatefile( + "${path.module}/cloud_init.yml", # resolve next to this file, not relative to the working directory + { + HUGGINGFACEHUB_API_TOKEN = var.huggingface_token + } + ) + } +} + +data "aws_ami" "ubuntu-linux-2204" { + most_recent = true + owners = ["099720109477"] # Canonical + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"] + } + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +resource "random_id" "rid" { + byte_length = 5 +} + +# RSA key of size 4096 bits +resource "tls_private_key" "rsa" { + algorithm = "RSA" + rsa_bits = 4096 +} + +resource "aws_key_pair" "TF_key" { + key_name = "TF_key-${random_id.rid.dec}" + public_key = tls_private_key.rsa.public_key_openssh +} + +resource "local_file" "TF_private_key" { + content = tls_private_key.rsa.private_key_pem + filename = "tfkey.private" + file_permission = "0600" # owner read/write only; ssh refuses private keys that other users can read +} +resource "aws_security_group" "ssh_security_group" { + description = "security group to configure ports for ssh" + name_prefix = "ssh_security_group" +} + +# Modify the `ingress_rules` variable in the variables.tf file to allow the required ports for your CIDR 
ranges +resource "aws_security_group_rule" "ingress_rules" { + count = length(var.ingress_rules) + type = "ingress" + security_group_id = aws_security_group.ssh_security_group.id + from_port = var.ingress_rules[count.index].from_port + to_port = var.ingress_rules[count.index].to_port + protocol = var.ingress_rules[count.index].protocol + cidr_blocks = [var.ingress_rules[count.index].cidr_blocks] +} + +resource "aws_network_interface_sg_attachment" "sg_attachment" { + count = length(module.ec2-vm) + security_group_id = aws_security_group.ssh_security_group.id + network_interface_id = module.ec2-vm[count.index].primary_network_interface_id +} + +# Modify the `vm_count` variable in the variables.tf file to create the required number of EC2 instances +module "ec2-vm" { + count = var.vm_count + source = "intel/aws-vm/intel" + key_name = aws_key_pair.TF_key.key_name + instance_type = "m7i.16xlarge" + availability_zone = "us-east-1d" # must be an AZ inside var.region; change it together with the region variable + ami = data.aws_ami.ubuntu-linux-2204.id + user_data = data.cloudinit_config.ansible.rendered + + root_block_device = [{ + volume_size = "1000" + }] + + tags = { + Name = "my-test-vm-${count.index}-${random_id.rid.dec}" + Owner = "owner-${random_id.rid.dec}", + Duration = "2" + } +} \ No newline at end of file diff --git a/examples/gen-ai-xeon-opea-chatqna/outputs.tf b/examples/gen-ai-xeon-opea-chatqna/outputs.tf new file mode 100644 index 0000000..24448ce --- /dev/null +++ b/examples/gen-ai-xeon-opea-chatqna/outputs.tf @@ -0,0 +1,113 @@ +output "id" { + description = "The ID of the instance" + value = try(module.ec2-vm[*].id, "") +} + +output "arn" { + description = "The ARN of the instance" + value = try(module.ec2-vm[*].arn, "") +} + +output "capacity_reservation_specification" { + description = "Capacity reservation specification of the instance" + value = try(module.ec2-vm[*].capacity_reservation_specification, "") +} + +output "instance_state" { + description = "The state of the instance. 
One of: `pending`, `running`, `shutting-down`, `terminated`, `stopping`, `stopped`" + value = try(module.ec2-vm[*].instance_state, "") +} + +output "outpost_arn" { + description = "The ARN of the Outpost the instance is assigned to" + value = try(module.ec2-vm[*].outpost_arn, "") +} + +output "password_data" { + description = "Base-64 encoded encrypted password data for the instance. Useful for getting the administrator password for instances running Microsoft Windows. This attribute is only exported if `get_password_data` is true" + value = try(module.ec2-vm[*].password_data, "") +} + +output "primary_network_interface_id" { + description = "The ID of the instance's primary network interface" + value = try(module.ec2-vm[*].primary_network_interface_id, "") +} + +output "private_dns" { + description = "The private DNS name assigned to the instance. Can only be used inside the Amazon EC2, and only available if you've enabled DNS hostnames for your VPC" + value = try(module.ec2-vm[*].private_dns, "") +} + +output "public_dns" { + description = "The public DNS name assigned to the instance. For EC2-VPC, this is only available if you've enabled DNS hostnames for your VPC" + value = try(module.ec2-vm[*].public_dns, "") +} + +output "public_ip" { + description = "The public IP address assigned to the instance, if applicable. NOTE: If you are using an aws_eip with your instance, you should refer to the EIP's address directly and not use `public_ip` as this field will change after the EIP is attached" + value = try(module.ec2-vm[*].public_ip, "") +} + +output "private_ip" { + description = "The private IP address assigned to the instance." + value = try(module.ec2-vm[*].private_ip, "") +} + +output "ipv6_addresses" { + description = "The IPv6 address assigned to the instance, if applicable." 
+ value = try(module.ec2-vm[*].ipv6_addresses, []) +} + +output "tags_all" { + description = "A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block" + value = try(module.ec2-vm[*].tags_all, {}) +} + +output "spot_bid_status" { + description = "The current bid status of the Spot Instance Request" + value = try(module.ec2-vm[*].spot_bid_status, "") +} + +output "spot_request_state" { + description = "The current request state of the Spot Instance Request" + value = try(module.ec2-vm[*].spot_request_state, "") +} + +output "spot_instance_id" { + description = "The Instance ID (if any) that is currently fulfilling the Spot Instance request" + value = try(module.ec2-vm[*].spot_instance_id, "") +} + +################################################################################ +# IAM Role / Instance Profile +################################################################################ + +output "iam_role_name" { + description = "The name of the IAM role" + value = try(module.ec2-vm[*].aws_iam_role.name, null) +} + +output "iam_role_arn" { + description = "The Amazon Resource Name (ARN) specifying the IAM role" + value = try(module.ec2-vm[*].aws_iam_role.arn, null) +} + +output "iam_role_unique_id" { + description = "Stable and unique string identifying the IAM role" + value = try(module.ec2-vm[*].aws_iam_role.unique_id, null) +} + +output "iam_instance_profile_arn" { + description = "ARN assigned by AWS to the instance profile" + value = try(module.ec2-vm[*].aws_iam_instance_profile.arn, null) +} + +output "iam_instance_profile_id" { + description = "Instance profile's ID" + value = try(module.ec2-vm[*].aws_iam_instance_profile.id, null) +} + +output "iam_instance_profile_unique" { + description = "Stable and unique string identifying the IAM instance profile" + value = try(module.ec2-vm[*].aws_iam_instance_profile.unique_id, null) +} \ No newline at end of file diff --git 
a/examples/gen-ai-xeon-opea-chatqna/providers.tf b/examples/gen-ai-xeon-opea-chatqna/providers.tf new file mode 100644 index 0000000..260a2e3 --- /dev/null +++ b/examples/gen-ai-xeon-opea-chatqna/providers.tf @@ -0,0 +1,4 @@ +provider "aws" { + # Environment Variables used for Authentication + region = var.region +} \ No newline at end of file diff --git a/examples/gen-ai-xeon-opea-chatqna/variables.tf b/examples/gen-ai-xeon-opea-chatqna/variables.tf new file mode 100644 index 0000000..26c0f99 --- /dev/null +++ b/examples/gen-ai-xeon-opea-chatqna/variables.tf @@ -0,0 +1,117 @@ +variable "region" { + description = "Target AWS region to deploy EC2 in." + type = string + default = "us-east-1" +} + +# Variable to add ingress rules to the security group. Replace the default values with the required ports and CIDR ranges. The defaults open every listed port to 0.0.0.0/0 (any address), so restrict them before real use. +variable "ingress_rules" { + type = list(object({ + from_port = number + to_port = number + protocol = string + cidr_blocks = string + })) + default = [ + { + from_port = 22 # SSH + to_port = 22 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" # any address; replace with your own CIDR range + + }, + { + from_port = 6379 # port used by REDIS_URL in environment.txt + to_port = 6379 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + + }, + { + from_port = 8001 + to_port = 8001 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + }, + { + from_port = 6006 + to_port = 6006 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + }, + { + from_port = 6007 + to_port = 6007 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + }, + { + from_port = 6000 + to_port = 6000 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + }, + { + from_port = 7000 + to_port = 7000 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + }, + { + from_port = 8808 # port used by TEI_RERANKING_ENDPOINT in environment.txt + to_port = 8808 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + }, + { + from_port = 8000 + to_port = 8000 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + }, + { + from_port = 9009 + to_port = 9009 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + }, + { + from_port = 9000 + to_port = 9000 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + }, + { + 
from_port = 8888 # port used by BACKEND_SERVICE_ENDPOINT in environment.txt + to_port = 8888 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + }, + { + from_port = 5173 + to_port = 5173 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + }, + { + from_port = 5174 # ChatQnA URL port given in README.md + to_port = 5174 + protocol = "tcp" + cidr_blocks = "0.0.0.0/0" + } + ] +} + +# Variable for how many VMs to build +variable "vm_count" { + description = "Number of VMs to build." + type = number + default = 1 +} + +# Hugging Face token; main.tf passes it to cloud_init.yml as HUGGINGFACEHUB_API_TOKEN. The default is a single space, so set a real token before applying. +variable "huggingface_token" { + description = "Huggingface Token" + default = " " + type = string +} \ No newline at end of file diff --git a/examples/gen-ai-xeon-opea-chatqna/versions.tf b/examples/gen-ai-xeon-opea-chatqna/versions.tf new file mode 100644 index 0000000..ff6e689 --- /dev/null +++ b/examples/gen-ai-xeon-opea-chatqna/versions.tf @@ -0,0 +1,13 @@ +terraform { + required_version = ">=1.3.0" + required_providers { # NOTE(review): main.tf also uses random, tls and local resources, which are not declared here + aws = { + source = "hashicorp/aws" + version = "~> 5.31" + } + cloudinit = { + source = "hashicorp/cloudinit" + version = ">=2.2.0" + } + } +} \ No newline at end of file From a878503a7c10b1b88290143a6570c1261470a57d Mon Sep 17 00:00:00 2001 From: Steve Fowler Date: Fri, 28 Jun 2024 09:42:07 -0500 Subject: [PATCH 2/2] fix typo --- examples/gen-ai-xeon-opea-chatqna/cloud_init.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml b/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml index 932bee0..cc166fa 100644 --- a/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml +++ b/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml @@ -8,7 +8,7 @@ package: runcmd: - apt install ansible -y - git clone https://github.com/intel/optimized-cloud-recipes.git /tmp/optimized-cloud-recipes - - cd /tmp/optimized-cloud-recipes/recipes/ai-opea-chatqna-xeon + - cd /tmp/optimized-cloud-recipes/recipes/ai-opea-chatqna-xeon - cp opea.sh /etc/profile.d/opea.sh - echo 'export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}' | sudo tee -a /etc/profile.d/opea.sh - 
chmod +x /etc/profile.d/opea.sh