diff --git a/examples/gcp-linux-fastchat-simple/README.md b/examples/gcp-linux-fastchat-simple/README.md index e45999f..bd542af 100644 --- a/examples/gcp-linux-fastchat-simple/README.md +++ b/examples/gcp-linux-fastchat-simple/README.md @@ -6,53 +6,40 @@ © Copyright 2023, Intel Corporation -## GCP C3 4th Gen Xeon(code named Sapphire Rapids) & Intel® Cloud Optimized Recipe for FastChat +## GCP C3 4th Gen Xeon(code named Sapphire Rapids) & Intel® Optimized Cloud Recipe for FastChat This demo will showcase Large Language Model(LLM) CPU inference using 4th Gen Xeon Scalable Processors on GCP. ## Usage -You may need to change the appropriate IAM settings as described here: https://cloud.google.com/docs/terraform/get-started-with-terraform - 1. Log on to GCP Portal 2. Enter the GCP Cloud Shell (terminal button on top right of page) 3. Run the following commands in order: -`git clone https://github.com/intel/terraform-intel-gcp-vm.git` -`cd terraform-intel-gcp-vm/examples/gcp-linux-fastchat-simple` +```bash +git clone https://github.com/intel/terraform-intel-gcp-vm.git +cd terraform-intel-gcp-vm/examples/gcp-linux-fastchat-simple +terraform init +terraform apply -`terraform init` +# (enter your GCP project ID and "yes" to confirm) -`terraform apply` +``` ## Running the Demo - 1. **Wait ~10 minutes** for the Recipe to download/install FastChat and the LLM model before continuing 2. SSH into newly created GCP VM 3. **Run `source /usr/local/bin/run_demo.sh`** -4. If you did not choose to open a port, your app will be proxied through gradio. See https://xxxxxxx.gradio.live URL that is generated during the gradio run. -5. Otherwise, if you choose to open port 7860 in main.tf: on your computer, open a browser and navigate to **http://:7860**. Get your Public IP from the "Compute Engine" section of the GCP console. +4. On your local computer, open a browser and navigate to **http://YOUR_VM_PUBLIC_IP:7860**. Get your Public IP from the "Compute Engine" section of the VM in the GCP console. +5. 
Or use the https://xxxxxxx.gradio.live URL that is generated during the demo startup (see on-screen logs).

- Gradio_Output + Gradio_Output

- ---- KNOWN ISSUE --- - -The demo may initially fail. In this case, run - -`pip install gradio==3.10` - -`pip install gradio==3.35.2` - -Then, run: - -`source /usr/local/bin/run_demo.sh` - -And navigate again using your browser. +Note: You may need permissions to run Terraform https://cloud.google.com/docs/terraform/get-started-with-terraform ## Pre-requisites for running on a Workstation (bypass if using GCP Cloud Shell) diff --git a/examples/gcp-linux-fastchat-simple/main.tf b/examples/gcp-linux-fastchat-simple/main.tf index 18249cf..85f444b 100644 --- a/examples/gcp-linux-fastchat-simple/main.tf +++ b/examples/gcp-linux-fastchat-simple/main.tf @@ -10,9 +10,9 @@ module "linux_vm" { project = var.project boot_image_project = "ubuntu-os-cloud" boot_image_family = "ubuntu-2204-lts" - name = "intel-fastchat-test" + name = "intel-fastchat" zone = "us-central1-a" - machine_type = "c3-highmem-22" + machine_type = "c3-standard-44" tags = ["fschat"] user_data = templatefile("./cloud_init.yml", {}) access_config = [{ @@ -22,18 +22,16 @@ module "linux_vm" { }, ] } -# FastChat Gradio Webserver can be accessed by the Gradio Proxy. See https://xxxxxxx.gradio.live URL that is generated during the gradio run -# Otherwise uncomment this to allow access to the VM Public IP on Port 7860, http://:7860 -# resource "google_compute_firewall" "rules" { -# project = var.project -# name = "fastchat-firewall" -# network = "default" -# description = "Allows access to FastChat Webserver." +resource "google_compute_firewall" "rules" { + project = var.project + name = "fastchat-firewall" + network = "default" + description = "Allows access to FastChat Webserver." -# allow { -# protocol = "tcp" -# ports = ["7860"] -# } -# source_ranges = [ "0.0.0.0/0" ] -# target_tags = ["fschat"] -# } + allow { + protocol = "tcp" + ports = ["7860"] + } + source_ranges = [ "0.0.0.0/0" ] + target_tags = ["fschat"] +}