39 changes: 14 additions & 25 deletions EdgeCraftRAG/Dockerfile.server
@@ -1,30 +1,17 @@
FROM python:3.11-slim

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libjemalloc-dev \
libmagic1 \
libglib2.0-0 \
poppler-utils \
tesseract-ocr

RUN apt-get update && apt-get install -y gnupg wget git
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
RUN echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
tee /etc/apt/sources.list.d/intel-gpu-jammy.list
RUN apt-get update && apt-get install -y \
intel-opencl-icd intel-level-zero-gpu \
intel-level-zero-gpu-raytracing \
intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
RUN apt-get update && apt-get install -y gnupg2 wget git
RUN apt-get remove -y libze-intel-gpu1 libigc1 libigdfcl1 libze-dev || true; \
apt-get update; \
apt-get install -y curl
RUN curl -sL 'https://keyserver.ubuntu.com/pks/lookup?fingerprint=on&op=get&search=0x0C0E6AF955CE463C03FC51574D098D70AFBE5E1F' | tee /etc/apt/trusted.gpg.d/driver.asc
RUN echo -e "Types: deb\nURIs: https://ppa.launchpadcontent.net/kobuk-team/intel-graphics/ubuntu/\nSuites: plucky\nComponents: main\nSigned-By: /etc/apt/trusted.gpg.d/driver.asc" > /etc/apt/sources.list.d/driver.sources
RUN apt-get update && apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-opencl-icd clinfo intel-gsc && apt-get install -y libze-dev intel-ocloc libze-intel-gpu-raytracing

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

RUN mkdir /templates && \
chown -R user /templates
@@ -33,17 +20,19 @@ RUN chown -R user /templates/default_prompt.txt

COPY ./edgecraftrag /home/user/edgecraftrag

RUN mkdir -p /home/user/ui_cache
ENV UI_UPLOAD_PATH=/home/user/ui_cache

USER user

WORKDIR /home/user/edgecraftrag
RUN pip install --no-cache-dir --upgrade pip setuptools==70.0.0 && \
pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt
RUN pip3 install --no-cache-dir --upgrade setuptools==70.0.0 --break-system-packages && \
pip3 install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt --break-system-packages

RUN pip3 install --no-cache-dir docarray==0.40.0 --break-system-packages

WORKDIR /home/user/
RUN git clone https://github.com/openvinotoolkit/openvino.genai.git genai
ENV PYTHONPATH="$PYTHONPATH:/home/user/genai/tools/llm_bench"

ENTRYPOINT ["python", "-m", "edgecraftrag.server"]
ENTRYPOINT ["python3", "-m", "edgecraftrag.server"]
6 changes: 6 additions & 0 deletions EdgeCraftRAG/README.md
@@ -5,6 +5,12 @@ Retrieval-Augmented Generation system for edge solutions. It is designed to
curate the RAG pipeline to meet hardware requirements at edge with guaranteed
quality and performance.

## What's New

1. Support for Intel Arc B60 for model inference
2. Support for KBadmin for knowledge base management
3. Support for the Experience Injection module in the UI

## Table of contents

1. [Architecture](#architecture)
Binary file added EdgeCraftRAG/assets/img/kbadmin_index.png
Binary file added EdgeCraftRAG/assets/img/kbadmin_kb.png
Binary file added EdgeCraftRAG/assets/img/kbadmin_type.png
87 changes: 57 additions & 30 deletions EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md
@@ -10,25 +10,27 @@ This document outlines the deployment process for Edge Craft Retrieval-Augmented

This section describes how to quickly deploy and test the EdgeCraftRAG service manually on Intel® Arc® platform. The basic steps are:

1. [Prerequisites](#prerequisites)
2. [Access the Code](#access-the-code)
3. [Prepare models](#prepare-models)
4. [Prepare env variables and configurations](#prepare-env-variables-and-configurations)
5. [Configure the Deployment Environment](#configure-the-deployment-environment)
6. [Deploy the Service Using Docker Compose](#deploy-the-service-using-docker-compose)
7. [Access UI](#access-ui)
8. [Cleanup the Deployment](#cleanup-the-deployment)
1. [Prerequisites](#1-prerequisites)
2. [Access the Code](#2-access-the-code)
3. [Prepare models](#3-prepare-models)
4. [Prepare env variables and configurations](#4-prepare-env-variables-and-configurations)
5. [Deploy the Service on Intel GPU Using Docker Compose](#5-deploy-the-service-on-intel-gpu-using-docker-compose)
6. [Access UI](#6-access-ui)
7. [Cleanup the Deployment](#7-cleanup-the-deployment)

### Prerequisites
### 1. Prerequisites

EC-RAG supports vLLM deployment (the default method) and local OpenVINO deployment on Intel Arc GPUs. The prerequisites are:

Hardware: Intel Arc A770
OS: Ubuntu Server 22.04.1 or newer (at least the 6.2 LTS kernel)
Driver & libraries: please refer to [Installing GPU Drivers](https://dgpu-docs.intel.com/driver/installation-rolling.html#installing-gpu-drivers) for detailed driver and library setup

Hardware: Intel Arc B60
Please refer to [Install Native Environment](https://github.com/intel/llm-scaler/tree/main/vllm#11-install-native-environment) for detailed setup

The steps below use **vLLM** as the inference engine. If you prefer **OpenVINO**, please refer to [OpenVINO Local Inference](../../../../docs/Advanced_Setup.md#openvino-local-inference)
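Once the driver stack is installed, a quick sanity check saves debugging time later. Below is a minimal sketch, assuming render nodes appear under `/dev/dri` as `renderD*` and that `clinfo` was installed with the driver packages (the `check_gpu_nodes` helper is illustrative, not part of this repo):

```shell
#!/usr/bin/env bash
# Count the GPU render nodes visible on this host; the EC-RAG containers
# need at least one. The directory is parameterized so it can be tested.
check_gpu_nodes() {
  local dri_dir="${1:-/dev/dri}"
  ls "${dri_dir}" 2>/dev/null | grep -c '^renderD' || true
}

# Usage (on the target host):
#   check_gpu_nodes             # expect >= 1
#   clinfo | grep 'Device Name' # should list your Arc GPU
```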

### Access the Code
### 2. Access the Code

Clone the GenAIExample repository and access the EdgeCraftRAG Intel® Arc® platform Docker Compose files and supporting scripts:

@@ -43,7 +45,7 @@ Checkout a released version, such as v1.3:
git checkout v1.3
```

### Prepare models
### 3. Prepare models

```bash
# Prepare models for embedding, reranking:
@@ -62,7 +64,7 @@ modelscope download --model $LLM_MODEL --local_dir "${MODEL_PATH}/${LLM_MODEL}"
# huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}"
```
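After downloading, it is worth verifying that the model folders actually landed under `${MODEL_PATH}` before starting the stack. A small sketch (the `models_present` helper and the directory names in the usage line are assumptions for illustration):

```shell
# Check that each expected model directory exists under the given root;
# print any that are missing and return non-zero if one is absent.
models_present() {
  local root="$1"; shift
  local m missing=0
  for m in "$@"; do
    [ -d "${root}/${m}" ] || { echo "missing: ${m}"; missing=1; }
  done
  return "${missing}"
}

# Usage (model names depend on what you exported above):
#   models_present "${MODEL_PATH}" "${LLM_MODEL}" "your/embedding-model"
```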

### Prepare env variables and configurations
### 4. Prepare env variables and configurations

The steps below are for single Intel Arc GPU inference. If you want to set up multi-GPU inference, please refer to [Multi-ARC Setup](../../../../docs/Advanced_Setup.md#multi-arc-setup)

@@ -77,32 +79,23 @@ export HOST_IP=$ip_address # Your host ip
export VIDEOGROUPID=$(getent group video | cut -d: -f3)
export RENDERGROUPID=$(getent group render | cut -d: -f3)

# If you have a proxy configured, uncomment below line
# export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server
# export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server
# If you have a proxy configured, execute the lines below
export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server
export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server
# If you have a HF mirror configured, it will be imported to the container
# export HF_ENDPOINT=https://hf-mirror.com # your HF mirror endpoint"

# Make sure all 3 folders have 1000:1000 ownership; if not, run:
chown 1000:1000 ${MODEL_PATH} ${PWD} # the default value of DOC_PATH and TMPFILE_PATH is PWD, so we give permission to ${PWD}
# In addition, make sure the .cache folder has 1000:1000 ownership:
chown 1000:1000 -R $HOME/.cache
```
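The proxy exports above simply append the host IP and the two EC-RAG service names to any existing `no_proxy` value so that in-cluster traffic bypasses the proxy. A sketch of that composition (the `build_no_proxy` helper is illustrative, not part of the repo; the service names come from the compose files):

```shell
# Compose a no_proxy value: keep any existing entries, then add the host
# IP and the EC-RAG service names.
build_no_proxy() {
  local base="$1" host_ip="$2"
  # ${base:+${base},} expands to "base," only when base is non-empty
  echo "${base:+${base},}${host_ip},edgecraftrag,edgecraftrag-server"
}

# Usage:
#   export no_proxy="$(build_no_proxy "${no_proxy}" "${HOST_IP}")"
```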

For more advanced env variables and configurations, please refer to [Prepare env variables for vLLM deployment](../../../../docs/Advanced_Setup.md#prepare-env-variables-for-vllm-deployment)

#### Generate nginx config file

```bash
export VLLM_SERVICE_PORT_0=8100 # You can set your own port for vllm service
# Generate your nginx config file
# nginx-conf-generator.sh requires 2 parameters: DP_NUM and output filepath
bash nginx/nginx-conf-generator.sh 1 nginx/nginx.conf
# set NGINX_CONFIG_PATH
export NGINX_CONFIG_PATH="${PWD}/nginx/nginx.conf"
```
### 5. Deploy the Service on Intel GPU Using Docker Compose

### Deploy the Service Using Docker Compose
Set the Milvus DB and the chat-history round for inference:

```bash
# EC-RAG supports Milvus as a persistent database; Milvus is disabled by default, set MILVUS_ENABLED=1 to enable it
@@ -112,12 +105,45 @@ export MILVUS_ENABLED=0

# EC-RAG supports a chat-history round setting; chat history is disabled by default, set CHAT_HISTORY_ROUND to control it
# export CHAT_HISTORY_ROUND= # change to your preference
```

#### Option a. Deploy the Service on Arc A770 Using Docker Compose

```bash
export VLLM_SERVICE_PORT_0=8100 # You can set your own port for vllm service
# Generate your nginx config file
# nginx-conf-generator.sh requires 2 parameters: DP_NUM and output filepath
bash nginx/nginx-conf-generator.sh 1 nginx/nginx.conf
# set NGINX_CONFIG_PATH
export NGINX_CONFIG_PATH="${PWD}/nginx/nginx.conf"

# Launch EC-RAG service with compose
docker compose -f docker_compose/intel/gpu/arc/compose_vllm.yaml up -d
```
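The `nginx-conf-generator.sh` call above takes `DP_NUM` (the number of vLLM data-parallel replicas) and an output path. Conceptually it has to emit one upstream server per replica; the sketch below shows that idea, assuming ports start at `VLLM_SERVICE_PORT_0` and increment by one (the real script may differ):

```shell
# Emit one nginx upstream line per data-parallel vLLM replica.
gen_upstreams() {
  local dp_num="$1" base_port="$2" i
  for ((i = 0; i < dp_num; i++)); do
    echo "server localhost:$((base_port + i));"
  done
}

# Usage: gen_upstreams 1 "${VLLM_SERVICE_PORT_0}"
```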

### Access UI
#### Option b. Deploy the Service on Arc B60 Using Docker Compose

```bash
# Besides MILVUS_ENABLED and CHAT_HISTORY_ROUND, the environment variables below are exposed for vLLM config; change them to your preference:
# export VLLM_SERVICE_PORT_B60=8086
# export DTYPE=float16
# export TP=1 # for multi GPU, you can change TP value
# export DP=1
# export ZE_AFFINITY_MASK=0 # for multi GPU, you can export ZE_AFFINITY_MASK=0,1,2...
# export ENFORCE_EAGER=1
# export TRUST_REMOTE_CODE=1
# export DISABLE_SLIDING_WINDOW=1
# export GPU_MEMORY_UTIL=0.8
# export NO_ENABLE_PREFIX_CACHING=1
# export MAX_NUM_BATCHED_TOKENS=8192
# export DISABLE_LOG_REQUESTS=1
# export MAX_MODEL_LEN=49152
# export BLOCK_SIZE=64
# export QUANTIZATION=fp8
docker compose -f docker_compose/intel/gpu/arc/compose_vllm_b60.yaml up -d
```
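Since options a and b differ only in the compose file (and the env variables each file reads), a tiny helper can make the choice explicit in a provisioning script. The `a770`/`b60` labels are my own shorthand; the file paths come from the commands above:

```shell
# Map a GPU label to the matching compose file from this repo.
compose_file_for() {
  case "$1" in
    b60) echo "docker_compose/intel/gpu/arc/compose_vllm_b60.yaml" ;;
    *)   echo "docker_compose/intel/gpu/arc/compose_vllm.yaml" ;;
  esac
}

# Usage:
#   docker compose -f "$(compose_file_for b60)" up -d
```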

### 6. Access UI

Open your browser and access http://${HOST_IP}:8082

@@ -126,12 +152,13 @@ Open your browser, access http://${HOST_IP}:8082
Below is the UI front page, for detailed operations on UI and EC-RAG settings, please refer to [Explore_Edge_Craft_RAG](../../../../docs/Explore_Edge_Craft_RAG.md)
![front_page](../../../../assets/img/front_page.png)

### Cleanup the Deployment
### 7. Cleanup the Deployment

To stop the containers associated with the deployment, execute the following command:

```bash
docker compose -f docker_compose/intel/gpu/arc/compose_vllm.yaml down
# or docker compose -f docker_compose/intel/gpu/arc/compose_vllm_b60.yaml down
```

All EdgeCraftRAG containers will be stopped and then removed upon completion of the `down` command.
2 changes: 0 additions & 2 deletions EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml
@@ -71,7 +71,6 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_ENDPOINT: ${HF_ENDPOINT}
vLLM_ENDPOINT: ${vLLM_ENDPOINT:-http://${HOST_IP}:${NGINX_PORT:-8086}}
ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false}
MAX_MODEL_LEN: ${MAX_MODEL_LEN:-5000}
Expand All @@ -80,7 +79,6 @@ services:
- ${MODEL_PATH:-${PWD}}:/home/user/models
- ${DOC_PATH:-${PWD}}:/home/user/docs
- ${TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
- ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
- ${PROMPT_PATH:-${PWD}}:/templates/custom
restart: always
ports:
@@ -72,17 +72,15 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_ENDPOINT: ${HF_ENDPOINT}
vLLM_ENDPOINT: ${vLLM_ENDPOINT:-http://${HOST_IP}:${NGINX_PORT:-8086}}
LLM_MODEL: ${LLM_MODEL}
ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false}
MAX_MODEL_LEN: ${MAX_MODEL_LEN:-5000}
MAX_MODEL_LEN: ${MAX_MODEL_LEN:-10240}
CHAT_HISTORY_ROUND: ${CHAT_HISTORY_ROUND:-0}
volumes:
- ${MODEL_PATH:-${PWD}}:/home/user/models
- ${DOC_PATH:-${PWD}}:/home/user/docs
- ${TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
- ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
- ${PROMPT_PATH:-${PWD}}:/templates/custom
restart: always
ports:
@@ -157,7 +155,6 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_ENDPOINT: ${HF_ENDPOINT}
MODEL_PATH: "/llm/models"
SERVED_MODEL_NAME: ${LLM_MODEL}
TENSOR_PARALLEL_SIZE: ${TENSOR_PARALLEL_SIZE:-1}