|
147 | 147 | },
|
148 | 148 | {
|
149 | 149 | "cell_type": "code",
|
150 |
| - "execution_count": 1, |
| 150 | + "execution_count": 2, |
151 | 151 | "id": "1b2c3f4e",
|
152 | 152 | "metadata": {},
|
153 |
| - "outputs": [ |
154 |
| - { |
155 |
| - "name": "stdout", |
156 |
| - "output_type": "stream", |
157 |
| - "text": [ |
158 |
| - "LLM config will be updated\n" |
159 |
| - ] |
160 |
| - } |
161 |
| - ], |
| 153 | + "outputs": [], |
162 | 154 | "source": [
|
163 | 155 | "import os\n",
|
164 | 156 | "from pathlib import Path\n",
|
|
880 | 872 | },
|
881 | 873 | {
|
882 | 874 | "cell_type": "code",
|
883 |
| - "execution_count": null, |
| 875 | + "execution_count": 16, |
884 | 876 | "id": "d0bab20b",
|
885 | 877 | "metadata": {},
|
886 | 878 | "outputs": [],
|
|
918 | 910 | "id": "e11e73cf",
|
919 | 911 | "metadata": {},
|
920 | 912 | "outputs": [
|
921 |
| - { |
922 |
| - "name": "stderr", |
923 |
| - "output_type": "stream", |
924 |
| - "text": [ |
925 |
| - "D:\\openvino_notebooks\\openvino_env\\Lib\\site-packages\\openvino\\runtime\\__init__.py:10: DeprecationWarning: The `openvino.runtime` module is deprecated and will be removed in the 2026.0 release. Please replace `openvino.runtime` with `openvino`.\n", |
926 |
| - " warnings.warn(\n" |
927 |
| - ] |
928 |
| - }, |
929 | 913 | {
|
930 | 914 | "data": {
|
931 | 915 | "application/vnd.jupyter.widget-view+json": {
|
|
1198 | 1182 | "\n",
|
1199 | 1183 | "[back to top ⬆️](#Table-of-contents:)\n",
|
1200 | 1184 | "\n",
|
1201 |
| - "Now a local rerank model of OpenVINO can be accelerated on NPU by using the `OpenVINOReranker` class without PyTorch requirements.\n", |
| 1185 | + "Now a local OpenVINO rerank model can be run through the `OpenVINOGenAIReranker` class, which wraps the [GenAI `TextRerankPipeline`](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/rag), or accelerated on NPU by using the `OpenVINOReranker` class — in both cases without PyTorch requirements.\n", |
1202 | 1186 | "\n",
|
1203 | 1187 | "> **Note**: Rerank can be skipped in RAG.\n"
|
1204 | 1188 | ]
|
1205 | 1189 | },
|
1206 | 1190 | {
|
1207 | 1191 | "cell_type": "code",
|
1208 |
| - "execution_count": 25, |
| 1192 | + "execution_count": null, |
1209 | 1193 | "id": "b67b39f2-8394-45fb-9b2b-ea63e267a2d3",
|
1210 | 1194 | "metadata": {},
|
1211 | 1195 | "outputs": [],
|
1212 | 1196 | "source": [
|
1213 |
| - "from ov_langchain_helper import OpenVINOReranker\n", |
| 1197 | + "from ov_langchain_helper import OpenVINOReranker, OpenVINOGenAIReranker\n", |
1214 | 1198 | "\n",
|
1215 | 1199 | "rerank_model_name = rerank_model_id.value\n",
|
1216 | 1200 | "rerank_model_kwargs = {\"device_name\": rerank_device.value}\n",
|
|
1235 | 1219 | " top_n=rerank_top_n,\n",
|
1236 | 1220 | " )\n",
|
1237 | 1221 | "else:\n",
|
1238 |
| - " reranker = OpenVINOReranker(\n", |
| 1222 | + " reranker = OpenVINOGenAIReranker.from_model_path(\n", |
1239 | 1223 | " model_path=rerank_model_name,\n",
|
1240 |
| - " model_kwargs=rerank_model_kwargs,\n", |
1241 |
| - " top_n=rerank_top_n,\n", |
| 1224 | + " device=rerank_device.value,\n", |
| 1225 | + " config_kwargs={\"top_n\": rerank_top_n},\n", |
1242 | 1226 | " )"
|
1243 | 1227 | ]
|
1244 | 1228 | },
|
|
1567 | 1551 | "\n",
|
1568 | 1552 | " if rerank_device.value == \"NPU\":\n",
|
1569 | 1553 | " vector_search_top_k = vector_search_top_k_npu\n",
|
1570 |
| - " if vector_rerank_top_n > vector_search_top_k:\n", |
1571 |
| - " gr.Warning(\"Search top k must >= Rerank top n\")\n", |
| 1554 | + " if vector_rerank_top_n > vector_search_top_k:\n", |
| 1555 | + " gr.Warning(\"Search top k must >= Rerank top n\")\n", |
1572 | 1556 | "\n",
|
1573 | 1557 | " documents = []\n",
|
1574 | 1558 | " for doc in docs:\n",
|
|
1586 | 1570 | " search_kwargs = {\"k\": vector_search_top_k}\n",
|
1587 | 1571 | " retriever = db.as_retriever(search_kwargs=search_kwargs, search_type=search_method)\n",
|
1588 | 1572 | " if run_rerank:\n",
|
1589 |
| - " reranker.top_n = vector_rerank_top_n\n", |
| 1573 | + " if rerank_device.value == \"NPU\":\n", |
| 1574 | + " reranker.top_n = vector_rerank_top_n\n", |
1590 | 1575 | " retriever = ContextualCompressionRetriever(base_compressor=reranker, base_retriever=retriever)\n",
|
1591 | 1576 | " prompt = PromptTemplate.from_template(rag_prompt_template)\n",
|
1592 | 1577 | " combine_docs_chain = create_stuff_documents_chain(llm, prompt)\n",
|
|
1623 | 1608 | " retriever = db.as_retriever(search_kwargs=search_kwargs, search_type=search_method)\n",
|
1624 | 1609 | " if run_rerank:\n",
|
1625 | 1610 | " retriever = ContextualCompressionRetriever(base_compressor=reranker, base_retriever=retriever)\n",
|
1626 |
| - " reranker.top_n = vector_rerank_top_n\n", |
| 1611 | + " if rerank_device.value == \"NPU\":\n", |
| 1612 | + " reranker.top_n = vector_rerank_top_n\n", |
1627 | 1613 | " rag_chain = create_retrieval_chain(retriever, combine_docs_chain)\n",
|
1628 | 1614 | "\n",
|
1629 | 1615 | " return \"Vector database is Ready\"\n",
|
|
1735 | 1721 | " update_retriever_fn=update_retriever,\n",
|
1736 | 1722 | " model_name=llm_model_id.value,\n",
|
1737 | 1723 | " language=model_language.value,\n",
|
| 1724 | + " rerank_device=rerank_device.value,\n", |
1738 | 1725 | ")\n",
|
1739 | 1726 | "\n",
|
1740 | 1727 | "try:\n",
|
|
0 commit comments