From 6dbb5ae5e16d014a60a4d1b0aedf56f488d7f757 Mon Sep 17 00:00:00 2001
From: Varin Thakur
Date: Tue, 7 Oct 2025 13:24:23 +0000
Subject: [PATCH 1/3] Update documentation for sap

---
 .../chains/sap_hana_sparql_qa_chain.mdx       | 339 ++++++++++++++++++
 .../graphs/sap_hana_rdf_graph.mdx             | 224 ++++++++++++
 src/oss/python/integrations/providers/sap.mdx |  29 ++
 .../self_query/hanavector_self_query.mdx      | 161 +++++++++
 4 files changed, 753 insertions(+)
 create mode 100644 src/oss/python/integrations/chains/sap_hana_sparql_qa_chain.mdx
 create mode 100644 src/oss/python/integrations/graphs/sap_hana_rdf_graph.mdx
 create mode 100644 src/oss/python/integrations/retrievers/self_query/hanavector_self_query.mdx

diff --git a/src/oss/python/integrations/chains/sap_hana_sparql_qa_chain.mdx b/src/oss/python/integrations/chains/sap_hana_sparql_qa_chain.mdx
new file mode 100644
index 000000000..b35138032
--- /dev/null
+++ b/src/oss/python/integrations/chains/sap_hana_sparql_qa_chain.mdx
@@ -0,0 +1,339 @@
---
title: QA over Knowledge Graphs with SAP HANA Cloud Knowledge Graph Engine
---

## Setup and Installation

To use this feature, install the `langchain-hana` package:

```bash
pip install langchain-hana
```

Then, create a connection to your SAP HANA Cloud instance.

```python
import os

from dotenv import load_dotenv
from hdbcli import dbapi

# Load environment variables if needed
load_dotenv()

# Establish connection to SAP HANA Cloud
connection = dbapi.connect(
    address=os.environ.get("HANA_DB_ADDRESS"),
    port=os.environ.get("HANA_DB_PORT"),
    user=os.environ.get("HANA_DB_USER"),
    password=os.environ.get("HANA_DB_PASSWORD"),
    autocommit=True,
    sslValidateCertificate=False,
)
```

`HanaSparqlQAChain` ties together:

1. **Schema-aware SPARQL generation**
2. **Query execution** against SAP HANA
3. **Natural-language answer formatting**

## Initialization

You need:

* An **LLM** to generate and interpret queries
* A **`HanaRdfGraph`** (with connection, `graph_uri`, and ontology)

See [HanaRdfGraph](/oss/integrations/graphs/sap_hana_rdf_graph) to learn more about creating a `HanaRdfGraph` instance.

Import the `HanaSparqlQAChain`:

```python
from langchain_hana import HanaSparqlQAChain
```

```python
qa_chain = HanaSparqlQAChain.from_llm(
    llm=llm, graph=graph, allow_dangerous_requests=True, verbose=True
)
```

## Pipeline Overview

1. **SPARQL Generation**
   * Uses `SPARQL_GENERATION_SELECT_PROMPT`
   * Inputs:
     * `schema` (Turtle from `graph.get_schema`)
     * `prompt` (user’s question)
2. **Query Post-processing**
   * Extracts the SPARQL code from the LLM output
   * Injects a `FROM <graph_uri>` clause if it is missing
   * Ensures required common prefixes are declared (`rdf:`, `rdfs:`, `owl:`, `xsd:`)
3. **Execution**
   * Calls `graph.query(generated_sparql)`
4. **Answer Formulation**
   * Uses `SPARQL_QA_PROMPT`
   * Inputs:
     * `context` (raw query results)
     * `prompt` (original question)

## Prompt Templates

### SPARQL generation prompt

The `sparql_generation_prompt` is used to guide the LLM in generating a SPARQL query from the user question and the provided schema.

### Answering prompt

The `qa_prompt` instructs the LLM to create a natural language answer based solely on the database results.
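
For orientation, here is a minimal sketch of what a custom prompt pair could look like. The wording below is illustrative rather than the shipped defaults (linked next); the only hard requirement is the set of input variables.

```python
from langchain_core.prompts import PromptTemplate

# Illustrative stand-ins for the default prompts (hypothetical wording)
YOUR_SPARQL_PROMPT = PromptTemplate(
    input_variables=["schema", "prompt"],  # required variables for SPARQL generation
    template=(
        "Given the ontology below, write a SPARQL SELECT query that answers the question.\n"
        "Ontology (Turtle):\n{schema}\n\n"
        "Question: {prompt}\n"
        "SPARQL:"
    ),
)

YOUR_QA_PROMPT = PromptTemplate(
    input_variables=["context", "prompt"],  # required variables for answer formulation
    template=(
        "Answer the question using only the query results below.\n"
        "Results:\n{context}\n\n"
        "Question: {prompt}\n"
        "Answer:"
    ),
)
```

These two names are reused in the customization examples below.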
+ +The default prompts can be found here: [`prompts.py`](https://github.com/SAP/langchain-integration-for-sap-hana-cloud/blob/main/langchain_hana/chains/graph_qa/prompts.py) + +## Customizing Prompts + +You can override the defaults at initialization: + +```python +qa_chain = HanaSparqlQAChain.from_llm( + llm=llm, + graph=graph, + allow_dangerous_requests=True, + verbose=True, + sparql_generation_prompt=YOUR_SPARQL_PROMPT, + qa_prompt=YOUR_QA_PROMPT +) +``` + +Or swap them afterward: + +```python +qa_chain.sparql_generation_chain.prompt = YOUR_SPARQL_PROMPT +qa_chain.qa_chain.prompt = YOUR_QA_PROMPT +``` + +> - `sparql_generation_prompt` must have the input variables: `["schema", "prompt"]` +> - `qa_prompt` must have the input variables: `["context", "prompt"]` + +## Example: Question Answering over a “Movies” Knowledge Graph + +**Prerequisite**: +You must have an SAP HANA Cloud instance with the **triple store** feature enabled. +For detailed instructions, refer to: [Enable Triple Store](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-knowledge-graph-guide/enable-triple-store/)
Load the `kgdocu_movies` example data. See [Knowledge Graph Example](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-knowledge-graph-guide/knowledge-graph-example).

Below we’ll:

1. Instantiate the `HanaRdfGraph` pointing at our “movies” data graph
2. Wrap it in a `HanaSparqlQAChain` powered by an LLM
3. Ask natural-language questions and print out the chain’s responses

This demonstrates how the LLM generates SPARQL under the hood, executes it against SAP HANA, and returns a human-readable answer.

We'll use the `sap-ai-sdk-gen` package, currently installed via:

`pip install "sap-ai-sdk-gen[all]"`

Please check [sap-ai-sdk-gen](https://pypi.org/project/sap-ai-sdk-gen/) for future releases.

First, create a connection to your SAP HANA Cloud instance.

```python
import os

from dotenv import load_dotenv
from hdbcli import dbapi

# Load environment variables if needed
load_dotenv()

# Establish connection to SAP HANA Cloud
connection = dbapi.connect(
    address=os.environ.get("HANA_DB_ADDRESS"),
    port=os.environ.get("HANA_DB_PORT"),
    user=os.environ.get("HANA_DB_USER"),
    password=os.environ.get("HANA_DB_PASSWORD"),
    autocommit=True,
    sslValidateCertificate=False,
)
```

Then, set up the knowledge graph instance:

```python
from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
from langchain_hana import HanaRdfGraph, HanaSparqlQAChain

# from langchain_openai import ChatOpenAI # or your chosen LLM
```

```python
# Set up the Knowledge Graph
graph_uri = "kgdocu_movies"

graph = HanaRdfGraph(
    connection=connection,
    graph_uri=graph_uri,
    auto_extract_ontology=True,
)
```

```python
# A basic graph schema is extracted from the data graph. This schema will guide
# the LLM to generate a proper SPARQL query.
print(graph.get_schema)
```

```output
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <…> .

kg:acted_in a owl:ObjectProperty ;
    rdfs:label "acted_in" ;
    rdfs:domain kg:Actor ;
    rdfs:range kg:Film .

kg:dateOfBirth a owl:DatatypeProperty ;
    rdfs:label "dateOfBirth" ;
    rdfs:domain kg:Actor ;
    rdfs:range xsd:dateTime .

kg:directed a owl:ObjectProperty ;
    rdfs:label "directed" ;
    rdfs:domain kg:Director ;
    rdfs:range kg:Film .

kg:genre a owl:ObjectProperty ;
    rdfs:label "genre" ;
    rdfs:domain kg:Film ;
    rdfs:range kg:Genre .

kg:placeOfBirth a owl:ObjectProperty ;
    rdfs:label "placeOfBirth" ;
    rdfs:domain kg:Actor ;
    rdfs:range kg:Place .

kg:title a owl:DatatypeProperty ;
    rdfs:label "title" ;
    rdfs:domain kg:Film ;
    rdfs:range xsd:string .

rdfs:label a owl:DatatypeProperty ;
    rdfs:label "label" ;
    rdfs:domain kg:Director ,
        kg:Genre ,
        kg:Place ,
        kg:Actor ;
    rdfs:range xsd:string .

kg:Director a owl:Class ;
    rdfs:label "Director" .

kg:Genre a owl:Class ;
    rdfs:label "Genre" .

kg:Place a owl:Class ;
    rdfs:label "Place" .

kg:Actor a owl:Class ;
    rdfs:label "Actor" .

kg:Film a owl:Class ;
    rdfs:label "Film" .
```

After that, initialize the LLM.

```python
# Initialize the LLM
llm = ChatOpenAI(proxy_model_name="gpt-4o", temperature=0)
```

Then, we create a SPARQL QA chain:

```python
# Create a SPARQL QA Chain
chain = HanaSparqlQAChain.from_llm(
    llm=llm,
    verbose=True,
    allow_dangerous_requests=True,
    graph=graph,
)
```

```python
# output = chain.invoke("Which movies are in the data?")
# output = chain.invoke("In which movies did Keanu Reeves and Carrie-Anne Moss play together?")
# output = chain.invoke("which movie genres are in the data?")
# output = chain.invoke("which are the two most assigned movie genres?")
# output = chain.invoke("where were the actors of 'Blade Runner' born?")
# output = chain.invoke("which actors acted together in a movie and were born in the same city?")
output = chain.invoke("which actors acted in Blade Runner?")

print(output["result"])
```

```output


> Entering new HanaSparqlQAChain chain...
Generated SPARQL:
\`\`\`
PREFIX kg: <…>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?actor ?actorLabel
WHERE {
    ?movie rdf:type kg:Film .
    ?movie kg:title ?movieTitle .
    ?actor kg:acted_in ?movie .
    ?actor rdfs:label ?actorLabel .
    FILTER(?movieTitle = "Blade Runner")
}
\`\`\`
Final SPARQL:

PREFIX kg: <…>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?actor ?actorLabel

FROM <kgdocu_movies>
WHERE {
    ?movie rdf:type kg:Film .
    ?movie kg:title ?movieTitle .
    ?actor kg:acted_in ?movie .
    ?actor rdfs:label ?actorLabel .
    FILTER(?movieTitle = "Blade Runner")
}

Full Context:
actor,actorLabel
http://www.wikidata.org/entity/Q1353691,Morgan Paull
http://www.wikidata.org/entity/Q1372770,William Sanderson
http://www.wikidata.org/entity/Q358990,James Hong
http://www.wikidata.org/entity/Q498420,M. Emmet Walsh
http://www.wikidata.org/entity/Q81328,Q81328
http://www.wikidata.org/entity/Q723780,Brion James
http://www.wikidata.org/entity/Q207596,Daryl Hannah
http://www.wikidata.org/entity/Q1691628,Joe Turkel
http://www.wikidata.org/entity/Q236702,Joanna Cassidy
http://www.wikidata.org/entity/Q213574,Rutger Hauer
http://www.wikidata.org/entity/Q3143555,Hy Pyke
http://www.wikidata.org/entity/Q211415,Edward James Olmos
http://www.wikidata.org/entity/Q230736,Sean Young


> Finished chain.
The actors who acted in Blade Runner are Morgan Paull, William Sanderson, James Hong, M. Emmet Walsh, Brion James, Daryl Hannah, Joe Turkel, Joanna Cassidy, Rutger Hauer, Hy Pyke, Edward James Olmos, and Sean Young.
```

## What’s happening under the hood?

1. **SPARQL Generation**
   The chain invokes the LLM with your Turtle-formatted ontology (`graph.get_schema`) and the user’s question using the `SPARQL_GENERATION_SELECT_PROMPT`. The LLM then emits a valid `SELECT` query tailored to your schema.
2. **Pre-processing & Execution**
   * **Extract & clean**: Pull the raw SPARQL text out of the LLM’s response.
   * **Inject graph context**: Add `FROM <graph_uri>` if it’s missing and ensure common prefixes (`rdf:`, `rdfs:`, `owl:`, `xsd:`) are declared.
   * **Run on HANA**: Execute the finalized query via `HanaRdfGraph.query()` over your named graph.
3. **Answer Formulation**
   The returned CSV (or Turtle) results feed into the LLM again, this time with the `SPARQL_QA_PROMPT`. The LLM produces a concise, human-readable answer grounded strictly in the retrieved data.
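
To double-check an answer, you can bypass the LLM entirely and run a handwritten SPARQL query against the same named graph with `HanaRdfGraph.query()`. A minimal sketch (the query and `LIMIT` are illustrative):

```python
# List a few labelled resources in the movies graph (illustrative query)
verify_query = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?s ?label
FROM <kgdocu_movies>
WHERE { ?s rdfs:label ?label . }
LIMIT 10
"""

# Results come back as CSV rows by default
print(graph.query(verify_query))
```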
diff --git a/src/oss/python/integrations/graphs/sap_hana_rdf_graph.mdx b/src/oss/python/integrations/graphs/sap_hana_rdf_graph.mdx
new file mode 100644
index 000000000..4e1333d8f
--- /dev/null
+++ b/src/oss/python/integrations/graphs/sap_hana_rdf_graph.mdx
@@ -0,0 +1,224 @@
---
title: SAP HANA Cloud Knowledge Graph Engine
---

[SAP HANA Cloud Knowledge Graph](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-knowledge-graph-guide/sap-hana-cloud-sap-hana-database-knowledge-graph-engine-guide) is a fully integrated knowledge graph solution within the `SAP HANA Cloud` database.

## Setup & Installation

You must have an SAP HANA Cloud instance with the **triple store** feature enabled.
For detailed instructions, refer to: [Enable Triple Store](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-knowledge-graph-guide/enable-triple-store/)

To use the SAP HANA Knowledge Graph Engine and/or Vector Store Engine with LangChain, install the `langchain-hana` package:

```bash
pip install langchain-hana
```

First, create a connection to your SAP HANA Cloud instance.

```python
import os

from dotenv import load_dotenv
from hdbcli import dbapi

# Load environment variables if needed
load_dotenv()

# Establish connection to SAP HANA Cloud
connection = dbapi.connect(
    address=os.environ.get("HANA_DB_ADDRESS"),
    port=os.environ.get("HANA_DB_PORT"),
    user=os.environ.get("HANA_DB_USER"),
    password=os.environ.get("HANA_DB_PASSWORD"),
    autocommit=True,
    sslValidateCertificate=False,
)
```

Then, import the `HanaRdfGraph` class:

```python
from langchain_hana import HanaRdfGraph
```

## Creating a HanaRdfGraph Instance

The constructor requires:

* **`connection`**: an active `hdbcli.dbapi.connect(...)` instance
* **`graph_uri`**: the named graph (or `"DEFAULT"`) where your RDF data lives
* **One of**:
  1. **`ontology_query`**: a SPARQL CONSTRUCT query to extract schema triples
  2. **`ontology_uri`**: a hosted ontology graph URI
  3. **`ontology_local_file`** + **`ontology_local_file_format`**: a local Turtle/RDF file
  4. **`auto_extract_ontology=True`** (not recommended for production; see the note below)

### `graph_uri` vs. Ontology

* **`graph_uri`**:
  The named graph in your SAP HANA Cloud instance that contains your instance data (sometimes 100k+ triples).
  If `None` or `"DEFAULT"` is provided, the default graph is used.
* **Ontology**: a lean schema (typically ~50-100 triples) describing classes, properties, domains, ranges, labels, comments, and subclass relationships. The ontology guides SPARQL generation and result interpretation.

### Creating a graph instance with the **DEFAULT** graph

More info on the DEFAULT graph can be found at [DEFAULT Graph and Named Graphs](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-knowledge-graph-guide/default-graph-and-named-graphs).

```python
graph = HanaRdfGraph(
    connection=connection,
    auto_extract_ontology=True,
)

# graph = HanaRdfGraph(
#     connection=connection,
#     graph_uri="DEFAULT",
#     auto_extract_ontology=True,
# )

# graph = HanaRdfGraph(
#     connection=connection,
#     graph_uri="",
#     auto_extract_ontology=True,
# )
```

### Creating a graph instance with a `graph_uri`

```python
graph = HanaRdfGraph(
    connection=connection,
    graph_uri="http://example.org/movies",
    auto_extract_ontology=True,
)
```

### Creating a graph instance with a remote `ontology_uri`

Load the schema directly from a hosted ontology graph URI.

```python
graph = HanaRdfGraph(
    connection=connection,
    ontology_uri="",  # e.g., "http://example.org/movies/ontology"
)
```

### Creating a graph instance with a custom `ontology_query`

Use a custom `CONSTRUCT` query to selectively extract schema triples.

```python
ontology_query = """
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    CONSTRUCT {?cls rdf:type owl:Class . ?cls rdfs:label ?clsLabel . ?rel rdf:type ?propertyType . ?rel rdfs:label ?relLabel . ?rel rdfs:domain ?domain . ?rel rdfs:range ?range .}
    FROM <kgdocu_movies>
    WHERE {    # get properties
        {SELECT DISTINCT ?domain ?rel ?relLabel ?propertyType ?range
            WHERE {
                ?subj ?rel ?obj .
                ?subj a ?domain .
                OPTIONAL {?obj a ?rangeClass .}
                FILTER(?rel != rdf:type)
                BIND(IF(isIRI(?obj) = true, owl:ObjectProperty, owl:DatatypeProperty) AS ?propertyType)
                BIND(COALESCE(?rangeClass, DATATYPE(?obj)) AS ?range)
                BIND(STR(?rel) AS ?uriStr)  # Convert URI to string
                BIND(REPLACE(?uriStr, "^.*[/#]", "") AS ?relLabel)
            }
        }
        UNION
        # get classes
        {SELECT DISTINCT ?cls ?clsLabel
            WHERE {
                ?subj a/rdfs:subClassOf* ?cls .
                FILTER(isIRI(?cls)) .
                BIND(STR(?cls) AS ?uriStr)  # Convert URI to string
                BIND(REPLACE(?uriStr, "^.*[/#]", "") AS ?clsLabel)
            }
        }
    }
"""

# You can provide the graph_uri parameter as well if needed
graph = HanaRdfGraph(
    connection=connection,
    ontology_query=ontology_query,
)
```

### Creating a graph instance with a local RDF file

(`ontology_local_file` + `ontology_local_file_format`): Load the schema from a local RDF ontology file.

Supported RDF formats are `Turtle`, `RDF/XML`, `JSON-LD`, `N-Triples`, `Notation-3`, `Trig`, `Trix`, `N-Quads`.

```python
graph = HanaRdfGraph(
    connection=connection,
    ontology_local_file="",  # e.g., "ontology.ttl"
    ontology_local_file_format="",  # e.g., "Turtle", "RDF/XML", "JSON-LD", "N-Triples", "Notation-3", "Trig", "Trix", "N-Quads"
)
```

### Auto extraction of ontology

(`auto_extract_ontology=True`): Infer schema information directly from your instance data.

```python
graph = HanaRdfGraph(
    connection=connection,
    graph_uri="",
    auto_extract_ontology=True,
)
```

> **Note**: Auto-extraction is **not** recommended for production, as it generally omits important triples such as `rdfs:label`, `rdfs:comment`, and `rdfs:subClassOf`.

## Executing SPARQL Queries

You can use the `query()` method to execute arbitrary SPARQL queries (`SELECT`, `ASK`, `CONSTRUCT`, etc.) on the data graph.

The method has the following parameters:

* **query**: the SPARQL query string.
* **content_type**: the response format for the output (default: CSV).

Please use the following strings for the respective formats:

* CSV: `"sparql-results+csv"`
* JSON: `"sparql-results+json"`
* XML: `"sparql-results+xml"`
* TSV: `"sparql-results+tsv"`

> **Note**: `CONSTRUCT` and `ASK` queries return Turtle and boolean results, respectively.

The following query lists all triples in the DEFAULT graph.

```python
query = """
    SELECT ?s ?p ?o
    WHERE {
        ?s ?p ?o .
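        # a single triple pattern matches every (subject, predicate, object) in the graph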
+ } +""" + +result = graph.query(query) +print(result) +``` + +```output +s,p,o +http://example.com/Puppet,http://www.w3.org/2000/01/rdf-schema#label,Puppet +http://example.com/show,http://www.w3.org/2000/01/rdf-schema#domain,http://example.com/Puppet +http://example.com/name,http://www.w3.org/2000/01/rdf-schema#domain,http://example.com/Puppet +http://example.com/show,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2002/07/owl#DatatypeProperty +http://example.com/show,http://www.w3.org/2000/01/rdf-schema#range,http://www.w3.org/2001/XMLSchema#string +http://example.com/name,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2002/07/owl#DatatypeProperty +http://example.com/name,http://www.w3.org/2000/01/rdf-schema#range,http://www.w3.org/2001/XMLSchema#string +http://example.com/Puppet,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2002/07/owl#Class +http://example.com/name,http://www.w3.org/2000/01/rdf-schema#label,name +http://example.com/show,http://www.w3.org/2000/01/rdf-schema#label,show +``` diff --git a/src/oss/python/integrations/providers/sap.mdx b/src/oss/python/integrations/providers/sap.mdx index bd5e8ad8e..96b7cf5d9 100644 --- a/src/oss/python/integrations/providers/sap.mdx +++ b/src/oss/python/integrations/providers/sap.mdx @@ -31,3 +31,32 @@ See a [usage example](/oss/integrations/vectorstores/sap_hanavector). ```python from langchain_hana import HanaDB ``` + +## Self Query Retriever +>[SAP HANA Cloud Vector Engine](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-vector-engine-guide/sap-hana-cloud-sap-hana-database-vector-engine-guide) +> also provides a Self Query Retriever implementation using the `HanaTranslator` Class. + +See a [usage example](/oss/integrations/retrievers/self_query/hanavector_self_query). + +```python +from langchain_hana import HanaTranslator +``` + +## Graph +>[SAP HANA Cloud Knowledge Graph Engine](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-knowledge-graph-guide/sap-hana-cloud-sap-hana-database-knowledge-graph-engine-guide) +> provides support to utilise knowledge graphs through the `HanaRdfGraph` Class. + +See a [usage example](/oss/integrations/graphs/sap_hana_rdf_graph). + +```python +from langchain_hana import HanaRdfGraph +``` + +## Chains +A `SparqlQAChain` is also provided which can be used with `HanaRdfGraph` for SPARQL-QA tasks. + +See a [usage example](/oss/integrations/chains/sap_hana_sparql_qa_chain). + +```python +from langchain_hana import HanaSparqlQAChain +``` diff --git a/src/oss/python/integrations/retrievers/self_query/hanavector_self_query.mdx b/src/oss/python/integrations/retrievers/self_query/hanavector_self_query.mdx new file mode 100644 index 000000000..45335589b --- /dev/null +++ b/src/oss/python/integrations/retrievers/self_query/hanavector_self_query.mdx @@ -0,0 +1,161 @@ +--- +title: Self Querying with SAP HANA Cloud Vector Engine +--- +For more information on how to setup the SAP HANA vector store, take a look at the [documentation](/oss/integrations/vectorstores/sap_hanavector). 
We use the same setup here:

```python
import os

# Use OPENAI_API_KEY env variable
# os.environ["OPENAI_API_KEY"] = "Your OpenAI API key"
from hdbcli import dbapi

# Use connection settings from the environment
connection = dbapi.connect(
    address=os.environ.get("HANA_DB_ADDRESS"),
    port=os.environ.get("HANA_DB_PORT"),
    user=os.environ.get("HANA_DB_USER"),
    password=os.environ.get("HANA_DB_PASSWORD"),
    autocommit=True,
    sslValidateCertificate=False,
)
```

To enable self-querying with good performance, we create additional metadata columns for our vector store table in HANA:

```python
# Create custom table with attribute
cur = connection.cursor()
cur.execute("DROP TABLE LANGCHAIN_DEMO_SELF_QUERY", ignoreErrors=True)
cur.execute(
    (
        """CREATE TABLE "LANGCHAIN_DEMO_SELF_QUERY"  (
        "name" NVARCHAR(100), "is_active" BOOLEAN, "id" INTEGER, "height" DOUBLE,
        "VEC_TEXT" NCLOB,
        "VEC_META" NCLOB,
        "VEC_VECTOR" REAL_VECTOR
        )"""
    )
)
```

Let's add some documents.

```python
from langchain_core.documents import Document
from langchain_hana import HanaDB
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

# Prepare some test documents
docs = [
    Document(
        page_content="First",
        metadata={"name": "adam", "is_active": True, "id": 1, "height": 10.0},
    ),
    Document(
        page_content="Second",
        metadata={"name": "bob", "is_active": False, "id": 2, "height": 5.7},
    ),
    Document(
        page_content="Third",
        metadata={"name": "jane", "is_active": True, "id": 3, "height": 2.4},
    ),
]

db = HanaDB(
    connection=connection,
    embedding=embeddings,
    table_name="LANGCHAIN_DEMO_SELF_QUERY",
    specific_metadata_columns=["name", "is_active", "id", "height"],
)

# Delete already existing documents from the table
db.delete(filter={})
db.add_documents(docs)
```

## Self querying

Now for the main act: here is how to construct a `SelfQueryRetriever` for the HANA vector store:

```python
from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_hana import HanaTranslator
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")

metadata_field_info = [
    AttributeInfo(
        name="name",
        description="The name of the person",
        type="string",
    ),
    AttributeInfo(
        name="is_active",
        description="Whether the person is active",
        type="boolean",
    ),
    AttributeInfo(
        name="id",
        description="The ID of the person",
        type="integer",
    ),
    AttributeInfo(
        name="height",
        description="The height of the person",
        type="float",
    ),
]

document_content_description = "A collection of persons"

hana_translator = HanaTranslator()

retriever = SelfQueryRetriever.from_llm(
    llm,
    db,
    document_content_description,
    metadata_field_info,
    structured_query_translator=hana_translator,
)
```

Let's use this retriever to prepare a (self) query for a person:

```python
query_prompt = "Which person is not active?"
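# The retriever should turn this question into a metadata filter on the
# "is_active" column (e.g. is_active = False) combined with the semantic search.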
+ +docs = retriever.invoke(input=query_prompt) +for doc in docs: + print("-" * 80) + print(doc.page_content, " ", doc.metadata) +``` + +We can also take a look at how the query is being constructed: + +```python +from langchain.chains.query_constructor.base import ( + StructuredQueryOutputParser, + get_query_constructor_prompt, +) + +prompt = get_query_constructor_prompt( + document_content_description, + metadata_field_info, +) +output_parser = StructuredQueryOutputParser.from_components() +query_constructor = prompt | llm | output_parser + +sq = query_constructor.invoke(input=query_prompt) + +print("Structured query: ", sq) + +print("Translated for hana vector store: ", hana_translator.visit_structured_query(sq)) +``` From 981e4c1340dca256d0b75d4ce4d7f0ef15af43c4 Mon Sep 17 00:00:00 2001 From: VarinThakur01 Date: Wed, 8 Oct 2025 16:55:45 +0200 Subject: [PATCH 2/3] Remove unneccessary parameter from db instance creation --- src/oss/python/integrations/chains/sap_hana_sparql_qa_chain.mdx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/oss/python/integrations/chains/sap_hana_sparql_qa_chain.mdx b/src/oss/python/integrations/chains/sap_hana_sparql_qa_chain.mdx index b35138032..86c85ef46 100644 --- a/src/oss/python/integrations/chains/sap_hana_sparql_qa_chain.mdx +++ b/src/oss/python/integrations/chains/sap_hana_sparql_qa_chain.mdx @@ -27,7 +27,6 @@ connection = dbapi.connect( user=os.environ.get("HANA_DB_USER"), password=os.environ.get("HANA_DB_PASSWORD"), autocommit=True, - sslValidateCertificate=False, ) ``` From 471cfe83ea25c95180ee891fd224531007e17ba4 Mon Sep 17 00:00:00 2001 From: VarinThakur01 Date: Wed, 8 Oct 2025 16:56:54 +0200 Subject: [PATCH 3/3] Remove unneccessary parameters in db connection creation --- src/oss/python/integrations/graphs/sap_hana_rdf_graph.mdx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/oss/python/integrations/graphs/sap_hana_rdf_graph.mdx b/src/oss/python/integrations/graphs/sap_hana_rdf_graph.mdx index 4e1333d8f..587394787 100644 --- a/src/oss/python/integrations/graphs/sap_hana_rdf_graph.mdx +++ b/src/oss/python/integrations/graphs/sap_hana_rdf_graph.mdx @@ -32,7 +32,6 @@ connection = dbapi.connect( user=os.environ.get("HANA_DB_USER"), password=os.environ.get("HANA_DB_PASSWORD"), autocommit=True, - sslValidateCertificate=False, ) ```