Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
848 changes: 814 additions & 34 deletions libs/partners/qdrant/langchain_qdrant/qdrant.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import uuid

import pytest
from qdrant_client import AsyncQdrantClient, models

from langchain_qdrant import QdrantVectorStore, RetrievalMode
from tests.integration_tests.common import (
ConsistentFakeEmbeddings,
ConsistentFakeSparseEmbeddings,
)
from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes


@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
async def test_async_add_texts_basic(
    location: str, retrieval_mode: RetrievalMode
) -> None:
    """Add two batches of texts asynchronously, then check the count and a search.

    Parametrized over every ``location`` and every ``retrieval_mode``.
    """
    # Fresh random collection name for this run.
    name = uuid.uuid4().hex

    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    # First batch: one returned ID per inserted text.
    first_ids = await store.aadd_texts(["foo", "bar"])
    assert len(first_ids) == 2

    # Second batch goes into the same collection.
    second_ids = await store.aadd_texts(["baz", "qux"])
    assert len(second_ids) == 2

    # Both batches together should account for four stored points.
    client = store.client
    assert isinstance(client, AsyncQdrantClient)
    counted = await client.count(name)
    assert counted.count == 4

    # Searching for "foo" with k=1 should return exactly the "foo" document.
    hits = await store.asimilarity_search("foo", k=1)
    assert len(hits) == 1
    assert hits[0].page_content == "foo"


@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_add_texts_with_filters(location: str) -> None:
    """Store texts with metadata and check that a filtered async search narrows hits."""
    name = uuid.uuid4().hex

    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    # (text, color) pairs; every entry shares the same "type" metadata value.
    apples = [("Red apple", "red"), ("Blue apple", "blue"), ("Green apple", "green")]
    texts = [text for text, _ in apples]
    metadatas = [{"color": color, "type": "fruit"} for _, color in apples]

    await store.aadd_texts(texts, metadatas=metadatas)

    # Filter on the "metadata.color" payload key so only the red entry matches.
    is_red = models.FieldCondition(
        key="metadata.color", match=models.MatchValue(value="red")
    )
    red_only = models.Filter(must=[is_red])

    # k=3 would allow all three texts; the filter should leave just one.
    hits = await store.asimilarity_search("apple", k=3, filter=red_only)

    assert len(hits) == 1
    only_hit = hits[0]
    assert only_hit.page_content == "Red apple"
    assert only_hit.metadata["color"] == "red"


@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_add_texts_with_custom_ids(location: str) -> None:
    """Insert texts under caller-supplied IDs and fetch them back by those IDs."""
    name = uuid.uuid4().hex

    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    custom_ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]

    stored_ids = await store.aadd_texts(
        ["First document", "Second document"], ids=custom_ids
    )

    # The store must echo back exactly the IDs it was given.
    assert stored_ids == custom_ids

    # Both documents must be retrievable through those same IDs.
    fetched = await store.aget_by_ids(custom_ids)
    assert len(fetched) == 2

    # Membership checks only: the order of the fetched documents is not asserted.
    seen = {doc.page_content for doc in fetched}
    assert "First document" in seen
    assert "Second document" in seen
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import uuid

import pytest
from langchain_core.documents import Document
from qdrant_client import AsyncQdrantClient

from langchain_qdrant import QdrantVectorStore, RetrievalMode
from tests.integration_tests.common import (
ConsistentFakeEmbeddings,
ConsistentFakeSparseEmbeddings,
assert_documents_equals,
)
from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes


@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
async def test_async_vectorstore_from_texts(
    location: str, retrieval_mode: RetrievalMode
) -> None:
    """Construct a store asynchronously, add two texts, and verify the point count.

    Parametrized over every ``location`` and every ``retrieval_mode``.
    """
    name = uuid.uuid4().hex

    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    # Populate the collection through the async add path.
    sentences = ["Lorem ipsum dolor sit amet", "Ipsum dolor sit amet"]
    await store.aadd_texts(sentences)

    # Ask the underlying async client how many points the collection holds.
    client = store.client
    assert isinstance(client, AsyncQdrantClient)
    counted = await client.count(name)
    assert counted.count == 2


@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_qdrant_similarity_search(location: str) -> None:
    """Check that an async similarity search for a stored text returns that text."""
    name = uuid.uuid4().hex

    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    await store.aadd_texts(["foo", "bar", "baz"])

    # With k=1 only the single best match for "foo" is requested.
    hits = await store.asimilarity_search("foo", k=1)
    assert len(hits) == 1

    # Compare via the shared helper, which does not assume ordering.
    wanted = [Document(page_content="foo")]
    assert_documents_equals(actual=hits, expected=wanted)


@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_qdrant_delete(location: str) -> None:
    """Delete one stored document asynchronously and confirm the count drops by one."""
    name = uuid.uuid4().hex
    ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
        "b4c1aa36-d6ab-4fb2-8a94-56674fd27485",
    ]

    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    await store.aadd_texts(["foo", "bar", "baz"], ids=ids)

    client = store.client
    assert isinstance(client, AsyncQdrantClient)

    # Baseline: all three texts are present before anything is deleted.
    before = await client.count(name)
    assert before.count == 3

    # Remove only the second document (index 1) by its ID.
    second_id = ids[1]
    deleted = await store.adelete([second_id])
    assert deleted is True

    # Exactly one point fewer should remain afterwards.
    after = await client.count(name)
    assert after.count == 2


@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_qdrant_add_documents(location: str) -> None:
    """Add ``Document`` objects asynchronously and verify the IDs and point count."""
    name = uuid.uuid4().hex

    # Three documents with page numbers 1..3 as metadata.
    docs = [
        Document(page_content=text, metadata={"page": page})
        for page, text in enumerate(["foo", "bar", "baz"], start=1)
    ]

    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    # One string ID is expected back for every document added.
    new_ids = await store.aadd_documents(docs)
    assert len(new_ids) == 3
    assert all(isinstance(new_id, str) for new_id in new_ids)

    client = store.client
    assert isinstance(client, AsyncQdrantClient)

    # The collection should now hold all three documents.
    counted = await client.count(name)
    assert counted.count == 3
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import uuid

import pytest
from langchain_core.documents import Document

from langchain_qdrant import QdrantVectorStore, RetrievalMode
from tests.integration_tests.common import ConsistentFakeEmbeddings
from tests.integration_tests.fixtures import qdrant_locations


@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_max_marginal_relevance_search_basic(location: str) -> None:
    """Test basic async max marginal relevance search functionality.

    Five texts are stored and an MMR search with ``k=3`` is run; the result
    must be non-empty, contain at most three ``Document`` objects, and start
    with a document mentioning "apple".
    """
    collection_name = uuid.uuid4().hex

    vec_store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=RetrievalMode.DENSE,  # MMR only works with dense
        collection_name=collection_name,
        client_options={"location": location},
    )

    texts = ["apple", "banana", "cherry", "apple pie", "apple juice"]
    await vec_store.aadd_texts(texts)

    # Test basic MMR search
    results = await vec_store.amax_marginal_relevance_search("apple", k=3, fetch_k=5)

    # Require at least one hit: an empty result would otherwise surface below
    # as an IndexError on results[0] instead of a clear assertion failure.
    assert 0 < len(results) <= 3
    assert all(isinstance(doc, Document) for doc in results)

    # First result should be most similar
    assert "apple" in results[0].page_content.lower()


@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_max_marginal_relevance_search_by_vector(location: str) -> None:
    """Test async MMR search by vector.

    Four texts are stored, the query "apple" is embedded with the same fake
    embeddings class, and the search by vector with ``k=2`` must return
    between one and two ``Document`` objects.
    """
    collection_name = uuid.uuid4().hex

    vec_store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=RetrievalMode.DENSE,
        collection_name=collection_name,
        client_options={"location": location},
    )

    texts = ["apple", "banana", "cherry", "apple pie"]
    await vec_store.aadd_texts(texts)

    # Get embedding for search
    embedding = ConsistentFakeEmbeddings().embed_query("apple")

    # Test MMR by vector
    results = await vec_store.amax_marginal_relevance_search_by_vector(
        embedding, k=2, fetch_k=4
    )

    # Require at least one hit; otherwise both checks below pass vacuously
    # on an empty list and the test would prove nothing.
    assert 0 < len(results) <= 2
    assert all(isinstance(doc, Document) for doc in results)


@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_max_marginal_relevance_search_with_score_by_vector(
    location: str,
) -> None:
    """Test async MMR search with score by vector.

    Five texts are stored and an MMR search by vector with ``k=3`` is run;
    every returned item must be a ``(Document, float)`` pair with a
    non-negative score, and at least one item must be returned.
    """
    collection_name = uuid.uuid4().hex

    vec_store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=RetrievalMode.DENSE,
        collection_name=collection_name,
        client_options={"location": location},
    )

    texts = ["apple", "banana", "cherry", "apple pie", "apple juice"]
    await vec_store.aadd_texts(texts)

    # Get embedding for search
    embedding = ConsistentFakeEmbeddings().embed_query("apple")

    # Test MMR with scores by vector
    results = await vec_store.amax_marginal_relevance_search_with_score_by_vector(
        embedding, k=3, fetch_k=5
    )

    # Require at least one hit; otherwise the loop below never runs and the
    # per-item type and score checks are silently skipped.
    assert 0 < len(results) <= 3
    for doc, score in results:
        assert isinstance(doc, Document)
        assert isinstance(score, float)
        assert score >= 0.0


@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_max_marginal_relevance_search_empty_collection(
    location: str,
) -> None:
    """Run an async MMR search before any text is added and expect no results."""
    name = uuid.uuid4().hex

    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=RetrievalMode.DENSE,
        collection_name=name,
        client_options={"location": location},
    )

    # Nothing has been inserted, so the search has nothing to return.
    found = await store.amax_marginal_relevance_search("anything", k=5, fetch_k=10)

    assert len(found) == 0
Loading