diff --git a/.gitignore b/.gitignore index 4642f4e8e..275a8ade8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ +.vscode/ +Release/ +Debug/ +datasets/ +build/ +data/ # Prerequisites *.d diff --git a/AnnService/inc/Core/Common/QueryResultSet.h b/AnnService/inc/Core/Common/QueryResultSet.h index 35fc9b554..25975161a 100644 --- a/AnnService/inc/Core/Common/QueryResultSet.h +++ b/AnnService/inc/Core/Common/QueryResultSet.h @@ -4,9 +4,12 @@ #ifndef _SPTAG_COMMON_QUERYRESULTSET_H_ #define _SPTAG_COMMON_QUERYRESULTSET_H_ +#include "inc/Core/Common.h" +#include "inc/Core/CommonDataStructure.h" #include "inc/Core/SearchQuery.h" #include "DistanceUtils.h" #include +#include #include "IQuantizer.h" namespace SPTAG @@ -36,9 +39,19 @@ class QueryResultSet : public QueryResult } QueryResultSet(const QueryResultSet& other) : QueryResult(other) - { + { } + // QueryResultSet(const T*_target, int _K, bool _withResult) { + // m_withResultVector = _withResult; + // if(m_withResultVector) { + // m_resultVectors.resize(_K); + // } + // for(auto& ptr : m_resultVectors) { + // ptr = std::shared_ptr(new T(), [](T* p) { delete p; }); + // } + // } + ~QueryResultSet() { } @@ -86,11 +99,46 @@ class QueryResultSet : public QueryResult return false; } + // bool NeedResultVector() const { + // return m_withResultVector; + // } + + // void RemoveResultVector() { + // if(m_withResultVector) { + // m_withResultVector = false; + // m_resultVectors.clear(); + // } + // } + + // if we want to use spread search, the query result should be copied to the result set + bool AddPoint(const SizeType index, float dist, ByteArray& vector) { + if (dist < m_results[0].Dist || (dist == m_results[0].Dist && index < m_results[0].VID)) + { + m_results[0].VID = index; + m_results[0].Dist = dist; + m_results[0].Vector = vector; + // if(data != nullptr) // && m_withResultVector) + // { + // // copy data to m_resultVectors[0] + // // since we have already allocated memory for each result vector, we can directly 
copy data to it + // // memcpy(m_resultVectors[0].get(), data, sizeof(T)); + // memcpy() + // } + Heapify(m_resultNum); + return true; + } + return false; + } + inline void SortResult() { for (int i = m_resultNum - 1; i >= 0; i--) { std::swap(m_results[0], m_results[i]); + // if(m_withResultVector) + // { + // std::swap(m_resultVectors[0], m_resultVectors[i]); + // } Heapify(i); } } @@ -100,6 +148,17 @@ class QueryResultSet : public QueryResult std::reverse(m_results.Data(), m_results.Data() + m_resultNum); } + // std::shared_ptr GetVector(int idx) const + // { + // if (idx < m_resultNum) return m_resultVectors[idx]; + // return nullptr; + // } + ByteArray GetVector(int idx) const + { + if (idx < m_resultNum) return m_results[idx].Vector; + return ByteArray(); + } + private: void Heapify(int count) { @@ -110,15 +169,30 @@ class QueryResultSet : public QueryResult if (m_results[parent] < m_results[next]) { std::swap(m_results[next], m_results[parent]); + // if(m_withResultVector) + // { + // std::swap(m_resultVectors[next], m_resultVectors[parent]); + // } parent = next; next = (parent << 1) + 1; } else break; } - if (next == maxidx && m_results[parent] < m_results[next]) std::swap(m_results[parent], m_results[next]); + if (next == maxidx && m_results[parent] < m_results[next]) + { + std::swap(m_results[parent], m_results[next]); + // if(m_withResultVector) + // { + // std::swap(m_resultVectors[parent], m_resultVectors[next]); + // } + } } + + // bool m_withResultVector = false; + // std::vector> m_resultVectors; }; } } + #endif // _SPTAG_COMMON_QUERYRESULTSET_H_ diff --git a/AnnService/inc/Core/SPANN/ExtraFullGraphSearcher.h b/AnnService/inc/Core/SPANN/ExtraFullGraphSearcher.h index 0b84f0167..895ac3754 100644 --- a/AnnService/inc/Core/SPANN/ExtraFullGraphSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraFullGraphSearcher.h @@ -4,6 +4,7 @@ #ifndef _SPTAG_SPANN_EXTRASEARCHER_H_ #define _SPTAG_SPANN_EXTRASEARCHER_H_ +#include "inc/Core/CommonDataStructure.h" #include 
"inc/Helper/VectorSetReader.h" #include "inc/Helper/AsyncFileReader.h" #include "IExtraSearcher.h" @@ -125,7 +126,9 @@ namespace SPTAG if (p_exWorkSpace->m_deduper.CheckAndSet(vectorID)) continue; \ (this->*m_parseEncoding)(p_index, listInfo, (ValueType*)(p_postingListFullData + offsetVector));\ auto distance2leaf = p_index->ComputeDistance(queryResults.GetQuantizedTarget(), p_postingListFullData + offsetVector); \ - queryResults.AddPoint(vectorID, distance2leaf); \ + SPTAG::ByteArray tmpVector = SPTAG::ByteArray::Alloc(sizeof(ValueType)* (m_vectorInfoSize - sizeof(int))); \ + memcpy(tmpVector.Data(), p_postingListFullData + offsetVector, sizeof(ValueType)* (m_vectorInfoSize - sizeof(int))); \ + queryResults.AddPoint(vectorID, distance2leaf, tmpVector); \ } \ #define ProcessPostingOffset() \ @@ -137,7 +140,9 @@ namespace SPTAG if (p_exWorkSpace->m_deduper.CheckAndSet(vectorID)) continue; \ (this->*m_parseEncoding)(p_index, listInfo, (ValueType*)(p_postingListFullData + offsetVector));\ auto distance2leaf = p_index->ComputeDistance(queryResults.GetQuantizedTarget(), p_postingListFullData + offsetVector); \ - queryResults.AddPoint(vectorID, distance2leaf); \ + SPTAG::ByteArray tmpVector = SPTAG::ByteArray::Alloc(sizeof(ValueType)* (m_vectorInfoSize - sizeof(int))); \ + memcpy(tmpVector.Data(), p_postingListFullData + offsetVector, sizeof(ValueType)* (m_vectorInfoSize - sizeof(int))); \ + queryResults.AddPoint(vectorID, distance2leaf, tmpVector);\ foundResult = true;\ break;\ } \ diff --git a/AnnService/inc/Core/SPANN/IExtraSearcher.h b/AnnService/inc/Core/SPANN/IExtraSearcher.h index 064fbfcac..904a9d996 100644 --- a/AnnService/inc/Core/SPANN/IExtraSearcher.h +++ b/AnnService/inc/Core/SPANN/IExtraSearcher.h @@ -8,6 +8,7 @@ #include "inc/Core/VectorIndex.h" #include "inc/Helper/AsyncFileReader.h" +#include "inc/Helper/VectorSetReader.h" #include #include @@ -229,6 +230,7 @@ namespace SPTAG { virtual bool CheckValidPosting(SizeType postingID) = 0; virtual ErrorCode 
GetPostingDebug(ExtraWorkSpace* p_exWorkSpace, std::shared_ptr p_index, SizeType vid, std::vector& VIDs, std::shared_ptr& vecs) = 0; + }; } // SPANN } // SPTAG diff --git a/AnnService/inc/Core/SPANN/Index.h b/AnnService/inc/Core/SPANN/Index.h index 15ed7d4ee..00ccc0bb2 100644 --- a/AnnService/inc/Core/SPANN/Index.h +++ b/AnnService/inc/Core/SPANN/Index.h @@ -141,6 +141,7 @@ namespace SPTAG ErrorCode SearchDiskIndex(QueryResult& p_query, SearchStats* p_stats = nullptr) const; bool SearchDiskIndexIterative(QueryResult& p_headQuery, QueryResult& p_query, ExtraWorkSpace* extraWorkspace) const; + ErrorCode SearchDoc2Doc(QueryResult &p_query, COMMON::QueryResultSet* p_results, std::unique_ptr workSpace) const; ErrorCode DebugSearchDiskIndex(QueryResult& p_query, int p_subInternalResultNum, int p_internalResultNum, SearchStats* p_stats = nullptr, std::set* truth = nullptr, std::map>* found = nullptr) const; ErrorCode UpdateIndex(); diff --git a/AnnService/inc/Core/SPANN/Options.h b/AnnService/inc/Core/SPANN/Options.h index 443508f32..6f29e86b0 100644 --- a/AnnService/inc/Core/SPANN/Options.h +++ b/AnnService/inc/Core/SPANN/Options.h @@ -122,6 +122,9 @@ namespace SPTAG { int m_debugBuildInternalResultNum; bool m_enableADC; int m_iotimeout; + bool m_spreadSearch; + int m_doc2docRounds; + int m_doc2docResults; // Iterative int m_headBatch; diff --git a/AnnService/inc/Core/SPANN/ParameterDefinitionList.h b/AnnService/inc/Core/SPANN/ParameterDefinitionList.h index a9068d91d..ca407abf0 100644 --- a/AnnService/inc/Core/SPANN/ParameterDefinitionList.h +++ b/AnnService/inc/Core/SPANN/ParameterDefinitionList.h @@ -120,7 +120,9 @@ DefineSSDParameter(m_enableADC, bool, false, "EnableADC") DefineSSDParameter(m_recall_analysis, bool, false, "RecallAnalysis") DefineSSDParameter(m_debugBuildInternalResultNum, int, 64, "DebugBuildInternalResultNum") DefineSSDParameter(m_iotimeout, int, 30, "IOTimeout") - +DefineSSDParameter(m_spreadSearch, bool, true, "SpreadSearch") 
+DefineSSDParameter(m_doc2docRounds, int, 2, "doc2docRounds") +DefineSSDParameter(m_doc2docResults, int, 32, "Doc2DocResults") // Iterative DefineSSDParameter(m_headBatch, int, 32, "IterativeSearchHeadBatch") diff --git a/AnnService/inc/Core/SearchResult.h b/AnnService/inc/Core/SearchResult.h index 8c458fe3b..d356474e5 100644 --- a/AnnService/inc/Core/SearchResult.h +++ b/AnnService/inc/Core/SearchResult.h @@ -5,6 +5,8 @@ #define _SPTAG_SEARCHRESULT_H_ #include "CommonDataStructure.h" +#include "inc/Core/Common.h" +#include namespace SPTAG { @@ -67,6 +69,7 @@ namespace SPTAG SizeType VID; float Dist; ByteArray Meta; + ByteArray Vector; bool RelaxedMono; BasicResult() : VID(-1), Dist(MaxDist), RelaxedMono(false) {} @@ -75,6 +78,8 @@ namespace SPTAG BasicResult(SizeType p_vid, float p_dist, ByteArray p_meta) : VID(p_vid), Dist(p_dist), Meta(p_meta), RelaxedMono(false) {} BasicResult(SizeType p_vid, float p_dist, ByteArray p_meta, bool p_relaxedMono) : VID(p_vid), Dist(p_dist), Meta(p_meta), RelaxedMono(p_relaxedMono) {} + BasicResult(SizeType p_vid, float p_dist, ByteArray p_meta, ByteArray p_vector) : VID(p_vid), Dist(p_dist), Meta(p_meta), Vector(p_vector), RelaxedMono(false) {} + BasicResult(SizeType p_vid, float p_dist, ByteArray p_meta, ByteArray p_vector, bool p_relaxedMono) : VID(p_vid), Dist(p_dist), Meta(p_meta), Vector(p_vector), RelaxedMono(p_relaxedMono) {} }; } // namespace SPTAG diff --git a/AnnService/src/Core/SPANN/SPANNIndex.cpp b/AnnService/src/Core/SPANN/SPANNIndex.cpp index aecd3ac23..88a017f2f 100644 --- a/AnnService/src/Core/SPANN/SPANNIndex.cpp +++ b/AnnService/src/Core/SPANN/SPANNIndex.cpp @@ -1,10 +1,17 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+#include "inc/Core/Common.h" +#include "inc/Core/CommonDataStructure.h" +#include "inc/Core/SPANN/IExtraSearcher.h" #include "inc/Core/SPANN/Index.h" #include "inc/Helper/VectorSetReaders/MemoryReader.h" #include "inc/Core/SPANN/ExtraFullGraphSearcher.h" #include +#include +#include +#include +#include #include "inc/Core/ResultIterator.h" #include "inc/Core/SPANN/SPANNResultIterator.h" #pragma warning(disable:4242) // '=' : conversion from 'int' to 'short', possible loss of data @@ -199,9 +206,11 @@ namespace SPTAG p_queryResults = (COMMON::QueryResultSet*) & p_query; else p_queryResults = new COMMON::QueryResultSet((const T*)p_query.GetTarget(), m_options.m_searchInternalResultNum); - + // p_queryResults = new COMMON::QueryResultSet((const T*)p_query.GetTarget(), m_options.m_searchInternalResultNum); + // auto t1 = std::chrono::high_resolution_clock::now(); m_index->SearchIndex(*p_queryResults); - + [[maybe_unused]] int visited_postings = 0; + // std::set searchedPostingIDs; if (m_extraSearcher != nullptr) { auto workSpace = m_workSpaceFactory->GetWorkSpace(); if (!workSpace) { @@ -221,7 +230,15 @@ namespace SPTAG if (res->VID == -1) break; auto postingID = res->VID; + // if (res->Vector.Data() == nullptr) { + // we should find this vector in the m_index + res->Vector = ByteArray::Alloc(m_index->GetFeatureDim() * sizeof(T)); + auto tmp = m_index->GetSample(res->VID); + std::memcpy(res->Vector.Data(), tmp, m_index->GetFeatureDim() * sizeof(T)); + // } + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "PostingID: %d\n", postingID); res->VID = static_cast((m_vectorTranslateMap.get())[res->VID]); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "VID: %d\n", res->VID); if (res->VID == MaxSize) { res->VID = -1; res->Dist = MaxDist; @@ -232,12 +249,26 @@ namespace SPTAG (limitDist > 0.1 && res->Dist > limitDist) || !m_extraSearcher->CheckValidPosting(postingID)) continue; + ++visited_postings; workSpace->m_postingIDs.emplace_back(postingID); } + // record searched postings, if 
we want to conduct doc2doc search we should avoid visiting them again + // searchedPostingIDs.insert(workSpace->m_postingIDs.begin(), workSpace->m_postingIDs.end()); p_queryResults->Reverse(); m_extraSearcher->SearchIndex(workSpace.get(), *p_queryResults, m_index, nullptr); - m_workSpaceFactory->ReturnWorkSpace(std::move(workSpace)); + if(!m_options.m_spreadSearch) + { + m_workSpaceFactory->ReturnWorkSpace(std::move(workSpace)); + } + else + { + // We may need to do doc2doc search, so don't sort the result here + // Instead we put it after the doc2doc search stage + + // Use this workspace for doc2doc round, so don't return it + SearchDoc2Doc(p_query, p_queryResults, std::move(workSpace)); + } p_queryResults->SortResult(); } @@ -257,6 +288,115 @@ namespace SPTAG return ErrorCode::Success; } + template + ErrorCode Index::SearchDoc2Doc(QueryResult &p_query, COMMON::QueryResultSet* p_queryResults, std::unique_ptr workSpace) const { + // Use this set to store the visited posting ids, avoid visiting them again + std::unordered_set visited(workSpace->m_postingIDs.begin(), workSpace->m_postingIDs.end()); + // Run doc2doc search for multiple rounds + for (int d = 0; d < m_options.m_doc2docRounds; ++d) { + // Store the doc2doc searched postings in this set + auto p_queryIntermediateResults = new COMMON::QueryResultSet((const T*)p_query.GetTarget(), m_options.m_searchInternalResultNum); + workSpace->m_postingIDs.clear(); + // Search doc2docResults number of new centroids for each centroid in the previous round + auto p_newResultsToQuery = new COMMON::QueryResultSet((const T*)p_query.GetTarget(), min(m_options.m_doc2docResults, p_queryResults->GetResultNum())); + for (int i = 0; i < p_newResultsToQuery->GetResultNum(); ++i) { + p_newResultsToQuery->GetResult(i)->VID = -1; + p_newResultsToQuery->GetResult(i)->Dist = MaxDist; + } + p_queryResults->SortResult(); + std::set skippedIdx; + for (auto i = 0; i < min(m_options.m_doc2docResults, p_queryResults->GetResultNum()); 
++i) { + auto res = p_queryResults->GetResult(i); + // Use this res to build a new query to search on m_index + if (res->VID == -1) break; + bool goodRNG = true; + for (int k = 0; k < i; ++k) { + // apply rng rule to filter out close intermediate results + if (skippedIdx.find(k) == skippedIdx.end() + && m_options.m_rngFactor * m_fComputeDistance((const T*)p_queryResults->GetVector(k).Data(), (const T*)res->Vector.Data(), m_index->GetFeatureDim()) < res->Dist) { + goodRNG = false; + skippedIdx.insert(i); + break; + } + } + if (!goodRNG) continue; + + p_queryIntermediateResults->SetTarget((const T*)res->Vector.Data(), nullptr); + m_index->SearchIndex(*p_queryIntermediateResults); + + float limitDist = p_queryIntermediateResults->GetResult(0)->Dist * m_options.m_maxDistRatio; + int foundDocs = 0; + for (int j = 0; j < p_queryIntermediateResults->GetResultNum(); ++j) + { + auto res = p_queryIntermediateResults->GetResult(j); + if (res->VID == -1) break; + + auto postingID = res->VID; + + res->VID = static_cast((m_vectorTranslateMap.get())[res->VID]); + if (res->VID == MaxSize) { + res->VID = -1; + res->Dist = MaxDist; + } + + // Don't do disk reads for irrelevant pages + if (workSpace->m_postingIDs.size() >= m_options.m_searchInternalResultNum || + (limitDist > 0.1 && res->Dist > limitDist) || + !m_extraSearcher->CheckValidPosting(postingID)) + continue; + // Don't visit the same posting again + if (visited.find(postingID) != visited.end()) continue; + + // Previously the distance was the distance to the intermediate query + // Replace the distance with the distance to the original query + res->Dist = m_fComputeDistance((const T*)m_index->GetSample(postingID), (const T*)p_query.GetTarget(), m_index->GetFeatureDim()); + + // Add the new centroid to the new results to query, + // Increase the foundDocs count if the centroid is added successfully + if(p_newResultsToQuery->AddPoint(postingID, res->Dist)) { + ++foundDocs; + } + visited.insert(postingID); + } + // If we 
found at most 2 new centroids, stop the current round + if(foundDocs <= 2) break; + + p_queryIntermediateResults->Reset(); + } + for(int i = 0; i < p_newResultsToQuery->GetResultNum(); ++i) { + if (p_newResultsToQuery->GetResult(i)->VID == -1) break; + + workSpace->m_postingIDs.emplace_back(p_newResultsToQuery->GetResult(i)->VID); + } + + p_queryResults->Reverse(); + + std::set oldResults; + for (int i = 0; i < p_queryResults->GetResultNum(); ++i) { + if(p_queryResults->GetResult(i)->VID == -1) break; + oldResults.insert(p_queryResults->GetResult(i)->VID); + + } + m_extraSearcher->SearchIndex(workSpace.get(), *p_queryResults, m_index, nullptr); + int foundNewNNCount = 0; + // Count the number of new NNs found in this doc2doc round + for (int i = 0; i < p_queryResults->GetResultNum(); ++i) { + if(p_queryResults->GetResult(i)->VID == -1) break; + if(oldResults.find(p_queryResults->GetResult(i)->VID) == oldResults.end()) { + ++foundNewNNCount; + } + } + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Found %d new NNs in round %d from %d new centroids\n", foundNewNNCount, d, workSpace->m_postingIDs.size()); + delete p_queryIntermediateResults; + delete p_newResultsToQuery; + // If we cannot find more new NNs, stop the doc2doc search + if(foundNewNNCount == 0) break; + if (workSpace->m_postingIDs.size() == 0) break; + } + m_workSpaceFactory->ReturnWorkSpace(std::move(workSpace)); + return ErrorCode::Success; + } + template ErrorCode Index::SearchIndexIterative(QueryResult& p_headQuery, QueryResult& p_query, COMMON::WorkSpace* p_indexWorkspace, diff --git a/AnnService/src/IndexSearcher/main.cpp b/AnnService/src/IndexSearcher/main.cpp index 2c61f520b..0db44e906 100644 --- a/AnnService/src/IndexSearcher/main.cpp +++ b/AnnService/src/IndexSearcher/main.cpp @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+#include "inc/Core/Common.h" #include "inc/Helper/VectorSetReader.h" #include "inc/Helper/SimpleIniReader.h" #include "inc/Helper/CommonHelper.h" @@ -40,6 +41,8 @@ class SearcherOptions : public Helper::ReaderOptions AddOptionalOption(m_debugQuery, "-q", "--debugquery", "Debug query number."); AddOptionalOption(m_enableADC, "-adc", "--adc", "Enable ADC Distance computation"); AddOptionalOption(m_outputformat, "-of", "--ouputformat", "0: TXT 1: BINARY."); + AddOptionalOption(m_truthType, "-rt", "--truthtype", "truth file type. (TXT, XVEC or DEFAULT)"); + AddOptionalOption(m_spreadSearch, "-ss", "--spreadsearch", "Spread search."); } ~SearcherOptions() {} @@ -58,6 +61,8 @@ class SearcherOptions : public Helper::ReaderOptions VectorFileType m_dataFileType = VectorFileType::DEFAULT; + TruthFileType m_truthType = TruthFileType::DEFAULT; + int m_withMeta = 0; int m_K = 32; @@ -73,6 +78,8 @@ class SearcherOptions : public Helper::ReaderOptions bool m_enableADC = false; int m_outputformat = 0; + + bool m_spreadSearch = false; }; template @@ -165,7 +172,14 @@ int Process(std::shared_ptr options, VectorIndex& index) if (index.GetIndexAlgoType() == IndexAlgoType::SPANN) { int SPANNInternalResultNum; if (SPTAG::Helper::Convert::ConvertStringTo(index.GetParameter("SearchInternalResultNum", "BuildSSDIndex").c_str(), SPANNInternalResultNum)) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPANN Internal result number: %d\n", SPANNInternalResultNum); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Internal result number: %d\n", internalResultNum); internalResultNum = max(internalResultNum, SPANNInternalResultNum); + } + // internalResultNum = 1024; + // index.SetParameter("SearchInternalResultNum", std::to_string(internalResultNum).c_str(), "BuildSSDIndex"); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Final Internal result number: %d\n", internalResultNum); } std::vector results(options->m_batch, QueryResult(NULL, internalResultNum, options->m_withMeta != 0)); std::vector 
latencies(options->m_batch, 0); @@ -173,11 +187,13 @@ int Process(std::shared_ptr options, VectorIndex& index) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "[query]\t\t[maxcheck]\t[avg] \t[99%] \t[95%] \t[recall] \t[qps] \t[mem]\n"); std::vector totalAvg(maxCheck.size(), 0.0), total99(maxCheck.size(), 0.0), total95(maxCheck.size(), 0.0), totalRecall(maxCheck.size(), 0.0), totalLatency(maxCheck.size(), 0.0); + for (int startQuery = 0; startQuery < queryVectors->Count(); startQuery += options->m_batch) { int numQuerys = min(options->m_batch, queryVectors->Count() - startQuery); + for (SizeType i = 0; i < numQuerys; i++) results[i].SetTarget(queryVectors->GetVector(startQuery + i)); - if (ftruth != nullptr) COMMON::TruthSet::LoadTruth(ftruth, truth, numQuerys, truthDim, options->m_truthK, (options->m_truthFile.find("bin") != std::string::npos)? TruthFileType::DEFAULT : TruthFileType::TXT); + if (ftruth != nullptr) COMMON::TruthSet::LoadTruth(ftruth, truth, numQuerys, truthDim, options->m_truthK, options->m_truthType); for (int mc = 0; mc < maxCheck.size(); mc++) @@ -190,7 +206,7 @@ int Process(std::shared_ptr options, VectorIndex& index) std::vector threads; threads.reserve(options->m_threadNum); auto batchstart = std::chrono::high_resolution_clock::now(); - + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Spread Search Status: %d\n", options->m_spreadSearch); for (std::uint32_t i = 0; i < options->m_threadNum; i++) { threads.emplace_back([&, i] { NumaStrategy ns = (index.GetIndexAlgoType() == IndexAlgoType::SPANN)? NumaStrategy::SCATTER: NumaStrategy::LOCAL; // Only for SPANN, we need to avoid IO threads overlap with search threads. 
@@ -250,6 +266,7 @@ int Process(std::shared_ptr options, VectorIndex& index) GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)); unsigned long long peakWSS = pmc.PeakWorkingSetSize / 1000000000; #endif + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "[query]\t\t[maxcheck]\t[avg] \t[99%] \t[95%] \t[recall] \t[qps] \t[mem]\n"); SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "%d-%d\t%s\t%.4f\t%.4f\t%.4f\t%.4f\t\t%.4f\t\t%lluGB\n", startQuery, (startQuery + numQuerys), maxCheck[mc].c_str(), timeMean, l99, l95, recall, (numQuerys / batchLatency), peakWSS); totalAvg[mc] += timeMean * numQuerys; total95[mc] += l95 * numQuerys; @@ -349,6 +366,7 @@ int main(int argc, char** argv) exit(1); } + std::shared_ptr vecIndex; auto ret = SPTAG::VectorIndex::LoadIndex(options->m_indexFolder, vecIndex); if (SPTAG::ErrorCode::Success != ret || nullptr == vecIndex) @@ -385,6 +403,7 @@ int main(int argc, char** argv) for (const auto& iter : iniReader.GetParameters(sections[i])) { vecIndex->SetParameter(iter.first.c_str(), iter.second.c_str(), sections[i]); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Set [%s]%s = %s\n", sections[i].c_str(), iter.first.c_str(), iter.second.c_str()); } } diff --git a/CMakeLists.txt b/CMakeLists.txt index d6da2a832..b5a962b84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,10 +14,14 @@ function(CXX_COMPILER_DUMPVERSION _OUTPUT_VERSION) set(${_OUTPUT_VERSION} ${COMPILER_VERSION} PARENT_SCOPE) endfunction() +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + if(NOT WIN32) CXX_COMPILER_DUMPVERSION(CXX_COMPILER_VERSION) endif() + + if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") # require at least gcc 5.0 if (CXX_COMPILER_VERSION VERSION_LESS 5.0) diff --git a/buildtext2image10m.ini b/buildtext2image10m.ini new file mode 100644 index 000000000..29ea50fb2 --- /dev/null +++ b/buildtext2image10m.ini @@ -0,0 +1,63 @@ +[Base] +ValueType=Float +DistCalcMethod=InnerProduct +IndexAlgoType=BKT +Dim=200 +VectorPath=/home/v-zikaiwang/SPTAG/datasets/text2image1B/base.10M.fbin 
+VectorType=DEFAULT +QueryPath=/home/v-zikaiwang/SPTAG/datasets/text2image1B/query.heldout.30K.fbin +QueryType=DEFAULT +WarmupPath=/home/v-zikaiwang/SPTAG/datasets/text2image1B/query.heldout.30K.fbin +WarmupType=DEFAULT +TruthPath=/home/v-zikaiwang/SPTAG/datasets/text2image1B/gt100-heldout.30K.fbin +TruthType=DEFAULT +IndexDirectory=text2image10m + +[SelectHead] +isExecute=true +TreeNumber=1 +BKTKmeansK=32 +BKTLeafSize=8 +SamplesNumber=1000 +SaveBKT=false +SelectThreshold=50 +SplitFactor=6 +SplitThreshold=100 +Ratio=0.16 +NumberOfThreads=64 +BKTLambdaFactor=-1 + +[BuildHead] +isExecute=true +NeighborhoodSize=32 +TPTNumber=32 +TPTLeafSize=2000 +MaxCheck=8192 +MaxCheckForRefineGraph=8192 +RefineIterations=3 +NumberOfThreads=64 +BKTLambdaFactor=-1 + +[BuildSSDIndex] +isExecute=true +BuildSsdIndex=true +InternalResultNum=64 +ReplicaCount=8 +PostingPageLimit=12 +NumberOfThreads=64 +MaxCheck=8192 +TmpDir=/tmp/ + +[SearchSSDIndex] +isExecute=true +BuildSsdIndex=false +InternalResultNum=64 +NumberOfThreads=1 +HashTableExponent=4 +ResultNum=10 +MaxCheck=2048 +MaxDistRatio=8.0 +SearchPostingPageLimit=12 +SpreadSearch=true +doc2docRounds=2 +Doc2DocResults=32 \ No newline at end of file diff --git a/data.ipynb b/data.ipynb new file mode 100644 index 000000000..fba886c7d --- /dev/null +++ b/data.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-0.05 0.05 0.15 0.25 0.35 0.45 0.55 0.65 0.75 0.85 0.95 1.05]\n", + "[ 2495. 386. 300. 391. 595. 1001. 1866. 4364. 7879. 17623.\n", + " 63100.]\n", + "[0.02495 0.02881 0.03181 0.03572 0.04167 0.05168 0.07034 0.11398 0.19277\n", + " 0.369 1. ]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "80 + 4 rounds Average Recall@10: 0.903927, qps: 117.375000\n", + "[-0.05 0.05 0.15 0.25 0.35 0.45 0.55 0.65 0.75 0.85 0.95 1.05]\n", + "[ 2623. 403. 358. 467. 716. 1256. 2272. 5021. 8930. 18759.\n", + " 59195.]\n", + "[0.02623 0.03026 0.03384 0.03851 0.04567 0.05823 0.08095 0.13116 0.22046\n", + " 0.40805 1. ]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "80 + 4 rounds rng Average Recall@10: 0.892664, qps: 145.169700\n", + "[-0.05 0.05 0.15 0.25 0.35 0.45 0.55 0.65 0.75 0.85 0.95 1.05]\n", + "[ 2782. 412. 322. 404. 604. 1031. 1780. 4053. 7309. 16390.\n", + " 64913.]\n", + "[0.02782 0.03194 0.03516 0.0392 0.04524 0.05555 0.07335 0.11388 0.18697\n", + " 0.35087 1. ]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "q2d-160 Average Recall@10: 0.904002, qps: 138.753100\n", + "[1. 0.97505 0.97119 0.96819 0.96428 0.95833 0.94832 0.92966 0.88602\n", + " 0.80723 0.631 ]\n", + "[1. 0.97377 0.96974 0.96616 0.96149 0.95433 0.94177 0.91905 0.86884\n", + " 0.77954 0.59195]\n", + "[1. 0.97218 0.96806 0.96484 0.9608 0.95476 0.94445 0.92665 0.88612\n", + " 0.81303 0.64913]\n", + "80 + 4 rounds average recall: 0.903927, qps: 117.375000\n", + "80 + 4 rounds rng average recall: 0.892664, qps: 145.169700\n", + "q2d-160 average recall: 0.904002, qps: 138.753100\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "import re\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Function to extract recall values from log file\n", + "def extract_recall(log_file):\n", + " recall_values = []\n", + " qps = 0\n", + " with open(log_file, 'r') as file:\n", + " for line in file:\n", + " match = re.search(r'recall:(\\d+\\.\\d+)', line)\n", + " if match:\n", + " recall_values.append(round(float(match.group(1)), 1))\n", + " # average latency is on the 2-nd last line\n", + " if line.startswith(\"[1] 0-100000\"):\n", + " qps = float(line.split()[-1]) \n", + " return recall_values, qps\n", + "\n", + "# Function to plot recall distribution\n", + "def plot_recall_distribution(recall_values, case):\n", + " # num_bins = 11\n", + " bins = np.arange(-0.05, 1.15, 0.1)\n", + " \n", + " counts, bins, patches = plt.hist(recall_values, bins=bins, edgecolor='grey', color='skyblue', zorder=10)\n", + " \n", + " print(bins)\n", + " print(counts)\n", + " total_count = sum(counts)\n", + " percentages = (counts / total_count) * 100\n", + " # calcualte cdf of counts\n", + " cdf = np.cumsum(counts)\n", + " cdf = cdf / cdf[-1]\n", + " # print cdf one by one\n", + " # print(\"cdf\")\n", + " print(cdf)\n", + " cdf = 1 - cdf\n", + " plt.title('Recall@10 Distribution of SPANN Index, Text2Image1M, %s'%case)\n", + " plt.xlabel('Recall@10')\n", + " plt.ylabel('Frequency of Queries')\n", + " \n", + "\n", + " # Set tick positions\n", + " tick_positions = bins[:-1] + (bins[1] - bins[0]) / 2\n", + " \n", + " plt.xticks(tick_positions, [f'{x:.1f}' for x in tick_positions], rotation=45)\n", + " for percent, count, x in zip(percentages, counts, tick_positions):\n", + " plt.text(x, count, f'{percent:.2f}%', ha='center', va='bottom', fontsize=8)\n", + " # plt.text(x, count, str(int(count)), ha='center', va='bottom')\n", + "\n", + " 
plt.grid(axis='y', alpha=0.5, linestyle='--', zorder=0)\n", + " plt.show()\n", + " return cdf\n", + "\n", + "# Function to round the values to the nearest 0.1\n", + "def round_to_nearest_tenth(values):\n", + " return np.round(values * 10) / 10\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "# read files from result logs and extract the parameters from file name after \"each_\"\n", + "\n", + "# rbs = float(\"%.4f\"%(1 - cdf_spann[7]))\n", + "# print(rbs)\n", + "# tags.append(\"SPANN\")\n", + "cdfs = []\n", + "tags = []\n", + "\n", + "# cases = ['doc2doc-64', 'doc2doc-128', 'query2doc-64', 'query2doc-128']\n", + "# cases = ['64', '32', '16', '8']\n", + "# cases = ['auto-64', '4-64', '4-64', '16-64', '32-64', 'q2d-64']\n", + "cases = ['80 + 4 rounds', '80 + 4 rounds rng', 'q2d-160']\n", + "# logs = ['Release/searchlog-text2image1M-d2d-8-64', 'Release/searchlog-text2image1M-d2d-128', 'Release/searchlog-text2image1M-q2d-64', 'Release/searchlog-text2image1M-q2d-128']\n", + "# logs = ['Release/searchlog-text2image10M-d2d-64', 'Release/searchlog-text2image10M-d2d-128', 'Release/searchlog-text2image10M-q2d-64', 'Release/searchlog-text2image10M-q2d-128']\n", + "# logs = ['Release/searchlog-text2image10M-q2d-64', 'Release/searchlog-text2image10M32', 'Release/searchlog-text2image10M16', 'Release/searchlog-text2image10M8']\n", + "# logs = [\"Release/searchlog-text2image1M-d2d-auto-64\", 'Release/searchlog-text2image1M-d2d-4-64', 'Release/searchlog-text2image1M-d2d-4-64', 'Release/searchlog-text2image1M-d2d-16-64', 'Release/searchlog-text2image1M-d2d-32-64', 'Release/searchlog-text2image1M-q2d-64']\n", + "logs = ['Release/searchlog-text2image10M-d2d-internal80', 'Release/searchlog-text2image10M-d2d-internal80-rng', 'Release/searchlog-text2image10M-q2d-160']\n", + "avg_recalls = []\n", + "qpss = []\n", + "for i in range(len(cases)):\n", + " log_file = logs[i]\n", + " recall_values, qps = extract_recall(log_file)\n", + " cdf = plot_recall_distribution(recall_values, cases[i])\n", + 
" avg_recall = np.mean(recall_values)\n", + " print(\"%s Average Recall@10: %f, qps: %f\"%(cases[i], avg_recall, qps))\n", + " avg_recalls.append(avg_recall)\n", + " qpss.append(qps)\n", + "\n", + " cdfs.append(np.insert(cdf, 0, 1)[:-1])\n", + " tags.append(cases[i])\n", + "\n", + "\n", + "for i in range(len(cdfs)):\n", + " # plot a line for the cdf\n", + " print(cdfs[i])\n", + " plt.plot(np.arange(0.0, 1.1, 0.1), cdfs[i], linestyle='-', marker='o')\n", + "plt.xticks(np.arange(0, 1.1, 0.1))\n", + "plt.yticks(np.arange(0, 1.1, 0.1))\n", + "plt.legend(tags)\n", + "\n", + "# plt.yticks(np.arange(0.0, 1.1, 0.1))\n", + "plt.ylim(0.9, 1)\n", + "plt.yticks(np.arange(0.9, 1.01, 0.01))\n", + "\n", + "# add a grid line at x=0.8\n", + "plt.axvline(x=0.8, color='black', linestyle='--', linewidth=1)\n", + "# plt.axhline(x=0.8, color='black', linestyle='--', linewidth=1)\n", + "plt.grid(axis='y', alpha=0.5, linestyle='--', zorder=0)\n", + "plt.grid(axis='x', alpha=0.5, linestyle='--', zorder=0)\n", + "# plt.title('Cumulative Recall@10 Distribution of FAISS HNSW Index, SIFT1M')\n", + "plt.ylabel('Robustness')\n", + "plt.xlabel('Recall@10')\n", + "for i in range(len(cases)):\n", + " print(\"%s average recall: %f, qps: %f\"%(cases[i], avg_recalls[i], qpss[i]))\n", + "plt.show()\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.19" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/datasets/SPACEV1B/LICENSE b/datasets/SPACEV1B/LICENSE deleted file mode 100644 index bc492e532..000000000 --- a/datasets/SPACEV1B/LICENSE +++ /dev/null @@ -1,47 +0,0 @@ -# Open Use of Data Agreement v1.0 - -This is the Open Use of Data Agreement, Version 1.0 (the 
"O-UDA"). Capitalized terms are defined in Section 5. Data Provider and you agree as follows: - -1. **Provision of the Data** - - 1.1. You may use, modify, and distribute the Data made available to you by the Data Provider under this O-UDA if you follow the O-UDA's terms. - - 1.2. Data Provider will not sue you or any Downstream Recipient for any claim arising out of the use, modification, or distribution of the Data provided you meet the terms of the O-UDA. - - 1.3 This O-UDA does not restrict your use, modification, or distribution of any portions of the Data that are in the public domain or that may be used, modified, or distributed under any other legal exception or limitation. - -2. **No Restrictions on Use or Results** - - 2.1. The O-UDA does not impose any restriction with respect to: - - 2.1.1. the use or modification of Data; or - - 2.1.2. the use, modification, or distribution of Results. - -3. **Redistribution of Data** - - 3.1. You may redistribute the Data under terms of your choice, so long as: - - 3.1.1. You include with any Data you redistribute all credit or attribution information that you received with the Data, and your terms require any Downstream Recipient to do the same; and - - 3.1.2. Your terms include a warranty disclaimer and limitation of liability for Upstream Data Providers at least as broad as those contained in Section 4.2 and 4.3 of the O-UDA. - -4. **No Warranty, Limitation of Liability** - - 4.1. Data Provider does not represent or warrant that it has any rights whatsoever in the Data. - - 4.2. THE DATA IS PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - 4.3. 
NEITHER DATA PROVIDER NOR ANY UPSTREAM DATA PROVIDER SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE DATA OR RESULTS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - -5. **Definitions** - - 5.1. "Data" means the material you receive under the O-UDA in modified or unmodified form, but not including Results. - - 5.2. "Data Provider" means the source from which you receive the Data and with whom you enter into the O-UDA. - - 5.3. "Downstream Recipient" means any person or persons who receives the Data directly or indirectly from you in accordance with the O-UDA. - - 5.4. "Result" means anything that you develop or improve from your use of Data that does not include more than a de minimis portion of the Data on which the use is based. Results may include de minimis portions of the Data necessary to report on or explain use that has been conducted with the Data, such as figures in scientific papers, but do not include more. Artificial intelligence models trained on Data (and which do not include more than a de minimis portion of Data) are Results. - - 5.5. "Upstream Data Providers" means the source or sources from which the Data Provider directly or indirectly received, under the terms of the O-UDA, material that is included in the Data. diff --git a/datasets/SPACEV1B/README.md b/datasets/SPACEV1B/README.md deleted file mode 100644 index 034c684a7..000000000 --- a/datasets/SPACEV1B/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# SPACEV1B: A billion-Scale vector dataset for text descriptors -This is a dataset released by Microsoft from SpaceV, Bing web vector search scenario, for large scale vector search related research usage. 
It consists of more than one billion document vectors -and 29K+ query vectors encoded by Microsoft SpaceV Superior model. This model is trained to capture generic intent representation for both documents and queries. -The goal is to match the query vector to the closest document vectors in order to achieve topk relevant documents for each query. - -## Introduction - -This dataset contains: - - * [vectors.bin](vectors.bin): It contains 1,402,020,720 100-dimensional int8-type document descriptors. - * [query.bin](query.bin): It contains 29,316 100-dimensional int8-type query descriptors. - * [truth.bin](truth.bin): It contains 100 nearest ground truth(include vector ids and distances) of 29,316 queries according to L2 distance. - * [query_log.bin](query_log.bin): It contains 94,162 100-dimensional int8-type history query descriptors. - -## How to read the vectors, queries, and truth - -```python -import struct -import numpy as np -import os - -part_count = len(os.listdir('vectors.bin')) -for i in range(1, part_count + 1): - fvec = open(os.path.join('vectors.bin', 'vectors_%d.bin' % i), 'rb') - if i == 1: - vec_count = struct.unpack('i', fvec.read(4))[0] - vec_dimension = struct.unpack('i', fvec.read(4))[0] - vecbuf = bytearray(vec_count * vec_dimension) - vecbuf_offset = 0 - while True: - part = fvec.read(1048576) - if len(part) == 0: break - vecbuf[vecbuf_offset: vecbuf_offset + len(part)] = part - vecbuf_offset += len(part) - fvec.close() -X = np.frombuffer(vecbuf, dtype=np.int8).reshape((vec_count, vec_dimension)) - -fq = open('query.bin', 'rb') -q_count = struct.unpack('i', fq.read(4))[0] -q_dimension = struct.unpack('i', fq.read(4))[0] -queries = np.frombuffer(fq.read(q_count * q_dimension), dtype=np.int8).reshape((q_count, q_dimension)) - -ftruth = open('truth.bin', 'rb') -t_count = struct.unpack('i', ftruth.read(4))[0] -topk = struct.unpack('i', ftruth.read(4))[0] -truth_vids = np.frombuffer(ftruth.read(t_count * topk * 4), dtype=np.int32).reshape((t_count, 
topk)) -truth_distances = np.frombuffer(ftruth.read(t_count * topk * 4), dtype=np.float32).reshape((t_count, topk)) -``` - -## License - -The entire dataset is under [O-UDA license](LICENSE) \ No newline at end of file diff --git a/datasets/SPACEV1B/query.bin b/datasets/SPACEV1B/query.bin deleted file mode 100644 index bba5be6bc..000000000 --- a/datasets/SPACEV1B/query.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a531887daca721f633da2b92dacc0aa4c3512aef8ae671031e825ce7134a9e6 -size 2931608 diff --git a/datasets/SPACEV1B/query_log.bin b/datasets/SPACEV1B/query_log.bin deleted file mode 100644 index 25102b7f4..000000000 --- a/datasets/SPACEV1B/query_log.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d4768ea964b595302b08f0b9ef484d7003b2e7848f2f7baf9e2b4bbb3bb36c49 -size 9416208 diff --git a/datasets/SPACEV1B/truth.bin b/datasets/SPACEV1B/truth.bin deleted file mode 100644 index 127acd278..000000000 --- a/datasets/SPACEV1B/truth.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:94d385c2f34b1f2899f276d87c14a67ef969dcb473e44753261af05ad62461bc -size 23452808 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_1.bin b/datasets/SPACEV1B/vectors.bin/vectors_1.bin deleted file mode 100644 index 459d509f8..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_1.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:802f735e6bd1472bacdd93eedf8d1305291659117c2b5e3ca9ac38feb89509af -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_10.bin b/datasets/SPACEV1B/vectors.bin/vectors_10.bin deleted file mode 100644 index f2cc9f7fd..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_10.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca7053c8582853c7a9984b56be8b9954cd8cbeda159b9b63fa5a56f776561be2 -size 4294967295 diff --git 
a/datasets/SPACEV1B/vectors.bin/vectors_11.bin b/datasets/SPACEV1B/vectors.bin/vectors_11.bin deleted file mode 100644 index 260aec2f3..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_11.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b42ee98dbdf215d9c03e99d3c3ac401e615433d70119c7411c689884159773d1 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_12.bin b/datasets/SPACEV1B/vectors.bin/vectors_12.bin deleted file mode 100644 index dc8ddcada..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_12.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0dd7f1a25a6aa53622fa3d66a35af760bccd187a82bbe39a0bf79c681b646289 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_13.bin b/datasets/SPACEV1B/vectors.bin/vectors_13.bin deleted file mode 100644 index 40261a566..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_13.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69d09f33ba230ee9b38e6fc1bf949c7f57b755c49ae8cdaf13ebf14b12272fe5 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_14.bin b/datasets/SPACEV1B/vectors.bin/vectors_14.bin deleted file mode 100644 index 9374d4aea..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_14.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d575a0ec126bfd3ffb6a3dfb7c36ad94c71a398c0a77b0462620feaa7181cbfb -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_15.bin b/datasets/SPACEV1B/vectors.bin/vectors_15.bin deleted file mode 100644 index 91ffea633..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_15.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0286768443fbdd55ec0ffd25cecf85121aafef6c65226dc2008dda2ece5c7c8e -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_16.bin b/datasets/SPACEV1B/vectors.bin/vectors_16.bin deleted file mode 100644 
index 85affbcfd..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_16.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b600774407a1446fcac0748c7beb5fa4214737bf406c2a162ccbb84254c5041d -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_17.bin b/datasets/SPACEV1B/vectors.bin/vectors_17.bin deleted file mode 100644 index efc0af471..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_17.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5fb2167810380da46d894428ff8f2c61de05c9616a6343f1fa6d084759abb062 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_18.bin b/datasets/SPACEV1B/vectors.bin/vectors_18.bin deleted file mode 100644 index 78fadc980..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_18.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d2e69b2f20fb99063944379667b8b9dc33d672079fe6e744d83076c2de5b1ecc -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_19.bin b/datasets/SPACEV1B/vectors.bin/vectors_19.bin deleted file mode 100644 index ad536394f..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_19.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1942f09f15c34c1e141a2bdb31138efcfda9a9fe5671c757e6b6a2e509864d65 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_2.bin b/datasets/SPACEV1B/vectors.bin/vectors_2.bin deleted file mode 100644 index adc632530..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_2.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c8f772d430aa817bbd60319486dce7390924a7b71c3038312c89ab93a6382d43 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_20.bin b/datasets/SPACEV1B/vectors.bin/vectors_20.bin deleted file mode 100644 index 5e75632cb..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_20.bin +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:2becd2dcedb147d6c3927fa3b6f2d0f5ede03870efad8c8d04055e25694d60b2 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_21.bin b/datasets/SPACEV1B/vectors.bin/vectors_21.bin deleted file mode 100644 index 308424ed3..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_21.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0cc41b1a8f6b3fb8ef87dd918b8f12433f1a4bac5711260e6722e9172de14754 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_22.bin b/datasets/SPACEV1B/vectors.bin/vectors_22.bin deleted file mode 100644 index d5e137c2d..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_22.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8cb1780b449120a124d71d4a6e5268c1ac2ec57bf27bb37c23a859b41702f3b8 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_23.bin b/datasets/SPACEV1B/vectors.bin/vectors_23.bin deleted file mode 100644 index 3ad0234a6..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_23.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b04df15421deb05670f21f34a26b4c0d108e5dd0bc8e3024701e46a90bd2d222 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_24.bin b/datasets/SPACEV1B/vectors.bin/vectors_24.bin deleted file mode 100644 index eb4ea18c6..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_24.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2ec09e657776aec02783886de7448642a85ddb004d70df0d10537a7837d076f5 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_25.bin b/datasets/SPACEV1B/vectors.bin/vectors_25.bin deleted file mode 100644 index 49d0e7963..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_25.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a4a3c0ccf4da399397f01748b54f686a416cd5719c126ec93ce9effa124debe3 -size 
4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_26.bin b/datasets/SPACEV1B/vectors.bin/vectors_26.bin deleted file mode 100644 index 416d5aa75..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_26.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:20c541f53d1e0772a270df12543dc0d715b4edb17ca415ebdf45065d2440525f -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_27.bin b/datasets/SPACEV1B/vectors.bin/vectors_27.bin deleted file mode 100644 index dc04c379a..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_27.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69c820cd585cb7883e6489e1e96f8a943be7800bb21e25ed88cff1b63a11a660 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_28.bin b/datasets/SPACEV1B/vectors.bin/vectors_28.bin deleted file mode 100644 index cfeff8cd4..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_28.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d82070201cb0b34fa38579118ad01ba13efb62d8b45e30867ab763e7794ced54 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_29.bin b/datasets/SPACEV1B/vectors.bin/vectors_29.bin deleted file mode 100644 index 05e30128b..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_29.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a11ce9f677543ab2f4b71d199e41e2026020c23a47147f536cbdd93a4266fcd8 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_3.bin b/datasets/SPACEV1B/vectors.bin/vectors_3.bin deleted file mode 100644 index 61e39be36..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_3.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:787c77685a1603d4aa7f2b4b7f9e19ddff2d828ea21554aca52c82e8d7a6a755 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_30.bin b/datasets/SPACEV1B/vectors.bin/vectors_30.bin 
deleted file mode 100644 index 5b6f9030a..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_30.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e426936bcc481e878b6f7ec32c1cb85438d23f63c0dd27330149f64053475a8d -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_31.bin b/datasets/SPACEV1B/vectors.bin/vectors_31.bin deleted file mode 100644 index d7d461021..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_31.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:351d25192805efc17f8fc588c872b907ca2b907ca9cb0d3f1413ea1d8fc09a8f -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_32.bin b/datasets/SPACEV1B/vectors.bin/vectors_32.bin deleted file mode 100644 index bd1bde4e6..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_32.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:142c672c7c1f1e0df93ac87a3d39466c9a7fb199309761ba21771bb1f5d2de86 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_33.bin b/datasets/SPACEV1B/vectors.bin/vectors_33.bin deleted file mode 100644 index 3b0c28771..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_33.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e7bb8962700f6b240481d462a6e0f1a997d085857690cf63b5b5be9270e17b5e -size 2763118568 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_4.bin b/datasets/SPACEV1B/vectors.bin/vectors_4.bin deleted file mode 100644 index c09f47ea0..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_4.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d2f3526062048781bdc36d3b0c80ce49c26a4874206109ff79064de2b3acc4dc -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_5.bin b/datasets/SPACEV1B/vectors.bin/vectors_5.bin deleted file mode 100644 index 40ce70e39..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_5.bin +++ /dev/null @@ -1,3 
+0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7b94a7e08eb31cf2fbe7d24b1e12b23f1341a75aeed96e4885f288333fdfdfbc -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_6.bin b/datasets/SPACEV1B/vectors.bin/vectors_6.bin deleted file mode 100644 index 8cac0cb9f..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_6.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a2b2699313be8e6ca09dc67b3267b6bf42ab2ac350c1198041b2fa9778b7905c -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_7.bin b/datasets/SPACEV1B/vectors.bin/vectors_7.bin deleted file mode 100644 index 089611283..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_7.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7960bac8a7579fcaf27a17271a49d0d61f13bbf6397712f513892e84fe89a7d0 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_8.bin b/datasets/SPACEV1B/vectors.bin/vectors_8.bin deleted file mode 100644 index 4240d9cd8..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_8.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f968cbd5bd1eb53722373ffedeb6bed9659a69a01df030e39f67a755418ca45 -size 4294967295 diff --git a/datasets/SPACEV1B/vectors.bin/vectors_9.bin b/datasets/SPACEV1B/vectors.bin/vectors_9.bin deleted file mode 100644 index d075a1a59..000000000 --- a/datasets/SPACEV1B/vectors.bin/vectors_9.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f50e029134bc17703be6bc69748953ef2c9bb81b29d2286bda4d006d8bfce34c -size 4294967295 diff --git a/docs/Tutorial.ipynb b/docs/Tutorial.ipynb index 0cc1c0927..e1e59c542 100644 --- a/docs/Tutorial.ipynb +++ b/docs/Tutorial.ipynb @@ -17,7 +17,28 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "\n", + "# Print the current 
PYTHONPATH\n", + "print(os.environ.get('PYTHONPATH'))\n", + "\n", + "# Set the PYTHONPATH if necessary\n", + "os.environ['PYTHONPATH'] = '/home/v-zikaiwang/SPTAG/Release'\n", + "sys.path.append('/home/v-zikaiwang/SPTAG/Release')\n", + "\n", + "# Verify by importing SPTAG again\n", + "import SPTAG\n", + "print(SPTAG.__file__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -46,28 +67,24 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['deletes.bin',\n", - " 'graph.bin',\n", - " 'indexloader.ini',\n", - " 'metadata.bin',\n", - " 'metadataIndex.bin',\n", - " 'tree.bin',\n", - " 'vectors.bin']" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ + "import sys\n", + "import os\n", + "\n", + "# Print the current PYTHONPATH\n", + "print(os.environ.get('PYTHONPATH'))\n", + "\n", + "# Set the PYTHONPATH if necessary\n", + "os.environ['PYTHONPATH'] = '/home/v-zikaiwang/SPTAG/Release'\n", + "sys.path.append('/home/v-zikaiwang/SPTAG/Release')\n", + "\n", + "# Verify by importing SPTAG again\n", "import SPTAG\n", + "print(SPTAG.__file__)\n", + "# import SPTAG\n", "\n", "index = SPTAG.AnnIndex('BKT', 'Float', vector_dimension)\n", "\n", @@ -87,20 +104,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[450, 247, 150, 832, 626, 828, 253, 471, 243, 365]\n", - "[11.603649139404297, 12.164414405822754, 12.225785255432129, 12.336504936218262, 12.416375160217285, 12.481977462768555, 12.573633193969727, 12.616722106933594, 12.708147048950195, 12.821012496948242]\n", - "[b'450\\n', b'247\\n', b'150\\n', b'832\\n', b'626\\n', b'828\\n', b'253\\n', b'471\\n', b'243\\n', b'365\\n']\n", - "[(450, 11.603649139404297, b'450\\n'), (247, 
12.164414405822754, b'247\\n'), (150, 12.225785255432129, b'150\\n'), (832, 12.336504936218262, b'832\\n'), (626, 12.416375160217285, b'626\\n'), (828, 12.481977462768555, b'828\\n'), (253, 12.573633193969727, b'253\\n'), (471, 12.616722106933594, b'471\\n'), (243, 12.708147048950195, b'243\\n'), (365, 12.821012496948242, b'365\\n')]\n" - ] - } - ], + "outputs": [], "source": [ "# Local index test on the vector search\n", "index = SPTAG.AnnIndex.Load('sptag_index')\n", @@ -108,7 +114,8 @@ "# prepare query vector\n", "q = np.random.rand(vector_dimension).astype(np.float32)\n", "\n", - "result = index.SearchWithMetaData(q, 10) # Search k=3 nearest vectors for query vector q\n", + "# result = index.SearchWithMetaData(q, 10) # Search k=3 nearest vectors for query vector q\n", + "result = index.Search(q, 10) # Search k=3 nearest vectors for query vector q\n", "print (result[0]) # nearest k vector ids\n", "print (result[1]) # nearest k vector distances\n", "print (result[2]) # nearest k vector metadatas\n", @@ -139,18 +146,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CompletedProcess(args='Quantizer.exe -d 100 -v Float -f DEFAULT -i sptag_index\\\\vectors.bin -o quan_doc_vectors.bin -oq quantizer.bin -qt PQQuantizer -qd 50 -ts 1000 -norm false', returncode=0)\n", - "(1000, 50)\n" - ] - } - ], + "outputs": [], "source": [ "import subprocess\n", "import struct\n", @@ -177,27 +175,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['deletes.bin',\n", - " 'graph.bin',\n", - " 'indexloader.ini',\n", - " 'metadata.bin',\n", - " 'metadataIndex.bin',\n", - " 'quantizer.bin',\n", - " 'tree.bin',\n", - " 'vectors.bin']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import 
SPTAG\n", "\n", @@ -219,19 +199,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[450, 247, 832, 626, 150, 253, 471, 828, 243, 207, 365, 823]\n", - "[11.497368812561035, 12.21573257446289, 12.328060150146484, 12.369790077209473, 12.420766830444336, 12.47612476348877, 12.48221206665039, 12.526467323303223, 12.802266120910645, 12.819355010986328, 12.876933097839355, 12.913871765136719]\n", - "[b'450\\n', b'247\\n', b'832\\n', b'626\\n', b'150\\n', b'253\\n', b'471\\n', b'828\\n', b'243\\n', b'207\\n', b'365\\n', b'823\\n']\n" - ] - } - ], + "outputs": [], "source": [ "# Local index test on the vector search\n", "index = SPTAG.AnnIndex.Load('quan_sptag_index')\n", @@ -244,26 +214,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['HeadIndex',\n", - " 'indexloader.ini',\n", - " 'metadata.bin',\n", - " 'metadataIndex.bin',\n", - " 'SPTAGFullList.bin',\n", - " 'SPTAGHeadVectorIDs.bin',\n", - " 'SPTAGHeadVectors.bin']" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import SPTAG\n", "\n", @@ -301,19 +254,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[450, 247, 150, 832, 626, 828, 253, 471, 243, 365, 207, 823]\n", - "[11.603649139404297, 12.164414405822754, 12.225785255432129, 12.336504936218262, 12.416375160217285, 12.481977462768555, 12.573633193969727, 12.616722106933594, 12.708147048950195, 12.821012496948242, 12.878414154052734, 13.01927375793457]\n", - "[b'450\\n', b'247\\n', b'150\\n', b'832\\n', b'626\\n', b'828\\n', b'253\\n', b'471\\n', b'243\\n', b'365\\n', b'207\\n', b'823\\n']\n" - ] - } - ], + "outputs": [], "source": [ "index = 
SPTAG.AnnIndex.Load('spann_index')\n", "result = index.SearchWithMetaData(q, 12) # Search k=3 nearest vectors for query vector q\n", @@ -324,27 +267,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['HeadIndex',\n", - " 'indexloader.ini',\n", - " 'metadata.bin',\n", - " 'metadataIndex.bin',\n", - " 'quantizer.bin',\n", - " 'SPTAGFullList.bin',\n", - " 'SPTAGHeadVectorIDs.bin',\n", - " 'SPTAGHeadVectors.bin']" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import SPTAG\n", "\n", @@ -383,19 +308,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[450, 247, 832, 626, 150, 253, 471, 828, 243, 207, 365, 823]\n", - "[11.497368812561035, 12.21573257446289, 12.328060150146484, 12.369790077209473, 12.420766830444336, 12.47612476348877, 12.48221206665039, 12.526467323303223, 12.802266120910645, 12.819355010986328, 12.876933097839355, 12.913871765136719]\n", - "[b'450\\n', b'247\\n', b'832\\n', b'626\\n', b'150\\n', b'253\\n', b'471\\n', b'828\\n', b'243\\n', b'207\\n', b'365\\n', b'823\\n']\n" - ] - } - ], + "outputs": [], "source": [ "index = SPTAG.AnnIndex.Load('spann_quan_index')\n", "index.SetQuantizerADC(True)\n", @@ -527,7 +442,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.3" + "version": "3.10.12" }, "mimetype": "text/x-python", "name": "python",