Skip to content

Commit 0730eab

Browse files
OSS-Fuzz Team authored and copybara-github committed
Add structures to support incremental indexing to the indexer database
Indexer-PiperOrigin-RevId: 807878968
1 parent 89f82e1 commit 0730eab

File tree

2 files changed

+283
-6
lines changed

2 files changed

+283
-6
lines changed

infra/indexer/index/sqlite.cc

Lines changed: 201 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,13 @@
3030
namespace oss_fuzz {
3131
namespace indexer {
3232
namespace {
33+
34+
// Note: We could in principle enforce UNIQUE constraints on `reference` foreign
35+
// key pairs, as well as those of `virtual_method_link` and
36+
// `entity_translation_unit` (as an extreme, non-ID fields of e.g. `location`
37+
// could also be made into a UNIQUE tuple). But those are unique by construction
38+
// now and we hope to avoid the overhead of checking those constraints.
39+
3340
const char kCreateDb[] =
3441
"PRAGMA foreign_keys = ON;\n"
3542
"PRAGMA user_version = " SCHEMA_VERSION
@@ -87,6 +94,25 @@ const char kCreateDb[] =
8794
"CREATE INDEX virtual_method_link_parent ON virtual_method_link("
8895
" parent_entity_id);\n";
8996

97+
// Schema for the optional tables that support incremental indexing:
//  * `translation_unit`: one row per translation unit (id, path);
//  * `entity_translation_unit`: pairs an `entity` row with a translation unit;
//  * `reference_translation_unit`: pairs a `reference` row with a translation
//    unit.
// Written as a raw string literal so the SQL reads exactly as it is executed.
const char kCreateIncrementalIndexingSupportTables[] =
    R"sql(CREATE TABLE translation_unit(
 id INTEGER PRIMARY KEY,
 path TEXT);

CREATE TABLE entity_translation_unit(
 id INTEGER PRIMARY KEY,
 entity_id INT NOT NULL,
 tu_id INT NOT NULL,
 FOREIGN KEY (entity_id) REFERENCES entity(id),
 FOREIGN KEY (tu_id) REFERENCES translation_unit(id));

CREATE TABLE reference_translation_unit(
 id INTEGER PRIMARY KEY,
 reference_id INT NOT NULL,
 tu_id INT NOT NULL,
 FOREIGN KEY (reference_id) REFERENCES reference(id),
 FOREIGN KEY (tu_id) REFERENCES translation_unit(id));
)sql";
115+
90116
const char kInsertLocation[] =
91117
"INSERT INTO location\n"
92118
" (id, dirname, basename, start_line, end_line)\n"
@@ -109,6 +135,21 @@ const char kInsertLink[] =
109135
" (id, parent_entity_id, child_entity_id)\n"
110136
" VALUES (?1, ?2, ?3);";
111137

138+
// Prepared-statement text adding one `translation_unit` row.
// Parameters: ?1 = id, ?2 = path.
const char kInsertTranslationUnit[] = R"sql(INSERT INTO translation_unit
 (id, path)
 VALUES (?1, ?2);)sql";
142+
143+
// Prepared-statement text adding one `entity_translation_unit` row.
// Parameters: ?1 = id, ?2 = entity_id, ?3 = tu_id.
const char kInsertEntityTranslationUnit[] =
    R"sql(INSERT INTO entity_translation_unit
 (id, entity_id, tu_id)
 VALUES (?1, ?2, ?3);)sql";
147+
148+
// Prepared-statement text adding one `reference_translation_unit` row.
// Parameters: ?1 = id, ?2 = reference_id, ?3 = tu_id.
const char kInsertReferenceTranslationUnit[] =
    R"sql(INSERT INTO reference_translation_unit
 (id, reference_id, tu_id)
 VALUES (?1, ?2, ?3);)sql";
152+
112153
const char kFinalizeDb[] =
113154
"VACUUM;\n"
114155
"REINDEX;\n"
@@ -163,8 +204,8 @@ bool InsertLocations(sqlite3* db, absl::Span<const Location> locations) {
163204

164205
bool InsertEntities(sqlite3* db, absl::Span<const Entity> entities) {
165206
// `substitute_entity_id` foreign key can refer to a yet-unadded entity.
166-
if (sqlite3_exec(db, "PRAGMA foreign_keys = OFF;", nullptr,
167-
nullptr, nullptr) != SQLITE_OK) {
207+
if (sqlite3_exec(db, "PRAGMA foreign_keys = OFF;", nullptr, nullptr,
208+
nullptr) != SQLITE_OK) {
168209
LOG(ERROR) << "sqlite disabling foreign keys failed: `"
169210
<< sqlite3_errmsg(db) << "`";
170211
return false;
@@ -261,8 +302,8 @@ bool InsertEntities(sqlite3* db, absl::Span<const Entity> entities) {
261302
std::move(cleanup).Cancel();
262303
sqlite3_finalize(insert_entity);
263304

264-
if (sqlite3_exec(db, "PRAGMA foreign_keys = ON;", nullptr, nullptr, nullptr)
265-
!= SQLITE_OK) {
305+
if (sqlite3_exec(db, "PRAGMA foreign_keys = ON;", nullptr, nullptr,
306+
nullptr) != SQLITE_OK) {
266307
LOG(ERROR) << "sqlite re-enabling foreign keys failed: `"
267308
<< sqlite3_errmsg(db) << "`";
268309
return false;
@@ -320,7 +361,7 @@ bool InsertVirtualMethodLinks(sqlite3* db,
320361
return false;
321362
}
322363

323-
for (ReferenceId i = 0; i < links.size(); ++i) {
364+
for (VirtualMethodLinkId i = 0; i < links.size(); ++i) {
324365
const VirtualMethodLink& link = links[i];
325366
if (sqlite3_bind_int64(insert_link, 1, i) != SQLITE_OK ||
326367
sqlite3_bind_int64(insert_link, 2, link.parent()) != SQLITE_OK ||
@@ -332,7 +373,7 @@ bool InsertVirtualMethodLinks(sqlite3* db,
332373
}
333374

334375
if (sqlite3_step(insert_link) != SQLITE_DONE) {
335-
LOG(ERROR) << "sqlite executing insert_reference failed: `"
376+
LOG(ERROR) << "sqlite executing insert_link failed: `"
336377
<< sqlite3_errmsg(db) << "`";
337378
sqlite3_finalize(insert_link);
338379
return false;
@@ -345,6 +386,126 @@ bool InsertVirtualMethodLinks(sqlite3* db,
345386
sqlite3_finalize(insert_link);
346387
return true;
347388
}
389+
390+
// Writes `translation_units` into the `translation_unit` table. Each element
// is stored with its position in the span as the row id and its
// `index_path()` as the path.
//
// Returns true if every row was inserted. On the first sqlite failure the
// error is logged and false is returned. Once the statement has been compiled
// it is finalized on every exit path.
bool InsertTranslationUnits(
    sqlite3* db, absl::Span<const TranslationUnit> translation_units) {
  sqlite3_stmt* stmt = nullptr;
  const int prepare_rc =
      sqlite3_prepare_v2(db, kInsertTranslationUnit,
                         sizeof(kInsertTranslationUnit), &stmt, nullptr);
  if (prepare_rc != SQLITE_OK) {
    LOG(ERROR) << "sqlite compiling prepared statement failed: `"
               << sqlite3_errmsg(db) << "`";
    return false;
  }

  bool all_inserted = true;
  TranslationUnitId row_id = 0;
  for (const TranslationUnit& unit : translation_units) {
    // The text is bound with SQLITE_STATIC, so the bytes must stay valid until
    // the statement has been stepped; they live in `translation_units`.
    const auto& path = unit.index_path();
    const bool bound =
        sqlite3_bind_int64(stmt, 1, row_id) == SQLITE_OK &&
        sqlite3_bind_text(stmt, 2, path.data(), path.size(), SQLITE_STATIC) ==
            SQLITE_OK;
    if (!bound) {
      LOG(ERROR) << "sqlite binding insert_tu failed: `" << sqlite3_errmsg(db)
                 << "`";
      all_inserted = false;
      break;
    }

    if (sqlite3_step(stmt) != SQLITE_DONE) {
      LOG(ERROR) << "sqlite executing insert_tu failed: `" << sqlite3_errmsg(db)
                 << "`";
      all_inserted = false;
      break;
    }

    // Make the statement reusable for the next row.
    sqlite3_reset(stmt);
    sqlite3_clear_bindings(stmt);
    ++row_id;
  }

  sqlite3_finalize(stmt);
  return all_inserted;
}
426+
427+
// Writes `entity_translation_units` into the `entity_translation_unit` table.
// Each element is stored with its position in the span as the row id, together
// with its `entity_id()` and `tu_id()`.
//
// Returns true if every row was inserted. On the first sqlite failure the
// error is logged and false is returned. Once the statement has been compiled
// it is finalized on every exit path.
bool InsertEntityTranslationUnits(
    sqlite3* db,
    absl::Span<const EntityTranslationUnit> entity_translation_units) {
  sqlite3_stmt* stmt = nullptr;
  const int prepare_rc =
      sqlite3_prepare_v2(db, kInsertEntityTranslationUnit,
                         sizeof(kInsertEntityTranslationUnit), &stmt, nullptr);
  if (prepare_rc != SQLITE_OK) {
    LOG(ERROR) << "sqlite compiling prepared statement failed: `"
               << sqlite3_errmsg(db) << "`";
    return false;
  }

  bool all_inserted = true;
  EntityTranslationUnitId row_id = 0;
  for (const EntityTranslationUnit& pair : entity_translation_units) {
    const bool bound =
        sqlite3_bind_int64(stmt, 1, row_id) == SQLITE_OK &&
        sqlite3_bind_int64(stmt, 2, pair.entity_id()) == SQLITE_OK &&
        sqlite3_bind_int64(stmt, 3, pair.tu_id()) == SQLITE_OK;
    if (!bound) {
      LOG(ERROR) << "sqlite binding insert_entity_tu failed: `"
                 << sqlite3_errmsg(db) << "`";
      all_inserted = false;
      break;
    }

    if (sqlite3_step(stmt) != SQLITE_DONE) {
      LOG(ERROR) << "sqlite executing insert_entity_tu failed: `"
                 << sqlite3_errmsg(db) << "`";
      all_inserted = false;
      break;
    }

    // Make the statement reusable for the next row.
    sqlite3_reset(stmt);
    sqlite3_clear_bindings(stmt);
    ++row_id;
  }

  sqlite3_finalize(stmt);
  return all_inserted;
}
467+
468+
// Writes `reference_translation_units` into the `reference_translation_unit`
// table. Each element is stored with its position in the span as the row id,
// together with its `reference_id()` and `tu_id()`.
//
// Returns true if every row was inserted. On the first sqlite failure the
// error is logged and false is returned. Once the statement has been compiled
// it is finalized on every exit path.
bool InsertReferenceTranslationUnits(
    sqlite3* db,
    absl::Span<const ReferenceTranslationUnit> reference_translation_units) {
  sqlite3_stmt* stmt = nullptr;
  const int prepare_rc = sqlite3_prepare_v2(
      db, kInsertReferenceTranslationUnit,
      sizeof(kInsertReferenceTranslationUnit), &stmt, nullptr);
  if (prepare_rc != SQLITE_OK) {
    LOG(ERROR) << "sqlite compiling prepared statement failed: `"
               << sqlite3_errmsg(db) << "`";
    return false;
  }

  bool all_inserted = true;
  ReferenceTranslationUnitId row_id = 0;
  for (const ReferenceTranslationUnit& pair : reference_translation_units) {
    const bool bound =
        sqlite3_bind_int64(stmt, 1, row_id) == SQLITE_OK &&
        sqlite3_bind_int64(stmt, 2, pair.reference_id()) == SQLITE_OK &&
        sqlite3_bind_int64(stmt, 3, pair.tu_id()) == SQLITE_OK;
    if (!bound) {
      LOG(ERROR) << "sqlite binding insert_reference_tu failed: `"
                 << sqlite3_errmsg(db) << "`";
      all_inserted = false;
      break;
    }

    if (sqlite3_step(stmt) != SQLITE_DONE) {
      LOG(ERROR) << "sqlite executing insert_reference_tu failed: `"
                 << sqlite3_errmsg(db) << "`";
      all_inserted = false;
      break;
    }

    // Make the statement reusable for the next row.
    sqlite3_reset(stmt);
    sqlite3_clear_bindings(stmt);
    ++row_id;
  }

  sqlite3_finalize(stmt);
  return all_inserted;
}
348509
} // anonymous namespace
349510

350511
bool SaveAsSqlite(const FlatIndex& index, const std::string& path) {
@@ -391,6 +552,39 @@ bool SaveAsSqlite(const FlatIndex& index, const std::string& path) {
391552
return false;
392553
}
393554

555+
if (index.incremental_indexing_metadata.has_value()) {
556+
const IncrementalIndexingMetadata& metadata =
557+
*index.incremental_indexing_metadata;
558+
559+
LOG(INFO) << "creating incremental indexing support tables";
560+
if (sqlite3_exec(db, kCreateIncrementalIndexingSupportTables, nullptr,
561+
nullptr, &error) != SQLITE_OK) {
562+
LOG(ERROR) << "incremental indexing support table creation failed: `"
563+
<< error << "`";
564+
sqlite3_close(db);
565+
return false;
566+
}
567+
568+
LOG(INFO) << "inserting translation units";
569+
if (!InsertTranslationUnits(db, metadata.translation_units)) {
570+
sqlite3_close(db);
571+
return false;
572+
}
573+
574+
LOG(INFO) << "inserting entity - translation unit pairs";
575+
if (!InsertEntityTranslationUnits(db, metadata.entity_translation_units)) {
576+
sqlite3_close(db);
577+
return false;
578+
}
579+
580+
LOG(INFO) << "inserting reference - translation unit pairs";
581+
if (!InsertReferenceTranslationUnits(
582+
db, metadata.reference_translation_units)) {
583+
sqlite3_close(db);
584+
return false;
585+
}
586+
}
587+
394588
LOG(INFO) << "finalizing database";
395589
if (sqlite3_exec(db, kFinalizeDb, nullptr, nullptr, &error) != SQLITE_OK) {
396590
LOG(ERROR) << "database finalization failed: `" << error << "`";
@@ -427,5 +621,6 @@ bool SaveAsSqlite(const FlatIndex& index, const std::string& path) {
427621
sqlite3_close(db);
428622
return backup_success;
429623
}
624+
430625
} // namespace indexer
431626
} // namespace oss_fuzz

infra/indexer/index/types.h

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,13 @@ using LocationId = uint64_t;
4040
using EntityId = uint64_t;
4141
using ReferenceId = uint64_t;
4242
using VirtualMethodLinkId = uint64_t;
43+
using TranslationUnitId = uint64_t;
44+
using EntityTranslationUnitId = uint64_t;
45+
using ReferenceTranslationUnitId = uint64_t;
4346
constexpr LocationId kInvalidLocationId = 0xffffffffffffffffull;
4447
constexpr EntityId kInvalidEntityId = 0xffffffffffffffffull;
48+
constexpr ReferenceId kInvalidReferenceId = 0xffffffffffffffffull;
49+
constexpr TranslationUnitId kInvalidTranslationUnitId = 0xffffffffffffffffull;
4550

4651
inline bool IsRealPath(absl::string_view path) {
4752
// Examples of built-in paths: `<built-in>` and `<command-line>`.
@@ -180,6 +185,7 @@ class Entity {
180185
CHECK_EQ(substitute_relationship_.has_value(),
181186
new_substitute_entity_id.has_value());
182187
if (substitute_relationship_.has_value()) {
188+
CHECK_NE(*new_substitute_entity_id, kInvalidEntityId);
183189
substitute_relationship_->entity_id_ = *new_substitute_entity_id;
184190
}
185191
}
@@ -343,13 +349,89 @@ H AbslHashValue(H h, const VirtualMethodLink& link) {
343349
return H::combine(std::move(h), link.parent(), link.child());
344350
}
345351

352+
// Represents a single translation unit.
353+
class TranslationUnit {
354+
public:
355+
explicit TranslationUnit(const std::string& index_path)
356+
: index_path_(index_path) {}
357+
358+
const std::string& index_path() const { return index_path_; }
359+
360+
bool operator==(const TranslationUnit&) const = default;
361+
std::strong_ordering operator<=>(const TranslationUnit&) const = default;
362+
363+
private:
364+
std::string index_path_;
365+
};
366+
367+
template <typename H>
368+
H AbslHashValue(H h, const TranslationUnit& tu) {
369+
return H::combine(std::move(h), tu.index_path());
370+
}
371+
372+
// Links an entity to a translation unit it is encountered in (many-to-many).
373+
class EntityTranslationUnit {
374+
public:
375+
EntityTranslationUnit(EntityId entity_id, TranslationUnitId tu_id)
376+
: entity_id_(entity_id), tu_id_(tu_id) {
377+
CHECK_NE(entity_id, kInvalidEntityId);
378+
}
379+
380+
EntityId entity_id() const { return entity_id_; }
381+
TranslationUnitId tu_id() const { return tu_id_; }
382+
383+
bool operator==(const EntityTranslationUnit&) const = default;
384+
std::strong_ordering operator<=>(const EntityTranslationUnit&) const =
385+
default;
386+
387+
private:
388+
EntityId entity_id_;
389+
TranslationUnitId tu_id_;
390+
};
391+
392+
template <typename H>
393+
H AbslHashValue(H h, const EntityTranslationUnit& etu) {
394+
return H::combine(std::move(h), etu.entity_id(), etu.tu_id());
395+
}
396+
397+
// Links a reference to a translation unit it is encountered in (many-to-many).
398+
class ReferenceTranslationUnit {
399+
public:
400+
ReferenceTranslationUnit(ReferenceId reference_id, TranslationUnitId tu_id)
401+
: reference_id_(reference_id), tu_id_(tu_id) {}
402+
403+
ReferenceId reference_id() const { return reference_id_; }
404+
TranslationUnitId tu_id() const { return tu_id_; }
405+
406+
bool operator==(const ReferenceTranslationUnit&) const = default;
407+
std::strong_ordering operator<=>(const ReferenceTranslationUnit&) const =
408+
default;
409+
410+
private:
411+
ReferenceId reference_id_;
412+
TranslationUnitId tu_id_;
413+
};
414+
415+
template <typename H>
416+
H AbslHashValue(H h, const ReferenceTranslationUnit& etu) {
417+
return H::combine(std::move(h), etu.reference_id(), etu.tu_id());
418+
}
419+
420+
// A set of optional metadata for incremental indexing support.
421+
struct IncrementalIndexingMetadata {
422+
std::vector<TranslationUnit> translation_units;
423+
std::vector<EntityTranslationUnit> entity_translation_units;
424+
std::vector<ReferenceTranslationUnit> reference_translation_units;
425+
};
426+
346427
// A simple holder for a sorted index, used as an interchange format/interface
347428
// definition between uses of the index.
348429
struct FlatIndex {
349430
std::vector<Location> locations;
350431
std::vector<Entity> entities;
351432
std::vector<Reference> references;
352433
std::vector<VirtualMethodLink> virtual_method_links;
434+
std::optional<IncrementalIndexingMetadata> incremental_indexing_metadata;
353435
};
354436

355437
namespace testing_internal {

0 commit comments

Comments
 (0)