Skip to content

Commit 1359e25

Browse files
committed
fix: Preserve user-defined ordering of pages
Issue-21: #21
1 parent cde9904 commit 1359e25

File tree

1 file changed

+35
-43
lines changed

1 file changed

+35
-43
lines changed

src/mkdocs_llmstxt/_internal/plugin.py

Lines changed: 35 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ class _MDPageInfo(NamedTuple):
3636
path_md: Path
3737
md_url: str
3838
content: str
39-
description: str
4039

4140

4241
class MkdocsLLMsTxtPlugin(BasePlugin[_PluginConfig]):
@@ -54,10 +53,9 @@ class MkdocsLLMsTxtPlugin(BasePlugin[_PluginConfig]):
5453
mkdocs_config: MkDocsConfig
5554
"""The global MkDocs configuration."""
5655

57-
md_pages: dict[str, list[_MDPageInfo]]
58-
"""Dictionary mapping section names to a list of page infos."""
59-
6056
_sections: dict[str, dict[str, str]]
57+
_file_uris: set[str]
58+
_md_pages: dict[str, _MDPageInfo]
6159

6260
def _expand_inputs(self, inputs: list[str | dict[str, str]], page_uris: list[str]) -> dict[str, str]:
6361
expanded: dict[str, str] = {}
@@ -90,10 +88,6 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
9088
if config.site_url is None:
9189
raise ValueError("'site_url' must be set in the MkDocs configuration to be used with the 'llmstxt' plugin")
9290
self.mkdocs_config = config
93-
94-
# A `defaultdict` could be used, but we need to retain the same order between `config.sections` and `md_pages`
95-
# (which wouldn't be guaranteed when filling `md_pages` in `on_page_content()`).
96-
self.md_pages = {section: [] for section in self.config.sections}
9791
return config
9892

9993
def on_files(self, files: Files, *, config: MkDocsConfig) -> Files | None: # noqa: ARG002
@@ -114,6 +108,8 @@ def on_files(self, files: Files, *, config: MkDocsConfig) -> Files | None: # no
114108
section_name: self._expand_inputs(file_list, page_uris=page_uris) # type: ignore[arg-type]
115109
for section_name, file_list in self.config.sections.items()
116110
}
111+
self._file_uris = set(chain.from_iterable(self._sections.values()))
112+
self._md_pages = {}
117113
return files
118114

119115
def on_page_content(self, html: str, *, page: Page, **kwargs: Any) -> str | None: # noqa: ARG002
@@ -125,37 +121,32 @@ def on_page_content(self, html: str, *, page: Page, **kwargs: Any) -> str | None
125121
html: The rendered HTML.
126122
page: The page object.
127123
"""
128-
src_uri = page.file.src_uri
129-
for section_name, files in self._sections.items():
130-
if src_uri in files:
131-
path_md = Path(page.file.abs_dest_path).with_suffix(".md")
132-
page_md = _generate_page_markdown(
133-
html,
134-
should_autoclean=self.config.autoclean,
135-
preprocess=self.config.preprocess,
136-
path=str(path_md),
137-
)
138-
139-
md_url = Path(page.file.dest_uri).with_suffix(".md").as_posix()
140-
# Apply the same logic as in the `Page.url` property.
141-
if md_url in (".", "./"):
142-
md_url = ""
143-
144-
# Guaranteed to exist as we require `site_url` to be configured.
145-
base = cast("str", self.mkdocs_config.site_url)
146-
if not base.endswith("/"):
147-
base += "/"
148-
md_url = urljoin(base, md_url)
149-
150-
self.md_pages[section_name].append(
151-
_MDPageInfo(
152-
title=page.title if page.title is not None else src_uri,
153-
path_md=path_md,
154-
md_url=md_url,
155-
content=page_md,
156-
description=files[src_uri],
157-
),
158-
)
124+
if (src_uri := page.file.src_uri) in self._file_uris:
125+
path_md = Path(page.file.abs_dest_path).with_suffix(".md")
126+
page_md = _generate_page_markdown(
127+
html,
128+
should_autoclean=self.config.autoclean,
129+
preprocess=self.config.preprocess,
130+
path=str(path_md),
131+
)
132+
133+
md_url = Path(page.file.dest_uri).with_suffix(".md").as_posix()
134+
# Apply the same logic as in the `Page.url` property.
135+
if md_url in (".", "./"):
136+
md_url = ""
137+
138+
# Guaranteed to exist as we require `site_url` to be configured.
139+
base = cast("str", self.mkdocs_config.site_url)
140+
if not base.endswith("/"):
141+
base += "/"
142+
md_url = urljoin(base, md_url)
143+
144+
self._md_pages[src_uri] = _MDPageInfo(
145+
title=page.title if page.title is not None else src_uri,
146+
path_md=path_md,
147+
md_url=md_url,
148+
content=page_md,
149+
)
159150

160151
return html
161152

@@ -179,9 +170,10 @@ def on_post_build(self, *, config: MkDocsConfig, **kwargs: Any) -> None: # noqa
179170

180171
full_markdown = markdown
181172

182-
for section_name, file_list in self.md_pages.items():
173+
for section_name, page_uris in self._sections.items():
183174
markdown += f"## {section_name}\n\n"
184-
for page_title, path_md, md_url, content, desc in file_list:
175+
for page_uri, desc in page_uris.items():
176+
page_title, path_md, md_url, content = self._md_pages[page_uri]
185177
path_md.write_text(content, encoding="utf8")
186178
_logger.debug(f"Generated MD file to {path_md}")
187179
markdown += f"- [{page_title}]({md_url}){(': ' + desc) if desc else ''}\n"
@@ -192,8 +184,8 @@ def on_post_build(self, *, config: MkDocsConfig, **kwargs: Any) -> None: # noqa
192184

193185
if self.config.full_output is not None:
194186
full_output_file = Path(config.site_dir).joinpath(self.config.full_output)
195-
for section_name, file_list in self.md_pages.items():
196-
list_content = "\n".join(info.content for info in file_list)
187+
for section_name, page_uris in self._sections.items():
188+
list_content = "\n".join(self._md_pages[page_uri].content for page_uri in page_uris)
197189
full_markdown += f"# {section_name}\n\n{list_content}"
198190
full_output_file.write_text(full_markdown, encoding="utf8")
199191
_logger.debug(f"Generated file /{self.config.full_output}.txt")

0 commit comments

Comments
 (0)