@@ -36,7 +36,6 @@ class _MDPageInfo(NamedTuple):
36
36
path_md : Path
37
37
md_url : str
38
38
content : str
39
- description : str
40
39
41
40
42
41
class MkdocsLLMsTxtPlugin (BasePlugin [_PluginConfig ]):
@@ -54,10 +53,9 @@ class MkdocsLLMsTxtPlugin(BasePlugin[_PluginConfig]):
54
53
mkdocs_config : MkDocsConfig
55
54
"""The global MkDocs configuration."""
56
55
57
- md_pages : dict [str , list [_MDPageInfo ]]
58
- """Dictionary mapping section names to a list of page infos."""
59
-
60
56
_sections : dict [str , dict [str , str ]]
57
+ _file_uris : set [str ]
58
+ _md_pages : dict [str , _MDPageInfo ]
61
59
62
60
def _expand_inputs (self , inputs : list [str | dict [str , str ]], page_uris : list [str ]) -> dict [str , str ]:
63
61
expanded : dict [str , str ] = {}
@@ -90,10 +88,6 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
90
88
if config .site_url is None :
91
89
raise ValueError ("'site_url' must be set in the MkDocs configuration to be used with the 'llmstxt' plugin" )
92
90
self .mkdocs_config = config
93
-
94
- # A `defaultdict` could be used, but we need to retain the same order between `config.sections` and `md_pages`
95
- # (which wouldn't be guaranteed when filling `md_pages` in `on_page_content()`).
96
- self .md_pages = {section : [] for section in self .config .sections }
97
91
return config
98
92
99
93
def on_files (self , files : Files , * , config : MkDocsConfig ) -> Files | None : # noqa: ARG002
@@ -114,6 +108,8 @@ def on_files(self, files: Files, *, config: MkDocsConfig) -> Files | None: # no
114
108
section_name : self ._expand_inputs (file_list , page_uris = page_uris ) # type: ignore[arg-type]
115
109
for section_name , file_list in self .config .sections .items ()
116
110
}
111
+ self ._file_uris = set (chain .from_iterable (self ._sections .values ()))
112
+ self ._md_pages = {}
117
113
return files
118
114
119
115
def on_page_content (self , html : str , * , page : Page , ** kwargs : Any ) -> str | None : # noqa: ARG002
@@ -125,37 +121,32 @@ def on_page_content(self, html: str, *, page: Page, **kwargs: Any) -> str | None
125
121
html: The rendered HTML.
126
122
page: The page object.
127
123
"""
128
- src_uri = page .file .src_uri
129
- for section_name , files in self ._sections .items ():
130
- if src_uri in files :
131
- path_md = Path (page .file .abs_dest_path ).with_suffix (".md" )
132
- page_md = _generate_page_markdown (
133
- html ,
134
- should_autoclean = self .config .autoclean ,
135
- preprocess = self .config .preprocess ,
136
- path = str (path_md ),
137
- )
138
-
139
- md_url = Path (page .file .dest_uri ).with_suffix (".md" ).as_posix ()
140
- # Apply the same logic as in the `Page.url` property.
141
- if md_url in ("." , "./" ):
142
- md_url = ""
143
-
144
- # Guaranteed to exist as we require `site_url` to be configured.
145
- base = cast ("str" , self .mkdocs_config .site_url )
146
- if not base .endswith ("/" ):
147
- base += "/"
148
- md_url = urljoin (base , md_url )
149
-
150
- self .md_pages [section_name ].append (
151
- _MDPageInfo (
152
- title = page .title if page .title is not None else src_uri ,
153
- path_md = path_md ,
154
- md_url = md_url ,
155
- content = page_md ,
156
- description = files [src_uri ],
157
- ),
158
- )
124
+ if (src_uri := page .file .src_uri ) in self ._file_uris :
125
+ path_md = Path (page .file .abs_dest_path ).with_suffix (".md" )
126
+ page_md = _generate_page_markdown (
127
+ html ,
128
+ should_autoclean = self .config .autoclean ,
129
+ preprocess = self .config .preprocess ,
130
+ path = str (path_md ),
131
+ )
132
+
133
+ md_url = Path (page .file .dest_uri ).with_suffix (".md" ).as_posix ()
134
+ # Apply the same logic as in the `Page.url` property.
135
+ if md_url in ("." , "./" ):
136
+ md_url = ""
137
+
138
+ # Guaranteed to exist as we require `site_url` to be configured.
139
+ base = cast ("str" , self .mkdocs_config .site_url )
140
+ if not base .endswith ("/" ):
141
+ base += "/"
142
+ md_url = urljoin (base , md_url )
143
+
144
+ self ._md_pages [src_uri ] = _MDPageInfo (
145
+ title = page .title if page .title is not None else src_uri ,
146
+ path_md = path_md ,
147
+ md_url = md_url ,
148
+ content = page_md ,
149
+ )
159
150
160
151
return html
161
152
@@ -179,9 +170,10 @@ def on_post_build(self, *, config: MkDocsConfig, **kwargs: Any) -> None: # noqa
179
170
180
171
full_markdown = markdown
181
172
182
- for section_name , file_list in self .md_pages .items ():
173
+ for section_name , page_uris in self ._sections .items ():
183
174
markdown += f"## { section_name } \n \n "
184
- for page_title , path_md , md_url , content , desc in file_list :
175
+ for page_uri , desc in page_uris .items ():
176
+ page_title , path_md , md_url , content = self ._md_pages [page_uri ]
185
177
path_md .write_text (content , encoding = "utf8" )
186
178
_logger .debug (f"Generated MD file to { path_md } " )
187
179
markdown += f"- [{ page_title } ]({ md_url } ){ (': ' + desc ) if desc else '' } \n "
@@ -192,8 +184,8 @@ def on_post_build(self, *, config: MkDocsConfig, **kwargs: Any) -> None: # noqa
192
184
193
185
if self .config .full_output is not None :
194
186
full_output_file = Path (config .site_dir ).joinpath (self .config .full_output )
195
- for section_name , file_list in self .md_pages .items ():
196
- list_content = "\n " .join (info . content for info in file_list )
187
+ for section_name , page_uris in self ._sections .items ():
188
+ list_content = "\n " .join (self . _md_pages [ page_uri ]. content for page_uri in page_uris )
197
189
full_markdown += f"# { section_name } \n \n { list_content } "
198
190
full_output_file .write_text (full_markdown , encoding = "utf8" )
199
191
_logger .debug (f"Generated file /{ self .config .full_output } .txt" )
0 commit comments