python · freundTech · Sep 21, 2021 · Sep 21, 2021 · Sep 21, 2021 · CAM-Gerlach
@@ -2,6 +2,7 @@ pep-0000.txt
 pep-0000.rst
 pep-????.html
 peps.rss
+peps.bib
 __pycache__
 *.pyc
 *.pyo

@@ -27,12 +27,16 @@ pep-0000.rst: $(wildcard pep-????.txt) $(wildcard pep-????.rst) $(wildcard pep0/
 rss:
 	$(PYTHON) pep2rss.py .
 
+bib: pep-0000.rst
+	$(PYTHON) pep2bib.py .
+
 install:
 	echo "Installing is not necessary anymore. It will be done in post-commit."
 
 clean:
 	-rm pep-0000.rst
 	-rm *.html
+	-rm *.bib
 	-rm -rf build
 
 update:

@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+
+# usage: python3 pep2bib.py .
+
+import glob
+import os
+import re
+import sys
+from pybtex.database import Entry, BibliographyData
+
+from pep_parsing_helpers import pep_number, pep_creation_dt, first_line_starting_with, parse_authors
+
+# Output file: ``peps.bib`` inside the directory given as the first
+# command-line argument (see the usage line at the top of the script).
+BIB_PATH = os.path.join(sys.argv[1], 'peps.bib')
+
+
+# Patterns tried, in this order, by authors_to_bib() to pull the display name
+# out of one author entry.
+# Captures everything before a ``<...>`` group, e.g. "Name <email>".
+name_first_regex = re.compile(r'(.*)<.*>')
+# Captures the text inside parentheses, e.g. "email (Name)".
+mail_first_regex = re.compile(r'.*\((.*)\)')
+# Fallback: captures the entry as-is (``.`` stops at a newline).
+name_only_regex = re.compile(r'(.*)')
+
+
+# Month number (1-12) -> lowercase three-letter English abbreviation.
+# main() writes these into the ``month`` field and then strips the quotes
+# that pybtex adds around them.
+months = {
+    1: 'jan',
+    2: 'feb',
+    3: 'mar',
+    4: 'apr',
+    5: 'may',
+    6: 'jun',
+    7: 'jul',
+    8: 'aug',
+    9: 'sep',
+    10: 'oct',
+    11: 'nov',
+    12: 'dec',
+}
+
+
+def authors_to_bib(authors):
+    """Join author entries into a single BibTeX ``author`` value.
+
+    Each entry is reduced to the first capture group of the first pattern
+    that matches it (name before ``<...>``, then name inside parentheses,
+    then the entry as-is), stripped of surrounding whitespace.  The names
+    are joined with ``" and "``.
+    """
+    patterns = (name_first_regex, mail_first_regex, name_only_regex)
+    names = []
+    for entry in authors:
+        # Lazily try each pattern and keep the first successful match.
+        hit = next(filter(None, (p.match(entry) for p in patterns)), None)
+        names.append(hit.group(1).strip())
+    return " and ".join(names)
+
+
+def main():
+    """Write a BibTeX database with one ``techreport`` entry per PEP.
+
+    PEP sources are found by globbing ``pep-*.txt`` and ``pep-*.rst``
+    relative to the current working directory.  The rendered bibliography
+    is written to ``BIB_PATH``.
+    """
+    # Pair every source file with its PEP number and "Created:" date.
+    sources = glob.glob('pep-*.txt') + glob.glob('pep-*.rst')
+    # Tuples compare element-wise, so this orders the records by PEP number.
+    records = sorted(
+        (pep_number(path), pep_creation_dt(path), path) for path in sources
+    )
+
+    entries = {}
+    for number, created, path in records:
+        title = first_line_starting_with(path, 'Title:')
+        author_field = authors_to_bib(
+            parse_authors(first_line_starting_with(path, 'Author:'))
+        )
+        fields = [
+            ('author', author_field),
+            ('title', 'PEP %d: %s' % (number, title)),
+            ('institution', "Python Software Foundation"),
+            ('year', str(created.year)),
+            ('month', months[created.month]),
+            ('type', 'PEP'),
+            ('number', str(number)),
+            ('url', 'https://www.python.org/dev/peps/pep-%0.4d/' % number),
+        ]
+        entries['pep%d' % number] = Entry('techreport', fields)
+
+    rendered = BibliographyData(entries).to_string('bibtex')
+
+    # pybtex quotes every value, but the month abbreviation should be left
+    # unquoted so that bibliography styles can substitute it.
+    rendered = re.sub('month = "(.*)"', r'month = \1', rendered)
+
+    with open(BIB_PATH, 'w', encoding="utf-8") as out:
+        out.write(rendered)
+
+
+if __name__ == '__main__':
+    main()
@@ -5,15 +5,15 @@
 import datetime
 import glob
 import os
-import re
 import sys
-import time
 import PyRSS2Gen as rssgen
 import docutils.frontend
 import docutils.nodes
 import docutils.parsers.rst
 import docutils.utils
 
+from pep_parsing_helpers import pep_creation_dt, first_line_starting_with, parse_authors
+
 RSS_PATH = os.path.join(sys.argv[1], 'peps.rss')
 
 
@@ -53,38 +53,12 @@ def pep_abstract(full_path: str) -> str:
     return abstract
 
 
-def firstline_startingwith(full_path, text):
-    for line in open(full_path, encoding="utf-8"):
-        if line.startswith(text):
-            return line[len(text):].strip()
-    return None
-
-
 # get list of peps with creation time
 # (from "Created:" string in pep .rst or .txt)
 peps = glob.glob('pep-*.txt')
 peps.extend(glob.glob('pep-*.rst'))
 
 
-def pep_creation_dt(full_path):
-    created_str = firstline_startingwith(full_path, 'Created:')
-    # bleh, I was hoping to avoid re but some PEPs editorialize
-    # on the Created line
-    m = re.search(r'''(\d+-\w+-\d{4})''', created_str)
-    if not m:
-        # some older ones have an empty line, that's okay, if it's old
-        # we ipso facto don't care about it.
-        # "return None" would make the most sense but datetime objects
-        # refuse to compare with that. :-|
-        return datetime.datetime(*time.localtime(0)[:6])
-    created_str = m.group(1)
-    try:
-        t = time.strptime(created_str, '%d-%b-%Y')
-    except ValueError:
-        t = time.strptime(created_str, '%d-%B-%Y')
-    return datetime.datetime(*t[:6])
-
-
 peps_with_dt = [(pep_creation_dt(full_path), full_path) for full_path in peps]
 # sort peps by date, newest first
 peps_with_dt.sort(reverse=True)
@@ -96,8 +70,9 @@ def pep_creation_dt(full_path):
         n = int(full_path.split('-')[-1].split('.')[0])
     except ValueError:
         pass
-    title = firstline_startingwith(full_path, 'Title:')
-    author = firstline_startingwith(full_path, 'Author:')
+    title = first_line_starting_with(full_path, 'Title:')
+    authors = first_line_starting_with(full_path, 'Author:')
+    author = parse_authors(authors)[0] # RSS only supports one author
     abstract = pep_abstract(full_path)
     url = 'https://www.python.org/dev/peps/pep-%0.4d/' % n
     item = rssgen.RSSItem(

@@ -0,0 +1,56 @@
+import re
+import datetime
+import time
-import re
-import datetime
-import time
+import datetime
+import re
+import time
-import re
-import datetime
-import time
+import datetime
+import re
+import time
+
+
+def first_line_starting_with(full_path, text):
+    """Return the value of the first line in a file that starts with *text*.
+
+    The returned value is the remainder of the matching line (stripped),
+    followed by any immediately following continuation lines appended
+    verbatim.  A continuation line is one whose first character is
+    whitespace; this includes blank lines.
+
+    Returns ``None`` when no line starts with *text*.
+    """
+    result = None
+    # The context manager closes the file on every exit path.
+    with open(full_path, encoding="utf-8") as pep_file:
+        for line in pep_file:
+            if result is not None:
+                if not line[0].strip():  # Line begins with whitespace
+                    result += line
+                else:
+                    return result
+            if line.startswith(text):
+                result = line[len(text):].strip()
+    # End of file: return what was collected, so a header that is the last
+    # thing in the file is not lost (still None when nothing matched).
+    return result
+
+
+def pep_creation_dt(full_path):
+    """Return the creation date of a PEP file as a ``datetime.datetime``.
+
+    The date is taken from the file's ``Created:`` line and must look like
+    ``dd-Mon-yyyy`` or ``dd-Month-yyyy``.  If the line is missing, empty,
+    or contains no such date, the Unix epoch (in local time) is returned
+    so that the result can always be compared with other datetimes.
+
+    Raises ``ValueError`` if a date-like string is found but its month
+    name cannot be parsed.
+    """
+    created_str = first_line_starting_with(full_path, 'Created:')
+    # bleh, I was hoping to avoid re but some PEPs editorialize
+    # on the Created line.  A missing line (None) is treated like an
+    # empty one instead of crashing re.search with a TypeError.
+    m = re.search(r'''(\d+-\w+-\d{4})''', created_str or '')
+    if not m:
+        # some older ones have an empty line, that's okay, if it's old
+        # we ipso facto don't care about it.
+        # "return None" would make the most sense but datetime objects
+        # refuse to compare with that. :-|
+        return datetime.datetime(*time.localtime(0)[:6])
+    created_str = m.group(1)
+    try:
+        # Abbreviated month name first ("Jan"), then the full name.
+        t = time.strptime(created_str, '%d-%b-%Y')
+    except ValueError:
+        t = time.strptime(created_str, '%d-%B-%Y')
+    return datetime.datetime(*t[:6])
+
+
+def pep_number(full_path):
+    """Return the PEP number encoded in a file name such as ``pep-0008.txt``.
+
+    The number is the text between the last ``-`` and the first ``.`` that
+    follows it.  Raises ``Exception`` if that text is not an integer.
+    """
+    digits = full_path.rsplit('-', 1)[-1].partition('.')[0]
+    try:
+        return int(digits)
+    except ValueError:
+        raise Exception("Can't parse pep number %s" % digits)
+
+
+def parse_authors(authors_str):
+    """Split a comma-separated author string into a list of entries.
+
+    Each entry has its surrounding whitespace removed; the entries are
+    otherwise returned unchanged and in their original order.
+    """
+    return [entry.strip() for entry in authors_str.split(',')]
+
+
-    return authors
+    return authors
-    return authors
+    return authors
@@ -4,3 +4,6 @@ docutils >= 0.16
 
 # For RSS
 feedgen >= 0.9.0  # For RSS feed
+
+# For bibliography
+pybtex >= 0.24.0
-
-# For bibliography
-pybtex >= 0.24.0
-
-# For bibliography
-pybtex >= 0.24.0