- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 1.7k
Infra: Add pep2bib.py for generating BibTeX entries #2085
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -2,6 +2,7 @@ pep-0000.txt | |
| pep-0000.rst | ||
| pep-????.html | ||
| peps.rss | ||
| peps.bib | ||
| __pycache__ | ||
| *.pyc | ||
| *.pyo | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| #!/usr/bin/env python3 | ||
|  | ||
| # usage: python3 pep2bib.py . | ||
|  | ||
| import glob | ||
| import os | ||
| import re | ||
| import sys | ||
| from pybtex.database import Entry, BibliographyData | ||
|  | ||
| from pep_parsing_helpers import pep_number, pep_creation_dt, first_line_starting_with, parse_authors | ||
|  | ||
# Path of the generated bibliography file.  The target directory is the first
# command-line argument (see the usage line at the top of this script); fall
# back to the current directory so that importing this module without
# arguments does not raise IndexError.
BIB_PATH = os.path.join(sys.argv[1] if len(sys.argv) > 1 else os.curdir, 'peps.bib')


# Patterns tried in this order by authors_to_bib(); group 1 of each match is
# taken as the author's name.
name_first_regex = re.compile(r'(.*)<.*>')    # text in front of a "<...>" part
mail_first_regex = re.compile(r'.*\((.*)\)')  # text inside a "(...)" part
name_only_regex = re.compile(r'(.*)')         # catch-all: the text itself


# Month number -> three-letter lowercase abbreviation used for the BibTeX
# "month" field (main() writes these unquoted).
months = {
    1: 'jan',
    2: 'feb',
    3: 'mar',
    4: 'apr',
    5: 'may',
    6: 'jun',
    7: 'jul',
    8: 'aug',
    9: 'sep',
    10: 'oct',
    11: 'nov',
    12: 'dec',
}
|  | ||
|  | ||
def authors_to_bib(authors):
    """Turn a list of PEP author entries into one BibTeX author string.

    Each entry is matched against the module's author patterns in order
    (``name_first_regex``, ``mail_first_regex``, ``name_only_regex``); the
    first pattern that matches supplies the name via its group 1.  The
    stripped names are joined with " and ".
    """
    patterns = (name_first_regex, mail_first_regex, name_only_regex)
    names = []
    for entry in authors:
        for pattern in patterns:
            found = pattern.match(entry)
            if found is not None:
                break
        names.append(found.group(1).strip())
    return " and ".join(names)
|  | ||
|  | ||
# URL pattern for a published PEP; the placeholder takes the PEP number,
# zero-padded to four digits.  Kept at module level so it is easy to update
# if the published location of the PEPs ever changes.
PEP_URL_TEMPLATE = 'https://www.python.org/dev/peps/pep-%0.4d/'


def main():
    """Generate ``peps.bib`` with one BibTeX ``techreport`` entry per PEP.

    PEP source files (``pep-*.txt`` and ``pep-*.rst``) are globbed relative
    to the current working directory.  For each file the PEP number, the
    creation date, the title and the authors are extracted, and the
    resulting bibliography is written to ``BIB_PATH`` as UTF-8.
    """
    # get list of peps with creation time
    # (from "Created:" string in pep .rst or .txt)
    pep_paths = glob.glob('pep-*.txt') + glob.glob('pep-*.rst')

    # Tuples start with the PEP number, so plain sorting orders by number.
    peps_with_dt = sorted(
        (pep_number(full_path), pep_creation_dt(full_path), full_path)
        for full_path in pep_paths
    )

    items = {}
    for n, dt, full_path in peps_with_dt:
        title = first_line_starting_with(full_path, 'Title:')
        author_string = first_line_starting_with(full_path, 'Author:')
        authors = authors_to_bib(parse_authors(author_string))
        items['pep%d' % n] = Entry('techreport', [
            ('author', authors),
            ('title', 'PEP %d: %s' % (n, title)),
            ('institution', "Python Software Foundation"),
            ('year', str(dt.year)),
            ('month', months[dt.month]),
            ('type', 'PEP'),
            ('number', str(n)),
            ('url', PEP_URL_TEMPLATE % n),
        ])

    bib_str = BibliographyData(items).to_string('bibtex')

    # pybtex always quotes strings, but we want month strings unquoted,
    # so bib styles can replace it
    bib_str = re.sub(r'month = "(.*)"', r'month = \1', bib_str)

    with open(BIB_PATH, 'w', encoding="utf-8") as fp:
        fp.write(bib_str)


if __name__ == '__main__':
    main()
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,56 @@ | ||||||||||||||
| import re | ||||||||||||||
| import datetime | ||||||||||||||
| import time | ||||||||||||||
| 
      Comment on lines
    
      +1
     to 
      +3
    
   There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 | ||||||||||||||
|  | ||||||||||||||
|  | ||||||||||||||
def first_line_starting_with(full_path, text):
    """Return the value of the first line in *full_path* that starts with *text*.

    The value is the remainder of that line with surrounding whitespace
    stripped.  Following lines that begin with whitespace are treated as
    continuations and appended verbatim (including their leading whitespace
    and line ending).  Collection stops at the first line that does not
    begin with whitespace, or at the end of the file.

    Returns ``None`` when no line starts with *text*.
    """
    result = None
    # The context manager closes the file on every return path.
    with open(full_path, encoding="utf-8") as pep_file:
        for line in pep_file:
            if result is not None:
                if not line[0].strip():  # Line begins with whitespace
                    result += line
                else:
                    return result
            if line.startswith(text):
                result = line[len(text):].strip()
    # End of file reached: if the header was found it ran up to the last
    # line, so hand back what was collected rather than discarding it.
    return result
|  | ||||||||||||||
|  | ||||||||||||||
def pep_creation_dt(full_path):
    """Return the creation date of the PEP at *full_path* as a ``datetime``.

    The date is read from the ``Created:`` header and must look like
    ``day-month-year`` with a four-digit year; the month may be an
    abbreviated or a full month name.  When the header is missing or
    contains no such date, the epoch (in local time) is returned so the
    result can still be compared with other datetimes.

    Raises ``ValueError`` if a date-like string is found but cannot be
    parsed with either month format.
    """
    created_str = first_line_starting_with(full_path, 'Created:')
    # bleh, I was hoping to avoid re but some PEPs editorialize
    # on the Created line
    # A missing header yields None, which re.search() would reject with a
    # TypeError; treat it the same as a line without a date.
    m = re.search(r'''(\d+-\w+-\d{4})''', created_str) if created_str else None
    if not m:
        # some older ones have an empty line, that's okay, if it's old
        # we ipso facto don't care about it.
        # "return None" would make the most sense but datetime objects
        # refuse to compare with that. :-|
        return datetime.datetime(*time.localtime(0)[:6])
    created_str = m.group(1)
    try:
        t = time.strptime(created_str, '%d-%b-%Y')
    except ValueError:
        # Fall back to the full month name.
        t = time.strptime(created_str, '%d-%B-%Y')
    return datetime.datetime(*t[:6])
|  | ||||||||||||||
|  | ||||||||||||||
def pep_number(full_path):
    """Extract the PEP number from a path such as ``pep-0008.txt``.

    The number is the text between the last ``-`` in *full_path* and the
    first ``.`` after it.  Raises ``Exception`` when that text is not an
    integer.
    """
    digits = full_path.rpartition('-')[2].partition('.')[0]
    try:
        return int(digits)
    except ValueError:
        raise Exception("Can't parse pep number %s" % digits)
|  | ||||||||||||||
|  | ||||||||||||||
def parse_authors(authors_str):
    """Split a comma-separated author string into a list of author entries.

    Each entry has surrounding whitespace (including line breaks from
    continuation lines) removed.  Empty entries, such as those produced by
    a trailing or doubled comma, are dropped so callers never see a blank
    author.
    """
    stripped = (author.strip() for author in authors_str.split(','))
    return [author for author in stripped if author]
|  | ||||||||||||||
|  | ||||||||||||||
| 
      Comment on lines
    
      +54
     to 
      +56
    
   There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 | ||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|  | @@ -4,3 +4,6 @@ docutils >= 0.16 | |||||||
|  | ||||||||
| # For RSS | ||||||||
| feedgen >= 0.9.0 # For RSS feed | ||||||||
|  | ||||||||
| # For bibliography | ||||||||
| pybtex >= 0.24.0 | ||||||||
| 
      Comment on lines
    
      +7
     to 
      +9
    
   There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 I would strongly advise against adding this to the requirements file, as this means all CIs and users that set up an env for our repo in a normal fashion will have to install it, when it isn't (yet) actually used anywhere other than a niche manual command. I suggest handling it just like we did the  | ||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are top-level constants, so I'd think it would make sense to make them
`UPPER_SNAKE_CASE` like the others.