forked from texora/engineering-blogs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_opml.rb
executable file
·133 lines (116 loc) · 3.3 KB
/
generate_opml.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env ruby
require 'builder'
require 'feedbag'
require 'json'
require 'net/http'
require 'nokogiri'
# Path of the OPML file: read back to reuse already-known feed URLs, then
# overwritten with the freshly generated document.
OUTPUT_FILENAME = 'engineering_blogs.opml'.freeze
# Title written to the OPML <head> and to the top-level outline.
TITLE = 'Engineering Blogs'.freeze
# Grab name/url pairings from README.md: every "* <name> <http...>" bullet
# becomes a [name, url] pair. File.read closes the file once it has been read,
# whereas the previous bare File.open left the handle open for the whole run.
contents = File.read('README.md')
matches = contents.scan(/\* (.*) (http.*)/)
# Blogs that end up without a usable feed are collected here and reported
# once the run finishes.
unavailable = []

# Names that are skipped without any lookup (one per line). Entries are
# matched exactly against the name parsed from the README.
fast_forwards = <<-NAMES.lines.map(&:strip)
  Baidu Research
  Booking.com
  Fynd
  Graphcool
  LinkedIn
  Medallia
  OmniTI
  Paperless Post
  Pluralsight
  Prolific Interactive
  Quora
  Robert Elder Software
  Simple
  SlideShare
  SourceClear
  Viget
  Zalando
  Zapier
  Zynga
  Dave Beazley
  Edan Kwan
  Grzegorz Gajos
  Joe Armstrong
  Kai Hendry
  LiveOverflow
NAMES
# Record type for one blog entry. Passing a string name registers the class
# as Struct::Blog, which is how the rest of the script refers to it.
blog_fields = [:name, :web_url, :rss_url]
Struct.new('Blog', *blog_fields)

# Blogs for which a feed URL has been resolved.
blogs = []
# Path suffixes appended to a blog URL (trailing slash removed) and probed in
# order when autodiscovery on the blog's own page finds nothing.
FEED_PATH_SUFFIXES = [
  '/rss', '/feed', '/feeds', '/atom.xml', '/feed.xml', '/rss.xml', '.atom', '/.rss'
].freeze

# Ask the Feedly search API for a feed belonging to +web_url+.
#
# Returns the reported feed URL, or nil when the response carries none or the
# request/parse fails. Failures are reported on stderr instead of aborting the
# whole run, so the caller can fall back to Feedbag.
def feedly_feed_url(web_url)
  # The blog URL is embedded as a query value, so it must be escaped;
  # otherwise characters such as '&', '#' or spaces corrupt the request URL.
  query = URI.encode_www_form_component(web_url)
  uri = URI.parse("https://cloud.feedly.com/v3/search/feeds/?query=#{query}")
  response = JSON.parse(Net::HTTP.get(uri))
  # NOTE(review): this reads a top-level responseData.url field, as the
  # original code did; confirm that it matches the current Feedly response.
  data = response['responseData']
  data['url'] if data && data.key?('url')
rescue StandardError => e
  warn "#{web_url}: Feedly lookup failed (#{e.class}: #{e.message})"
  nil
end

# Discover a feed with Feedbag: first on +web_url+ itself, then on each of
# the FEED_PATH_SUFFIXES candidates. Returns the first hit, or nil.
def feedbag_feed_url(web_url)
  found = Feedbag.find(web_url).first
  return found if found

  base = web_url.chomp('/')
  FEED_PATH_SUFFIXES.each do |suffix|
    found = Feedbag.find("#{base}#{suffix}").first
    return found if found
  end
  nil
end

# Index the feeds recorded in the existing OPML file (htmlUrl => xmlUrl).
# The file does not change while the loop runs, so it is parsed once instead
# of once per blog, and the block form of File.open closes the handle. A hash
# lookup also avoids interpolating the URL into an XPath expression, which
# broke on URLs containing a single quote.
existing_feeds = {}
if File.exist?(OUTPUT_FILENAME)
  document = File.open(OUTPUT_FILENAME) { |file| Nokogiri::XML(file) }
  document.xpath('//outline[@htmlUrl]').each do |outline|
    html_url = outline['htmlUrl']
    # Keep the first outline per URL, matching the previous `.first` lookup.
    existing_feeds[html_url] = outline['xmlUrl'] unless existing_feeds.key?(html_url)
  end
end

# For each blog, settle on a feed URL and file the blog under `blogs` or
# `unavailable`.
matches.each do |name, web_url|
  if fast_forwards.include?(name)
    puts "#{name}: TEMP IGNORE"
    unavailable.push(Struct::Blog.new(name, web_url, nil))
    next
  end

  # Reuse the feed recorded in the existing OPML file when there is one.
  rss_url = nil
  if existing_feeds.key?(web_url)
    rss_url = existing_feeds[web_url]
    puts "#{name}: ALREADY HAVE"
  end

  # Otherwise look it up: Feedly first, Feedbag as the backup.
  if rss_url.nil?
    puts "#{name}: GETTING"
    rss_url = feedly_feed_url(web_url) || feedbag_feed_url(web_url)
  end

  if rss_url && !rss_url.empty?
    blogs.push(Struct::Blog.new(name, web_url, rss_url))
  else
    unavailable.push(Struct::Blog.new(name, web_url, rss_url))
  end
end
# Order both lists by capitalized blog name. sort_by returns a new array and
# leaves the receiver untouched, so the in-place sort_by! is needed for the
# ordering to reach the code that follows.
blogs.sort_by! { |b| b.name.capitalize }
unavailable.sort_by! { |b| b.name.capitalize }
# Render the OPML document: an XML declaration, a <head> holding the title,
# and a <body> with one parent outline containing an RSS outline per blog.
xml = Builder::XmlMarkup.new(indent: 2)
xml.instruct!(:xml, version: '1.0', encoding: 'UTF-8')
xml.opml(version: '1.0') do
  xml.head { xml.title(TITLE) }

  xml.body do
    xml.outline(text: TITLE, title: TITLE) do
      blogs.each do |entry|
        xml.outline(type: 'rss', text: entry.name, title: entry.name,
                    xmlUrl: entry.rss_url, htmlUrl: entry.web_url)
      end
    end
  end
end
# Write the rendered document in binary mode. The block form of File.open
# closes the file even when the write raises, which File.new + close did not.
File.open(OUTPUT_FILENAME, 'wb') { |output| output.write(xml.target!) }
# Summary: how many feeds were written, followed by the blogs that have no
# feed, framed by a divider line above and below.
divider = "==================================================="
puts "DONE: #{blogs.count} written to #{OUTPUT_FILENAME}",
     "\nUnable to find an RSS feed for the following blogs:",
     divider
unavailable.each { |blog| puts "#{blog.name} | #{blog.web_url}" }
puts divider