Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add scripts to import videos #60

Merged
merged 5 commits into from
Jul 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,8 @@ gem "meilisearch-rails"
gem "ahoy_matey", "~> 4.2"
gem "vite_rails"
gem "meta-tags", "~> 2.18"

gem "groupdate", "~> 6.2"

gem "appsignal", "~> 3.4"

gem "chartkick", "~> 5.0"

gem "rails_autolink", "~> 1.1"
Expand All @@ -126,3 +123,4 @@ gem "ruby-openai"
gem "json-repair", "~> 0.2.0"

gem "redcarpet", "~> 3.6"
gem "country_select", "~> 8.0"
34 changes: 7 additions & 27 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,11 @@ GEM
chartkick (5.0.7)
concurrent-ruby (1.3.3)
connection_pool (2.4.1)
countries (6.0.1)
countries (5.7.2)
unaccent (~> 0.3)
crack (1.0.0)
bigdecimal
country_select (8.0.3)
countries (~> 5.0)
crack (0.4.5)
rexml
crass (1.0.6)
date (3.3.4)
Expand Down Expand Up @@ -177,15 +178,9 @@ GEM
raabro (~> 1.4)
globalid (1.2.1)
activesupport (>= 6.1)
google-protobuf (4.27.2-aarch64-linux)
bigdecimal
rake (>= 13)
google-protobuf (4.27.2-arm64-darwin)
bigdecimal
rake (>= 13)
google-protobuf (4.27.2-x86_64-darwin)
bigdecimal
rake (>= 13)
google-protobuf (4.27.2-x86_64-linux)
bigdecimal
rake (>= 13)
Expand Down Expand Up @@ -215,15 +210,9 @@ GEM
json-repair (0.2.0)
language_server-protocol (3.17.0.3)
lint_roller (1.1.0)
litestream (0.10.4)
logfmt (>= 0.0.10)
sqlite3
litestream (0.10.4-arm64-darwin)
logfmt (>= 0.0.10)
sqlite3
litestream (0.10.4-x86_64-darwin)
logfmt (>= 0.0.10)
sqlite3
litestream (0.10.4-x86_64-linux)
logfmt (>= 0.0.10)
sqlite3
Expand Down Expand Up @@ -269,12 +258,8 @@ GEM
net-smtp (0.5.0)
net-protocol
nio4r (2.7.3)
nokogiri (1.16.6-aarch64-linux)
racc (~> 1.4)
nokogiri (1.16.6-arm64-darwin)
racc (~> 1.4)
nokogiri (1.16.6-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.16.6-x86_64-linux)
racc (~> 1.4)
pagy (6.5.0)
Expand Down Expand Up @@ -406,9 +391,7 @@ GEM
fugit (~> 1.11.0)
railties (>= 7.1)
sorbet-runtime (0.5.11475)
sqlite3 (2.0.2-aarch64-linux-gnu)
sqlite3 (2.0.2-arm64-darwin)
sqlite3 (2.0.2-x86_64-darwin)
sqlite3 (2.0.2-x86_64-linux-gnu)
standard (1.39.1)
language_server-protocol (~> 3.17.0.2)
Expand Down Expand Up @@ -437,8 +420,8 @@ GEM
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unaccent (0.4.0)
unicode-display_width (2.5.0)
uri (0.13.0)
unicode-display_width (2.4.2)
uri (0.12.2)
useragent (0.16.10)
vcr (6.2.0)
view_component (3.12.1)
Expand Down Expand Up @@ -471,11 +454,7 @@ GEM
zeitwerk (2.6.16)

PLATFORMS
aarch64-linux
arm64-darwin-22
arm64-darwin-23
x86_64-darwin-19
x86_64-darwin-23
x86_64-linux

DEPENDENCIES
Expand All @@ -491,6 +470,7 @@ DEPENDENCIES
capybara
chartkick (~> 5.0)
countries
country_select (~> 8.0)
debug
dockerfile-rails (>= 1.2)
dotenv-rails
Expand Down
2 changes: 2 additions & 0 deletions app/clients/youtube/playlists.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module Youtube
class Playlists < Youtube::Client
DEFAULT_METADATA_PARSER = "Youtube::VideoMetadata"
def all(channel_id:, title_matcher: nil)
items = all_items("/playlists", query: {channelId: channel_id, part: "snippet,contentDetails"}).map do |metadata|
OpenStruct.new({
Expand All @@ -10,6 +11,7 @@ def all(channel_id:, title_matcher: nil)
channel_id: metadata.snippet.channelId,
year: metadata.snippet.title.match(/\d{4}/).to_s.presence || DateTime.parse(metadata.snippet.publishedAt).year,
videos_count: metadata.contentDetails.itemCount,
metadata_parser: DEFAULT_METADATA_PARSER,
slug: metadata.snippet.title.parameterize
})
end
Expand Down
2 changes: 2 additions & 0 deletions app/models/event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ class Event < ApplicationRecord

# validations
validates :name, presence: true
VALID_COUNTRY_CODES = ISO3166::Country.codes
validates :country_code, inclusion: {in: VALID_COUNTRY_CODES}, allow_nil: true
end
92 changes: 92 additions & 0 deletions app/models/youtube/video_metadata.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# require "active_support/core_ext/hash/keys"

# Extracts clean talk metadata from a YouTube video whose title is shaped like
# "<event name>: <talk title> by <speaker>[, <speaker> & <speaker>]".
#
# It will try to:
# - extract the speakers from the title
# - remove the event name from the title to make it less redundant
# - remove leading and trailing separators from the title
module Youtube
  class VideoMetadata
    # Text that separates the talk title from the speakers section.
    SPEAKERS_SECTION_SEPARATOR = " by "
    # Delimiters between individual speaker names inside the speakers section.
    SEPARATOR_IN_BETWEEN_SPEAKERS = / & |, | and /

    # @param metadata [#title, #published_at, #description, #video_id] raw video data
    # @param event_name [String, Regexp, nil] removed from the raw title when present
    # @param options [Hash] accepted for interface compatibility; not used by this parser
    def initialize(metadata:, event_name:, options: {})
      @metadata = metadata
      @event_name = event_name
    end

    # @return [OpenStruct] the parsed title and speakers plus the untouched
    #   raw title, event name, publication date, description and video id
    def cleaned
      OpenStruct.new(
        {
          title: title,
          raw_title: raw_title,
          speakers: speakers,
          event_name: @event_name,
          published_at: @metadata.published_at,
          description: description,
          video_id: @metadata.video_id
        }
      )
    end

    # @return [Boolean] true when the title (without the event name) mentions "keynote"
    def keynote?
      title_without_event_name.match?(/keynote/i)
    end

    # @return [Boolean] true when the title (without the event name) mentions "lightning talks"
    def lighting?
      title_without_event_name.match?(/lightning talks/i)
    end
    # Correctly spelled alias; `lighting?` is kept so existing callers keep working.
    alias_method :lightning?, :lighting?

    private

    # Speaker names parsed from the text after the last " by ".
    # Lightning-talk videos and titles without a speakers section yield [].
    def speakers
      return [] if lighting?

      _talk_title, speakers_section = title_and_speakers_sections
      speakers_section.to_s.split(SEPARATOR_IN_BETWEEN_SPEAKERS).map(&:strip).reject(&:empty?)
    end

    def raw_title
      @metadata.title
    end

    # RubyConf AU 2013: From Stubbies to Longnecks by Geoffrey Giesemann
    # will return "From Stubbies to Longnecks by Geoffrey Giesemann"
    def title_without_event_name
      text = raw_title.to_s
      text = text.gsub(@event_name, "") if @event_name
      # Collapsing whitespace also guarantees the result is a single line.
      remove_leading_and_trailing_separators_from(text.gsub(/\s+/, " "))
    end

    # Splits the title on the LAST " by " so a talk title may itself contain " by ".
    # Returns [talk_title, speakers_section]; speakers_section is nil when the
    # title has no " by " at all (the whole title is then the talk title).
    def title_and_speakers_sections
      head, separator, tail = title_without_event_name.rpartition(SPEAKERS_SECTION_SEPARATOR)
      separator.empty? ? [tail, nil] : [head, tail]
    end

    ## remove : - and other separators from the title
    # Whitespace is stripped first so a separator that follows leading spaces
    # (e.g. " - Talk") is removed in a single pass.
    def remove_leading_and_trailing_separators_from(title)
      title.to_s.strip.sub(/\A[-:\s]+/, "").sub(/[-:,\s]+\z/, "")
    end

    def title
      # when it is a keynote or lightning talk, usually we want to keep the
      # full title without the event name
      return title_without_event_name if keynote? || lighting?

      talk_title, _speakers_section = title_and_speakers_sections
      remove_leading_and_trailing_separators_from(talk_title)
    end

    def description
      @metadata.description
    end
  end
end
84 changes: 84 additions & 0 deletions app/models/youtube/video_metadata_rails_world.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# require "active_support/core_ext/hash/keys"

# Extracts clean talk metadata from a YouTube video whose title is shaped like
# "<speaker>[, <speaker> & <speaker>] - <talk title>".
#
# It will try to:
# - extract the speakers from the title
# - remove the event name from the title to make it less redundant
# - remove leading and trailing separators from the title
module Youtube
  class VideoMetadataRailsWorld
    # Text that separates the speakers section from the talk title.
    SPEAKERS_SECTION_SEPARATOR = " - "
    # Delimiters between individual speaker names inside the speakers section.
    SEPARATOR_IN_BETWEEN_SPEAKERS = / & |, | and /

    # @param metadata [#title, #published_at, #description, #video_id] raw video data
    # @param event_name [String, Regexp, nil] removed from the raw title when present
    # @param options [Hash] accepted for interface compatibility; not used by this parser
    def initialize(metadata:, event_name:, options: {})
      @metadata = metadata
      @event_name = event_name
    end

    # @return [OpenStruct] the parsed title and speakers plus the untouched
    #   raw title, event name, publication date, description and video id
    def cleaned
      OpenStruct.new(
        {
          title: title,
          raw_title: raw_title,
          speakers: speakers,
          event_name: @event_name,
          published_at: @metadata.published_at,
          description: description,
          video_id: @metadata.video_id
        }
      )
    end

    # @return [Boolean] true when the title (without the event name) mentions "keynote"
    def keynote?
      title_without_event_name.match?(/keynote/i)
    end

    private

    # Speaker names parsed from the segment before the first " - ".
    # A title without any " - " has no speakers section, so it yields [].
    def speakers
      segments = raw_title_parts
      return [] if segments.size < 2

      segments.first.split(SEPARATOR_IN_BETWEEN_SPEAKERS).map(&:strip).reject(&:empty?)
    end

    def raw_title
      @metadata.title
    end

    # "Jane Doe - Some Talk - Rails World 2023" with event name "Rails World 2023"
    # will return "Jane Doe - Some Talk"
    def title_without_event_name
      text = raw_title.to_s
      text = text.gsub(@event_name, "") if @event_name
      # Collapsing whitespace also guarantees the result is a single line.
      remove_leading_and_trailing_separators_from(text.gsub(/\s+/, " "))
    end

    ## remove : - and other separators from the title
    # Whitespace is stripped first so a separator that follows leading spaces
    # is removed in a single pass; nil is treated as an empty title.
    def remove_leading_and_trailing_separators_from(title)
      title.to_s.strip.sub(/\A[-:\s]+/, "").sub(/[-:,\s]+\z/, "")
    end

    # The talk title is the second " - " segment; any further segments are
    # ignored. When the title has no " - " the whole remaining title is used.
    def title
      segments = raw_title_parts
      talk_title = (segments.size > 1) ? segments[1] : segments.first
      remove_leading_and_trailing_separators_from(talk_title)
    end

    def description
      @metadata.description
    end

    # Title (without the event name) split on " - ".
    def raw_title_parts
      title_without_event_name.split(SPEAKERS_SECTION_SEPARATOR)
    end
  end
end
1 change: 1 addition & 0 deletions config/credentials.yml.enc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
uAKB/HPZ9hm7lyAIpWgUe+24TG/N8bi2smtvEa7aD0nZNIU1sb0ctVEYNcSQbKJjHWSED68aF9/O7iYBt1QYU93in3J14rDvj0bHJ8GCWNEQFI6QekRPcOVrX4qehGn4Y29ikhsrc1/th6V0EHrkP4q3d+AtK0zazPLpLQgMFSMX5dYjck2vT8HfK3nOvX77+RUPXhyyO9roIBDBpSf8kEayzUi0q/h/k8cGsqbM3aL2InfXNM8gaQgJ7atEAzJsKoyt5yGoFxjWTDoOFEqnIIhXu1uRmW+S5aKUC/h0Kil/nEgsvufZGuPiDQgHvXZmO/3W3MmaCLaO2LHcp4qMySaH5SDBOXdzAmGuEl0SMhdyvtuZBCw60WmWRhh9GnhwBwJs/FdlaGnAGLZyB+lQhYYNTOww--9Tq1/zY6N+kujjV9--iOafU6Imt5fluW+yk+MPWQ==
Loading