pluto notebooks
a-mhamdi committed Dec 20, 2024
1 parent d1e0bf0 commit 45b4adb
Showing 8 changed files with 4,317 additions and 48 deletions.
Binary file modified Codes/Julia/Part-3/cnn/cnn.bson
2,283 changes: 2,271 additions & 12 deletions Codes/Julia/Part-3/cnn/cnn.jl

Large diffs are not rendered by default.

64 changes: 62 additions & 2 deletions Codes/Julia/Part-3/gan/gan.jl
@@ -1,13 +1,29 @@
### A Pluto.jl notebook ###
# v0.20.3

using Markdown
using InteractiveUtils

# ╔═╡ e085fb51-8865-4b12-80e9-e5a0ebcdb5da
####################################
#= Generative Adversarial Network =#
####################################
# `versioninfo()` -> 1.11.1

using Flux # v0.14.25

# ╔═╡ 01ca737b-829c-4697-aa5d-6b907cd4c3fd
using Images: Gray

# ╔═╡ 189414a9-03b9-42d7-93ca-15ed58a20ce8
using ProgressMeter

## Generator: noise vector -> synthetic sample.

# ╔═╡ 281920a0-6d99-4896-a8dd-3de46f1182fb
using Plots

# ╔═╡ eb7aa3c4-4340-4ed4-918e-2974002f613d
function generator(; latent_dim=16, img_shape=(28,28,1,1))
    return Chain(
        Dense(latent_dim, 128, relu),
@@ -18,6 +34,8 @@ function generator(; latent_dim=16, img_shape=(28,28,1,1))
end
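
The collapsed hunk above hides the middle of `generator`. As a hedged sketch only (`generator_sketch`, the layer widths, and the batch-friendly reshape are assumptions, not the committed code), a full definition in the same style could be:

## Hedged sketch, not the committed code: fills in the collapsed hunk.
function generator_sketch(; latent_dim=16, img_shape=(28,28,1,1))
    return Chain(
        Dense(latent_dim, 128, relu),
        Dense(128, 256, relu),
        Dense(256, prod(img_shape[1:3]), tanh), # one output per pixel, in [-1, 1]
        x -> reshape(x, img_shape[1:3]..., :)   # flat vector(s) -> 28×28×1×batch
    )
end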

## Discriminator: sample -> score indicating the probability that the sample is real.

# ╔═╡ 69b4af29-b689-4b20-a773-78742bb68281
function discriminator(; img_shape=(28,28,1,1))
    return Chain(
        x -> reshape(x, :, size(x, 4)),
@@ -28,9 +46,13 @@ end
end
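
The discriminator's hidden layers are likewise collapsed; a minimal sketch under the same assumptions (`discriminator_sketch` is hypothetical) that pairs with the logit-based loss below:

## Hedged sketch: the Dense widths are assumptions; the final layer emits a raw
## logit so it composes with `logitbinarycrossentropy` (no sigmoid here).
function discriminator_sketch(; img_shape=(28,28,1,1))
    return Chain(
        x -> reshape(x, :, size(x, 4)),          # flatten each sample in the batch
        Dense(prod(img_shape[1:3]), 128, relu),
        Dense(128, 1)                            # real/fake logit
    )
end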

## Loss functions

# ╔═╡ 11471f2f-df52-4764-b511-9fe1a8848432
bce_loss(y_true, y_pred) = Flux.logitbinarycrossentropy(y_pred, y_true)
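
Note the argument swap: `Flux.logitbinarycrossentropy(ŷ, y)` expects predictions first, so `bce_loss` flips its inputs before delegating. A quick illustration with dummy values:

# Illustrative only: random logits stand in for discriminator outputs.
y_real = ones(Float32, 1, 4)   # labels for a batch of 4 "real" samples
logits = randn(Float32, 1, 4)  # raw scores, no sigmoid applied
bce_loss(y_real, logits)       # logit-BCE is numerically stabler than sigmoid + BCE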

## Training function

# ╔═╡ e866f93b-221d-4444-9fff-28e7c20028d6
function train_gan(gen, disc, gen_opt, disc_opt; n_epochs=16, latent_dim=16)
    @showprogress for epoch in 1:n_epochs

@@ -56,28 +78,66 @@ function train_gan(gen, disc, gen_opt, disc_opt; n_epochs=16, latent_dim=16)
end
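
The body of `train_gan` is collapsed in this diff. As a hedged sketch of one update step in Flux's explicit-state API (`gan_step!` and its arguments are assumptions, not the committed loop):

## Hedged sketch of a single GAN update; assumes `real_imgs` is a 28×28×1×batch
## array and that optimiser state comes from Flux.setup (see the wiring note below).
function gan_step!(gen, disc, gen_state, disc_state, real_imgs; latent_dim=16)
    batch = size(real_imgs, 4)
    noise = randn(Float32, latent_dim, batch)
    fake  = gen(noise)                       # built outside the gradient call below

    # Discriminator step: push real scores toward 1, fake scores toward 0.
    d_loss, d_grads = Flux.withgradient(disc) do d
        bce_loss(ones(Float32, 1, batch), d(real_imgs)) +
        bce_loss(zeros(Float32, 1, batch), d(fake))
    end
    Flux.update!(disc_state, disc, d_grads[1])

    # Generator step: make the discriminator score fresh fakes as real.
    g_loss, g_grads = Flux.withgradient(gen) do g
        bce_loss(ones(Float32, 1, batch), disc(g(noise)))
    end
    Flux.update!(gen_state, gen, g_grads[1])

    return d_loss, g_loss
end

## Hypothetical wiring with the optimisers defined below:
## gen_state  = Flux.setup(gen_opt, gen)
## disc_state = Flux.setup(disc_opt, disc)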

## Set up the GAN

# ╔═╡ 6c3f364c-0d0d-4a34-832e-f3653f15ea54
gen = generator()

# ╔═╡ 3b43ad53-e53e-4849-82db-0441ed8daa3b
disc = discriminator()

# ╔═╡ 4e20b5ff-8b80-4850-9d59-4058a3c16a24
gen_opt = Adam(0.001)

# ╔═╡ 2fc91e4e-b736-4b24-856f-e997cea5e497
disc_opt = Adam(0.0002)

## Train the GAN

# ╔═╡ 0362230d-17e3-424e-b923-1e3b1c5359b5
train_gan(gen, disc, gen_opt, disc_opt)

## Generate and plot some images

# ╔═╡ b0bfaa5f-4e77-4b52-9970-dc0adb47bddb
latent_dim = 16

# ╔═╡ c7707565-b11c-45d1-a670-43b546d447d4
noise = randn(Float32, latent_dim, 16)
# ╔═╡ c685204f-dd86-48ad-96d6-3d94ac30321a
generated_images = [ gen(noise[:, i]) for i in 1:16 ]

# ╔═╡ e50e02cb-8601-4b8b-adad-e9c8f39e7ce5
plot_images = [ plot(Gray.(generated_images[i])[:,:,1,1]) for i in 1:16 ]

# ╔═╡ 62f695e1-6cba-49df-b722-99ccfa5f8c73
titles = reshape([string(i) for i in 1:16], 1, :);

# ╔═╡ 036ebfaf-e14e-4558-b795-d04923922489
plot(
    plot_images...,
    layout = (4, 4),
    title = titles, titleloc=:right, titlefont=font(8),
    size = (800, 800)
)

# ╔═╡ Cell order:
# ╠═e085fb51-8865-4b12-80e9-e5a0ebcdb5da
# ╠═01ca737b-829c-4697-aa5d-6b907cd4c3fd
# ╠═189414a9-03b9-42d7-93ca-15ed58a20ce8
# ╠═eb7aa3c4-4340-4ed4-918e-2974002f613d
# ╠═69b4af29-b689-4b20-a773-78742bb68281
# ╠═11471f2f-df52-4764-b511-9fe1a8848432
# ╠═e866f93b-221d-4444-9fff-28e7c20028d6
# ╠═6c3f364c-0d0d-4a34-832e-f3653f15ea54
# ╠═3b43ad53-e53e-4849-82db-0441ed8daa3b
# ╠═4e20b5ff-8b80-4850-9d59-4058a3c16a24
# ╠═2fc91e4e-b736-4b24-856f-e997cea5e497
# ╠═0362230d-17e3-424e-b923-1e3b1c5359b5
# ╠═b0bfaa5f-4e77-4b52-9970-dc0adb47bddb
# ╠═c7707565-b11c-45d1-a670-43b546d447d4
# ╠═c685204f-dd86-48ad-96d6-3d94ac30321a
# ╠═281920a0-6d99-4896-a8dd-3de46f1182fb
# ╠═e50e02cb-8601-4b8b-adad-e9c8f39e7ce5
# ╠═62f695e1-6cba-49df-b722-99ccfa5f8c73
# ╠═036ebfaf-e14e-4558-b795-d04923922489
182 changes: 175 additions & 7 deletions Codes/Julia/Part-3/nlp/nlp.jl
@@ -1,36 +1,84 @@
### A Pluto.jl notebook ###
# v0.20.3

using Markdown
using InteractiveUtils

# ╔═╡ d8292e75-d298-4e8e-b7bb-acc848e01b4a
#################################
#= Natural Language Processing =#
#################################
# `versioninfo()` -> 1.11.1

using Markdown

# ╔═╡ bf1da4bc-48f4-4042-a7ab-e042bbd32d41
using TextAnalysis

# ╔═╡ c1fb388b-9c59-429b-b036-9bc4395ab5af
using Embeddings

# ╔═╡ fdd4f0b1-1cae-467a-9fbf-6fbbe1c35c1c
using LinearAlgebra

# ╔═╡ f336e7e2-99ae-4b91-8f93-0607eede9773
txt = "The quick brown fox is jumping over the lazy dog" # Pangram [modif.]

# ╔═╡ 40388d9d-16db-4e3c-9384-bc4638ba3663
md"Create a `Corpus` using `txt`"

# ╔═╡ 4febd42e-b2c5-46e2-af27-3e81f825fdab
crps = Corpus([StringDocument(txt)])

# ╔═╡ f38b2eee-ee30-4a4f-8500-2c076000ff95
lexicon(crps)

# ╔═╡ beb39317-c83c-4c0e-ac1d-ab64cde4307d
update_lexicon!(crps)

# ╔═╡ 2c3d4f5f-a0fd-42e0-ba1d-ba390c6da171
lexicon(crps)

# ╔═╡ 59170576-05f3-4edc-b26b-8b48152a4477
lexical_frequency(crps, "fox")
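
For context: `lexical_frequency` is the term's count divided by the corpus's total token count, so for this ten-word pangram the call above should return roughly 0.1 (assuming default, case-sensitive tokenization).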

# ╔═╡ 2388744f-24b6-4ac1-8673-0feb223bfe37
md"Create a `StringDocument` using `txt`"

# ╔═╡ 811cdd9b-7cc8-444b-a621-479f785af38c
sd = StringDocument(txt)

# ╔═╡ 75322fd2-a075-4c89-b219-95b86753457a
md"Get a smaller set of words `text(sd)`"

# ╔═╡ ce5adafd-e6d3-4571-a289-c31940e3ebfc
prepare!(sd, strip_articles | strip_numbers | strip_punctuation | strip_case | strip_whitespace)

# ╔═╡ 356963d7-9999-43bd-a21b-49b3056a172e
stem!(sd)

# ╔═╡ 560a06c5-9078-49ad-9da6-9838656a0948
md"Get the tokens of `sd`"

# ╔═╡ 5b336441-b1f6-4ce1-a979-64e363bbc5fc
the_tokens = tokens(sd)

# ╔═╡ 25388144-78af-4f7d-9f24-7c190141e61f
md"Get the stemmed tokens of `sd`"

# ╔═╡ 6c5b1442-6297-44cb-8f97-200572f5dfb9
stemmer = Stemmer("english")

# ╔═╡ 1cf0c275-c1cc-4453-a5b8-0cfcd541faf8
stemmed_tokens = stem(stemmer, the_tokens)

# ╔═╡ 8295d84f-8aa6-4ad9-be45-fcc9aa3ae087
println("Original tokens: ", the_tokens)

# ╔═╡ 7a155470-e751-4137-9c87-b047bc2d44c6
println("Stemmed tokens: ", stemmed_tokens)

# ╔═╡ e1566e3f-4c08-4485-b345-9cefda319e45
md"**Part-of-speech tags**"

#=
@@ -56,57 +104,177 @@ pos = PoSTagger()
pos(crps)
=#

# ╔═╡ 38639182-7aaa-44e7-b11f-60aebe6a54d8
md"**Word embeddings**"

# ╔═╡ 88d15a75-7d44-405c-a8e4-19c68708d0a5
embtab = load_embeddings(GloVe{:en}, max_vocab_size=5)

# ╔═╡ eb53e251-d8e4-4b66-86fd-279fbbabe7e4
embtab.vocab

# ╔═╡ d1f06dd1-2078-4b4e-a33a-40191b96a1bb
embtab.embeddings

# ╔═╡ eadd26c5-d4ea-4408-ab78-5bc1cce4cd12
glove = load_embeddings(GloVe{:en}, 3, max_vocab_size=10_000)
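
For reference: the second positional argument to `load_embeddings` selects which pretrained file of the `GloVe{:en}` set to load (here file 3); which vector dimensionality that corresponds to depends on the Embeddings.jl file list, so treat the index as notebook-specific.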

# ╔═╡ d77c50ba-84af-419e-b127-a2deb9a3859c
const word_to_index = Dict(word => ii for (ii,word) in enumerate(glove.vocab))

# ╔═╡ 2be17f28-55e6-43c2-bece-77702ad72923
function get_word_vector(word)
    idx = word_to_index[word]
    return glove.embeddings[:, idx]
end
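
`word_to_index[word]` throws a `KeyError` for out-of-vocabulary words; a hedged guarded variant (`try_word_vector` is hypothetical, not part of the notebook):

function try_word_vector(word)
    idx = get(word_to_index, word, nothing)       # nothing if out of vocabulary
    return isnothing(idx) ? nothing : glove.embeddings[:, idx]
end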

# ╔═╡ 6e1668bb-43af-49be-9d9d-623a101dba03
function cosine_similarity(v1::Vector{Float32}, v2::Vector{Float32})
    return dot(v1, v2) / (norm(v1) * norm(v2))
end
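
A quick sanity check of the definition: any nonzero vector has similarity 1 with itself.

v = Float32[1, 2, 3]
cosine_similarity(v, v) ≈ 1.0f0   # cos(0) = 1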

# ╔═╡ 937e7d17-34cc-4e59-b2bf-98cf9e153bb5
md"_e.g. - \"king\" - \"man\" + \"woman\"\"queen\"_"

# ╔═╡ 37ee5b81-3444-49ab-8732-530004b00035
king = get_word_vector("king")

# ╔═╡ 1f128c17-8da6-4752-939b-25e75e8a8725
queen = get_word_vector("queen")

# ╔═╡ 4cc8fb94-43a6-44cd-8754-bb1ef0190573
man = get_word_vector("man")

# ╔═╡ 9c71279a-6ebd-4c86-82e2-c0af535f33b9
woman = get_word_vector("woman")

# ╔═╡ accb5901-6b0b-41f6-8d7c-3dd5a33d571a
cosine_similarity(king - man + woman, queen)

# ╔═╡ 07d3ff60-b7e9-4d99-9a73-601e421e1b64
md"_e.g. - \"Madrid\" - \"Spain\" + \"France\"\"Paris\"_"

# ╔═╡ 6bb3a773-c5c4-4d06-a28e-03737ce02797
Madrid = get_word_vector("madrid")

# ╔═╡ 3b8f4450-4248-4b60-a61c-5411c311d5d6
Spain = get_word_vector("spain")

# ╔═╡ cb9b1c25-9407-4ada-98eb-2b2462479729
France = get_word_vector("france")

# ╔═╡ 17e527a4-b80f-43bd-94a4-3e0411bcc642
Paris = get_word_vector("paris")

# ╔═╡ dca54445-4704-4726-99ce-3f60431459ba
cosine_similarity(Madrid - Spain + France, Paris)

# ╔═╡ 74d57c25-34f0-4f60-9ba9-6d6b507b61f2
md"**Text classification**"

# ╔═╡ 65f6b972-a4fe-4b62-8dc8-80f564de8fbf
md"https://github.com/JuliaText/TextAnalysis.jl/blob/master/docs/src/classify.md"

# ╔═╡ 1653799c-175e-4024-b9a8-ca08446cdc79
md"**Semantic analysis**"

# ╔═╡ 3af49352-fd14-4181-8f2e-5f5aeaf6aff3
fit!(m, "this is financial doc", :financial)

# ╔═╡ 8c93b998-3e9a-4c6f-8ae2-14d1292942ce
fit!(m, "this is legal doc", :legal)
predict(m, "this should be predicted as a legal document")

md"**Semantic analysis**"
m = DocumentTermMatrix(crps)
# ╔═╡ 33272333-27b7-4ed1-918c-fac97e0e18fd
predict(m, "this should be predicted as a legal document")

# ╔═╡ 3a6c37d3-7ea9-4d5b-b486-e3c48be5e076
md"*Latent Semantic Analysis*"

# ╔═╡ 2ded08e4-b048-41e7-a02c-8733e02ef928
lsa(dtm)

# ╔═╡ c011919b-0378-4116-a085-21ee18231104
md"*Latent Dirichlet Allocation*"

# ╔═╡ 4202f7f4-499b-44df-8d79-4779609c93fe
k = 2 # number of topics

# ╔═╡ cddae5b7-8f84-4407-9a09-9f7d19aac071
iterations = 1000 # number of Gibbs sampling iterations

# ╔═╡ 46c868e8-23f8-4c22-9742-2126c5b7e15e
α = 0.1 # hyperparameter: Dirichlet prior on per-document topic mixtures

# ╔═╡ e4ea104e-0bd9-4d54-a4f3-800ad6a3dde9
β = 0.1 # hyperparameter: Dirichlet prior on per-topic word distributions

# ╔═╡ 92520517-1ba9-4436-9472-91699cb2b56c
ϕ, θ = lda(dtm, k, iterations, α, β) # ϕ: topics × words, θ: topics × documents
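
A hedged peek at topic 1's top terms (the `dtm.terms` field name is an assumption about the DocumentTermMatrix internals):

top = sortperm(Vector(ϕ[1, :]), rev=true)[1:min(5, size(ϕ, 2))]
dtm.terms[top]   # the five words most associated with topic 1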

# ╔═╡ 11fe766c-bb25-478a-bd94-e786f98d21aa
m = NaiveBayesClassifier([:legal, :financial])

# ╔═╡ 9120c455-745e-498a-acc7-827da3f8c9ae
dtm = DocumentTermMatrix(crps) # renamed from `m`: Pluto allows a name to be defined in only one cell, and `m` is the classifier above

# ╔═╡ Cell order:
# ╠═d8292e75-d298-4e8e-b7bb-acc848e01b4a
# ╠═bf1da4bc-48f4-4042-a7ab-e042bbd32d41
# ╠═f336e7e2-99ae-4b91-8f93-0607eede9773
# ╠═40388d9d-16db-4e3c-9384-bc4638ba3663
# ╠═4febd42e-b2c5-46e2-af27-3e81f825fdab
# ╠═f38b2eee-ee30-4a4f-8500-2c076000ff95
# ╠═beb39317-c83c-4c0e-ac1d-ab64cde4307d
# ╠═2c3d4f5f-a0fd-42e0-ba1d-ba390c6da171
# ╠═59170576-05f3-4edc-b26b-8b48152a4477
# ╠═2388744f-24b6-4ac1-8673-0feb223bfe37
# ╠═811cdd9b-7cc8-444b-a621-479f785af38c
# ╠═75322fd2-a075-4c89-b219-95b86753457a
# ╠═ce5adafd-e6d3-4571-a289-c31940e3ebfc
# ╠═356963d7-9999-43bd-a21b-49b3056a172e
# ╠═560a06c5-9078-49ad-9da6-9838656a0948
# ╠═5b336441-b1f6-4ce1-a979-64e363bbc5fc
# ╠═25388144-78af-4f7d-9f24-7c190141e61f
# ╠═6c5b1442-6297-44cb-8f97-200572f5dfb9
# ╠═1cf0c275-c1cc-4453-a5b8-0cfcd541faf8
# ╠═8295d84f-8aa6-4ad9-be45-fcc9aa3ae087
# ╠═7a155470-e751-4137-9c87-b047bc2d44c6
# ╠═e1566e3f-4c08-4485-b345-9cefda319e45
# ╠═38639182-7aaa-44e7-b11f-60aebe6a54d8
# ╠═c1fb388b-9c59-429b-b036-9bc4395ab5af
# ╠═88d15a75-7d44-405c-a8e4-19c68708d0a5
# ╠═eb53e251-d8e4-4b66-86fd-279fbbabe7e4
# ╠═d1f06dd1-2078-4b4e-a33a-40191b96a1bb
# ╠═eadd26c5-d4ea-4408-ab78-5bc1cce4cd12
# ╠═d77c50ba-84af-419e-b127-a2deb9a3859c
# ╠═2be17f28-55e6-43c2-bece-77702ad72923
# ╠═fdd4f0b1-1cae-467a-9fbf-6fbbe1c35c1c
# ╠═6e1668bb-43af-49be-9d9d-623a101dba03
# ╠═937e7d17-34cc-4e59-b2bf-98cf9e153bb5
# ╠═37ee5b81-3444-49ab-8732-530004b00035
# ╠═1f128c17-8da6-4752-939b-25e75e8a8725
# ╠═4cc8fb94-43a6-44cd-8754-bb1ef0190573
# ╠═9c71279a-6ebd-4c86-82e2-c0af535f33b9
# ╠═accb5901-6b0b-41f6-8d7c-3dd5a33d571a
# ╠═07d3ff60-b7e9-4d99-9a73-601e421e1b64
# ╠═6bb3a773-c5c4-4d06-a28e-03737ce02797
# ╠═3b8f4450-4248-4b60-a61c-5411c311d5d6
# ╠═cb9b1c25-9407-4ada-98eb-2b2462479729
# ╠═17e527a4-b80f-43bd-94a4-3e0411bcc642
# ╠═dca54445-4704-4726-99ce-3f60431459ba
# ╠═74d57c25-34f0-4f60-9ba9-6d6b507b61f2
# ╠═65f6b972-a4fe-4b62-8dc8-80f564de8fbf
# ╠═11fe766c-bb25-478a-bd94-e786f98d21aa
# ╠═3af49352-fd14-4181-8f2e-5f5aeaf6aff3
# ╠═8c93b998-3e9a-4c6f-8ae2-14d1292942ce
# ╠═33272333-27b7-4ed1-918c-fac97e0e18fd
# ╠═1653799c-175e-4024-b9a8-ca08446cdc79
# ╠═9120c455-745e-498a-acc7-827da3f8c9ae
# ╠═3a6c37d3-7ea9-4d5b-b486-e3c48be5e076
# ╠═2ded08e4-b048-41e7-a02c-8733e02ef928
# ╠═c011919b-0378-4116-a085-21ee18231104
# ╠═4202f7f4-499b-44df-8d79-4779609c93fe
# ╠═cddae5b7-8f84-4407-9a09-9f7d19aac071
# ╠═46c868e8-23f8-4c22-9742-2126c5b7e15e
# ╠═e4ea104e-0bd9-4d54-a4f3-800ad6a3dde9
# ╠═92520517-1ba9-4436-9472-91699cb2b56c
