diff --git a/DESCRIPTION b/DESCRIPTION
index eb118233..5eeb3c6a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -101,6 +101,7 @@ Imports:
     stringi,
     stringr,
     tidyr (>= 1.0.0),
+    tidyselect,
     vroom,
     xml2
 Suggests:
@@ -109,6 +110,7 @@ Suggests:
     knitr,
     mockery,
     rmarkdown,
+    RSocrata,
     rvest,
     rworldmap,
     sf,
diff --git a/NAMESPACE b/NAMESPACE
index 68efa036..de3a4b03 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -71,6 +71,7 @@ importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
 importFrom(dplyr,mutate_at)
 importFrom(dplyr,mutate_if)
+importFrom(dplyr,n)
 importFrom(dplyr,na_if)
 importFrom(dplyr,pull)
 importFrom(dplyr,recode)
@@ -94,6 +95,7 @@ importFrom(jsonlite,fromJSON)
 importFrom(lifecycle,deprecate_stop)
 importFrom(lubridate,as_date)
 importFrom(lubridate,dmy)
+importFrom(lubridate,dmy_hms)
 importFrom(lubridate,mdy)
 importFrom(lubridate,month)
 importFrom(lubridate,year)
@@ -132,6 +134,7 @@ importFrom(tidyr,pivot_longer)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyr,replace_na)
 importFrom(tidyr,separate)
+importFrom(tidyselect,vars_select_helpers)
 importFrom(utils,download.file)
 importFrom(utils,untar)
 importFrom(vroom,vroom)
diff --git a/NEWS.md b/NEWS.md
index 942c164e..ad8f7d6c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -20,6 +20,7 @@ This release is currently under development
 - Fixed a bug in the data sourced from Germany so that instead of treating it as a line list of individuals it is treated as a relatively finely resolved count data which needs to be summed up (by @sbfnk).
 - Fixed a bug in the Vietnam class due to `stringr` ([#448](https://github.com/epiforecasts/covidregionaldata/pull/448) by @RichardMN).
 - Fixed a bug with the Netherlands class were the lack of Hospitalisation data in the source was causing the class to fail ([#446](https://github.com/epiforecasts/covidregionaldata/pull/446) by @RichardMN).
+- Fixed an issue with the Colombia data and reduced dependencies by making `RSocrata` a suggested package ([#433](https://github.com/epiforecasts/covidregionaldata/pull/433) by @RichardMN).
 
 ## Depreciations
 
diff --git a/R/Colombia.R b/R/Colombia.R
index b920f1d9..f72e7880 100644
--- a/R/Colombia.R
+++ b/R/Colombia.R
@@ -3,7 +3,7 @@
 #'  and processing COVID-19 region data for Colombia
 #'
 # nolint start
-#' @source \url{https://github.com/danielcs88/colombia_covid-19/}
+#' @source \url{https://www.datos.gov.co/Salud-y-Protecci-n-Social/Casos-positivos-de-COVID-19-en-Colombia/gt2j-8ykr}
 # nolint end
 #' @export
 #' @concept dataset
@@ -21,63 +21,99 @@ Colombia <- R6::R6Class("Colombia",
     #' @field origin name of origin to fetch data for
     origin = "Colombia",
     #' @field supported_levels A list of supported levels.
-    supported_levels = list("1"),
+    supported_levels = list("1", "2"),
     #' @field supported_region_names A list of region names in order of level.
-    supported_region_names = list("1" = "departamento"),
+    supported_region_names = list(
+      "1" = "departamento",
+      "2" = "municipio"
+    ),
     #' @field supported_region_codes A list of region codes in order of level.
-    supported_region_codes = list("1" = "iso_3166_2"),
+    supported_region_codes = list(
+      "1" = "iso_3166_2",
+      "2" = "codigo_municipio"
+    ),
     #' @field common_data_urls List of named links to raw data.
     # nolint start
     common_data_urls = list(
-      "main" = "https://raw.githubusercontent.com/danielcs88/colombia_covid-19/master/datos/cronologia.csv"
+      "main" = "https://www.datos.gov.co/resource/gt2j-8ykr.csv?$select=fecha_diagnostico,ciudad_municipio"
     ),
     # nolint end
     #' @field source_data_cols existing columns within the raw data
-    source_data_cols = c("cases_total"),
+    source_data_cols = c("cases_new"),
     #' @field source_text Plain text description of the source of the data
-    source_text = "Daniel C\u00e1rdenas",
+    source_text = "Datos abiertos Colombia (Colombia open data)",
     #' @field source_url Website address for explanation/introduction of the
     #' data
-    source_url = "https://github.com/danielcs88/colombia_covid-19/",
+    source_url = "https://www.datos.gov.co/Salud-y-Protecci-n-Social/Casos-positivos-de-COVID-19-en-Colombia/gt2j-8ykr", # nolint
 
     #' @description Set up a table of region codes for clean data
     #' @importFrom dplyr mutate
     set_region_codes = function() {
-      self$codes_lookup$`1` <- covidregionaldata::colombia_codes
+      all_codes <- covidregionaldata::colombia_codes
+      department_codes <- select(all_codes, level_1_region, level_1_region_code)
+      self$codes_lookup$`1` <- unique(department_codes)
+      self$codes_lookup$`2` <- all_codes
+    },
+
+    #' @description Colombia specific download using Socrata API
+    #' This requires the optional `RSocrata` package and stops with an
+    #' informative error if that package is not installed.
+    #' @importFrom dplyr select
+    download = function() {
+      message_verbose(self$verbose,
+                      "Downloading Colombia data. This may take a while.")
+      # RSocrata is only a suggested package, so check for it at run time
+      if (requireNamespace("RSocrata", quietly = TRUE)) {
+        self$data$raw$main <- RSocrata::read.socrata(self$data_urls[["main"]])
+      } else {
+        stop("covidregionaldata::Colombia$download - requires RSocrata package.\n",
+             "Please run install.packages(\"RSocrata\")\n", call. = FALSE)
+      }
     },
 
-    #' @description Colombia specific state level data cleaning
-    #' @importFrom dplyr select mutate
-    #' @importFrom lubridate ymd
+    #' @description Colombia specific data cleaning
+    #' @importFrom dplyr select mutate rename summarise group_by n
+    #' @importFrom lubridate dmy_hms as_date
     #' @importFrom stringr str_replace_all str_to_sentence str_to_title
     #' @importFrom rlang .data
     #'
     clean_common = function() {
       self$data$clean <- self$data$raw[["main"]] %>%
-        select(
-          date = .data$fecha,
-          level_1_region = .data$departamento,
-          cases_total = .data$casos
-        ) %>%
-        mutate(
-          date = ymd(.data$date),
-          level_1_region = iconv(.data$level_1_region,
-            from = "UTF-8",
-            to = "ASCII//TRANSLIT"
-          ),
-          level_1_region = str_replace_all(.data$level_1_region, " D.C.", ""),
-          level_1_region = str_replace_all(
-            .data$level_1_region,
-            "San Andres y Providencia",
-            "San Andres, Providencia y Santa Catalina"
-          ),
-          level_1_region = str_to_sentence(.data$level_1_region),
-          level_1_region = str_to_title(.data$level_1_region)
+        rename(
+          date = .data$fecha_diagnostico,
+          level_2_region_code = .data$ciudad_municipio
         ) %>%
+        group_by(date, level_2_region_code) %>%
+        summarise(cases_new = n(), .groups = "drop") %>% # each raw row counts as one case
+        mutate(date = as_date(dmy_hms(date)), # parse day-month-year, drop the time
+               level_2_region_code = sprintf("%05d", level_2_region_code)) %>%
         left_join(
-          self$codes_lookup$`1`,
-          by = c("level_1_region" = "level_1_region")
+          self$codes_lookup$`2`,
+          by = c("level_2_region_code" = "level_2_region_code")
         )
+    },
+
+    #' @description Colombia Specific Department Level Data Cleaning
+    #'
+    #' Aggregates data to the level 1 (department) regional level. Data is
+    #' provided by the source at the level 2 (municipality) regional level.
+    #'
+    #' @importFrom dplyr group_by summarise across select
+    #' @importFrom tidyselect vars_select_helpers
+    clean_level_1 = function() {
+      self$data$clean <- self$data$clean %>%
+        select(-level_2_region_code, -level_2_region) %>%
+        group_by(
+          .data$date,
+          .data$level_1_region, .data$level_1_region_code
+        ) %>%
+        summarise(
+          across(
+            tidyselect::vars_select_helpers$where(is.numeric),
+            sum
+          ),
+          .groups = "drop"
+        )
     }
   )
 )
diff --git a/R/Vietnam.R b/R/Vietnam.R
index 258ced9c..63dfc455 100644
--- a/R/Vietnam.R
+++ b/R/Vietnam.R
@@ -104,7 +104,7 @@ Vietnam <- R6::R6Class("Vietnam",
           cases_total,
           deaths_total,
           recovered_total
-          ) %>%
+        ) %>%
         mutate(ncsc_region_code = as.numeric(ncsc_region_code)) %>%
         left_join(
           self$data$raw$provinces %>%
@@ -119,8 +119,6 @@ Vietnam <- R6::R6Class("Vietnam",
           level_1_region = str_replace_all(level_1_region,
                                         "TP HCM", "Hochiminh"),
         ) %>%
-        #
-        #tidyr::drop_na(date, region_name) %>%
         mutate(
           level_1_region = stri_trans_general(level_1_region, "ASCII"),
           level_1_region = stri_trim_both(level_1_region),
diff --git a/data-raw/colombia_codes.R b/data-raw/colombia_codes.R
index 8cb8792b..8c6202a5 100644
--- a/data-raw/colombia_codes.R
+++ b/data-raw/colombia_codes.R
@@ -22,7 +22,7 @@ level_1_region <- read_html(co_iso) %>%
   html_text()
 level_1_region <- level_1_region[1:33]
 
-colombia_codes <- data.frame(
+colombia_departments <- data.frame(
   level_1_region_code,
   level_1_region,
   stringsAsFactors = FALSE
@@ -37,7 +37,7 @@ colombia_codes <- data.frame(
 replacements <- list(
   "Distrito Capital De Bogota" = "Bogota"
 )
-colombia_codes <- colombia_codes %>%
+colombia_departments <- colombia_departments %>%
   mutate(
     level_1_region = ifelse(level_1_region %in% names(replacements),
       replacements[level_1_region],
@@ -46,5 +46,47 @@ colombia_codes <- colombia_codes %>%
     level_1_region = as.character(level_1_region)
   )
 
+# Download the list of municipalities and their codes, then tidy the
+# names so departments can be matched against `colombia_departments`.
+
+colombia_municipalities_sheet <- download_excel(
+  "https://www.dane.gov.co/files/censo2005/provincias/subregiones.xls",
+  "colombia_municipalities.xls",
+  verbose = TRUE,
+  transpose = FALSE,
+  sheet = "Hoja1"
+)
+
+# Pattern -> replacement pairs applied to the title-cased department names
+department_renames <- c(
+  " D.c." = "",
+  "Archipielago De San Andres" = "San Andres, Providencia Y Santa Catalina",
+  "Norte Santander" = "Norte De Santander"
+)
+
+colombia_municipalities <- colombia_municipalities_sheet %>%
+  select(
+    level_2_region = NOM_MPIO,
+    level_2_region_code = DPTOC_MPIO,
+    level_1_region = NOM_DEPTO
+  ) %>%
+  mutate(
+    level_1_region = stri_trans_general(level_1_region, "latin-ascii"),
+    level_1_region = stri_trim_both(level_1_region),
+    level_1_region = stringr::str_to_title(level_1_region),
+    level_1_region = str_replace_all(.data$level_1_region, department_renames),
+    level_2_region = stri_trans_general(level_2_region, "latin-ascii"),
+    level_2_region = stri_trim_both(level_2_region),
+    level_2_region = str_replace_all(.data$level_2_region, c(" D.C." = "")),
+    level_2_region = stringr::str_to_title(level_2_region)
+  )
+
+# Rows with no matching department can be listed with:
+# anti_join(colombia_municipalities, colombia_departments, by=c("level_1_region"))
+colombia_codes <- left_join(
+  colombia_municipalities, colombia_departments,
+  by = c("level_1_region")
+)
+
 # update package region_codes
 usethis::use_data(colombia_codes, overwrite = TRUE)
diff --git a/data/all_country_data.rda b/data/all_country_data.rda
index 5a05afe2..961dfecf 100644
Binary files a/data/all_country_data.rda and b/data/all_country_data.rda differ
diff --git a/data/colombia_codes.rda b/data/colombia_codes.rda
index efc435d6..81666ce6 100644
Binary files a/data/colombia_codes.rda and b/data/colombia_codes.rda differ
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 4423c8e6..e6a1fa72 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -30,6 +30,7 @@ COVID
 covidregionaldata
 cre
 CSSE
+csv
 ctb
 currrently
 DataClass
@@ -132,6 +133,7 @@ rlang
 rmarkdown
 Roxygen
 RoxygenNote
+RSocrata
 rvest
 RVIM
 rworldmap
@@ -144,6 +146,7 @@ seperate
 shapefiles
 sherratt
 Sherratt
+Socrata
 sophie
 SouthAfrica
 spi
diff --git a/man/Colombia.Rd b/man/Colombia.Rd
index d1f88abb..f8228ff5 100644
--- a/man/Colombia.Rd
+++ b/man/Colombia.Rd
@@ -4,7 +4,7 @@
 \alias{Colombia}
 \title{Colombia Class for downloading, cleaning and processing notification data}
 \source{
-\url{https://github.com/danielcs88/colombia_covid-19/}
+\url{https://www.datos.gov.co/Salud-y-Protecci-n-Social/Casos-positivos-de-COVID-19-en-Colombia/gt2j-8ykr}
 }
 \description{
 Information for downloading, cleaning
@@ -70,7 +70,9 @@ data}
 \subsection{Public methods}{
 \itemize{
 \item \href{#method-set_region_codes}{\code{Colombia$set_region_codes()}}
+\item \href{#method-download}{\code{Colombia$download()}}
 \item \href{#method-clean_common}{\code{Colombia$clean_common()}}
+\item \href{#method-clean_level_1}{\code{Colombia$clean_level_1()}}
 \item \href{#method-clone}{\code{Colombia$clone()}}
 }
 }
@@ -79,7 +81,6 @@ data}
 \itemize{
 \item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="available_regions">}\href{../../covidregionaldata/html/DataClass.html#method-available_regions}{\code{covidregionaldata::DataClass$available_regions()}}\out{</span>}
 \item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="clean">}\href{../../covidregionaldata/html/DataClass.html#method-clean}{\code{covidregionaldata::DataClass$clean()}}\out{</span>}
-\item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="download">}\href{../../covidregionaldata/html/DataClass.html#method-download}{\code{covidregionaldata::DataClass$download()}}\out{</span>}
 \item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="download_JSON">}\href{../../covidregionaldata/html/DataClass.html#method-download_JSON}{\code{covidregionaldata::DataClass$download_JSON()}}\out{</span>}
 \item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="filter">}\href{../../covidregionaldata/html/DataClass.html#method-filter}{\code{covidregionaldata::DataClass$filter()}}\out{</span>}
 \item \out{<span class="pkg-link" data-pkg="covidregionaldata" data-topic="DataClass" data-id="get">}\href{../../covidregionaldata/html/DataClass.html#method-get}{\code{covidregionaldata::DataClass$get()}}\out{</span>}
@@ -100,16 +101,41 @@ Set up a table of region codes for clean data
 \if{html}{\out{<div class="r">}}\preformatted{Colombia$set_region_codes()}\if{html}{\out{</div>}}
 }
 
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-download"></a>}}
+\if{latex}{\out{\hypertarget{method-download}{}}}
+\subsection{Method \code{download()}}{
+Colombia specific download using Socrata API
+This requires the optional \code{RSocrata} package and stops with an
+informative error if that package is not installed.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Colombia$download()}\if{html}{\out{</div>}}
+}
+
 }
 \if{html}{\out{<hr>}}
 \if{html}{\out{<a id="method-clean_common"></a>}}
 \if{latex}{\out{\hypertarget{method-clean_common}{}}}
 \subsection{Method \code{clean_common()}}{
-Colombia specific state level data cleaning
+Colombia specific data cleaning
 \subsection{Usage}{
 \if{html}{\out{<div class="r">}}\preformatted{Colombia$clean_common()}\if{html}{\out{</div>}}
 }
 
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-clean_level_1"></a>}}
+\if{latex}{\out{\hypertarget{method-clean_level_1}{}}}
+\subsection{Method \code{clean_level_1()}}{
+Colombia Specific Department Level Data Cleaning
+
+Aggregates data to the level 1 (department) regional level. Data is
+provided by the source at the level 2 (municipality) regional level.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{Colombia$clean_level_1()}\if{html}{\out{</div>}}
+}
+
 }
 \if{html}{\out{<hr>}}
 \if{html}{\out{<a id="method-clone"></a>}}
diff --git a/man/colombia_codes.Rd b/man/colombia_codes.Rd
index 2292ad7a..40ff0e81 100644
--- a/man/colombia_codes.Rd
+++ b/man/colombia_codes.Rd
@@ -5,7 +5,7 @@
 \alias{colombia_codes}
 \title{Region Codes for Colombia Dataset.}
 \format{
-An object of class \code{data.frame} with 33 rows and 2 columns.
+An object of class \code{data.frame} with 1119 rows and 4 columns.
 }
 \usage{
 colombia_codes
diff --git a/tests/testthat/custom_data/Colombia_level_1.rds b/tests/testthat/custom_data/Colombia_level_1.rds
index 7f131bb8..4893c7fb 100644
Binary files a/tests/testthat/custom_data/Colombia_level_1.rds and b/tests/testthat/custom_data/Colombia_level_1.rds differ
diff --git a/tests/testthat/custom_data/Colombia_level_2.rds b/tests/testthat/custom_data/Colombia_level_2.rds
new file mode 100644
index 00000000..4893c7fb
Binary files /dev/null and b/tests/testthat/custom_data/Colombia_level_2.rds differ