Skip to content

Commit

Permalink
Website Cleanup and Changes to Error Handling (#11)
Browse files Browse the repository at this point in the history
* website cleanup and some changes to error handling

* investigating ci issue

* investigating ci issue

* logging request headers, better handling of file retrieval

* download the lfs files
  • Loading branch information
ASRagab authored Aug 21, 2023
1 parent f83aa6f commit 77ba7bf
Show file tree
Hide file tree
Showing 13 changed files with 94 additions and 76 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v3
with:
lfs: true
- name: Setup JDK
uses: actions/setup-java@v3
with:
Expand All @@ -27,8 +29,8 @@ jobs:
run: sbt -v "+scalafmtCheckAll;+test;"
- name: Binary Compatibility Check
run: sbt +mimaReportBinaryIssues
- name: Run BasicApp
- name: Run Cats Effect Example
env:
UNSTRUCTURED_API_KEY: ${{ secrets.UNSTRUCTURED_API_KEY }}
run: |
sbt "examples/runMain org.twelvehart.unstructured4s.examples.BasicApp"
sbt "examples/runMain org.twelvehart.unstructured4s.examples.CatsEffectApp"
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[![Scala Steward badge](https://img.shields.io/badge/Scala_Steward-helping-blue.svg?style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA4AAAAQCAMAAAARSr4IAAAAVFBMVEUAAACHjojlOy5NWlrKzcYRKjGFjIbp293YycuLa3pYY2LSqql4f3pCUFTgSjNodYRmcXUsPD/NTTbjRS+2jomhgnzNc223cGvZS0HaSD0XLjbaSjElhIr+AAAAAXRSTlMAQObYZgAAAHlJREFUCNdNyosOwyAIhWHAQS1Vt7a77/3fcxxdmv0xwmckutAR1nkm4ggbyEcg/wWmlGLDAA3oL50xi6fk5ffZ3E2E3QfZDCcCN2YtbEWZt+Drc6u6rlqv7Uk0LdKqqr5rk2UCRXOk0vmQKGfc94nOJyQjouF9H/wCc9gECEYfONoAAAAASUVORK5CYII=)](https://scala-steward.org)
[![ci](https://github.com/ASRagab/unstructured4s/workflows/pull-request/badge.svg)](https://github.com/ASRagab/unstructured4s/actions)
[![ci](https://github.com/ASRagab/unstructured4s/workflows/ci/badge.svg)](https://github.com/ASRagab/unstructured4s/actions)
[![Maven Central](https://img.shields.io/maven-central/v/org.twelvehart/unstructured4s-core_3.svg?label=Maven%20Central)](https://search.maven.org/search?q=g:%22org.twelvehart%22%20AND%20a:%22unstructured4s-core_3%22)
[![Scaladoc](https://javadoc.io/badge2/org.twelvehart/unstructured4s-core_3/javadoc.svg)](https://javadoc.io/doc/org.twelvehart/unstructured4s-core_3)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
Expand Down
18 changes: 12 additions & 6 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import org.typelevel.scalacoptions.ScalacOptions

Global / onChangedBuildSource := ReloadOnSourceChanges

Global / excludeLintKeys ++= Set(ThisBuild / pomIncludeRepository, unstructured4s / paradox / sourceDirectory)

ThisBuild / scalaVersion := "3.3.0"

ThisBuild / mimaFailOnNoPrevious := false
Expand All @@ -20,9 +22,9 @@ lazy val unstructured4s = (project in file("."))
.settings(
commonSettings,
CustomTasks.settings,
publish / skip := true,
Compile / publishArtifact := false,
Compile / paradoxMaterialTheme :=
publish / skip := true,
Compile / publishArtifact := false,
Compile / paradoxMaterialTheme :=
ParadoxMaterialTheme()
.withColor("blue-grey", "orange")
.withRepository(uri("https://github.com/ASRagab/unstructured4s"))
Expand All @@ -35,9 +37,13 @@ lazy val unstructured4s = (project in file("."))
Compile / paradoxProperties ++= Map(
"snip.core.base_dir" -> ((ThisBuild / baseDirectory).value / "core").getAbsolutePath
),
paradox / sourceDirectory := sourceDirectory.value / "paradox",
git.remoteRepo := scmInfo.value.get.connection.replace("scm:git:", ""),
Test / tpolecatExcludeOptions += ScalacOptions.warnNonUnitStatement
paradox / sourceDirectory := sourceDirectory.value / "paradox",
git.remoteRepo := scmInfo.value.get.connection.replace("scm:git:", ""),
Test / tpolecatExcludeOptions += ScalacOptions.warnNonUnitStatement,
ghpagesCleanSite / excludeFilter :=
new FileFilter {
def accept(f: File) = (ghpagesRepository.value / "CNAME").getCanonicalPath == f.getCanonicalPath
} || "versions.html"
)
.aggregate(core, examples)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,17 @@ object Metadata:
(value.filename, value.filetype, value.pageNumber)
)

enum Unstructured4sError(val body: String, val error: String) extends Exception(error):
case JsonParseError(override val body: String, override val error: String) extends Unstructured4sError(body, error)
case HttpResponseError(override val body: String, val statusCode: String)
extends Unstructured4sError(body, statusCode)
enum Unstructured4sError(val message: String) extends Exception(message):
case JsonParseError(override val message: String) extends Unstructured4sError(message)
case HttpResponseError(override val message: String) extends Unstructured4sError(message)

given Conversion[ResponseException[String, io.circe.Error], Unstructured4sError] with
def apply(responseException: ResponseException[String, io.circe.Error]): Unstructured4sError =
responseException match
case DeserializationException(body, error) => Unstructured4sError.JsonParseError(body, error.getMessage)
case HttpError(body, code) => Unstructured4sError.HttpResponseError(body, code.toString)
case DeserializationException(body, error) =>
Unstructured4sError.JsonParseError(s"Issue parsing response ${error.getMessage}: $body")
case HttpError(body, code) =>
Unstructured4sError.HttpResponseError(s"Http status ${code}: $body")

// #unstructured4s_response
case class Unstructured4sResponse[A](result: Either[Unstructured4sError, List[A]])
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package org.twelvehart.unstructured4s.model

import org.twelvehart.unstructured4s.model.OCRStrategy.Auto
import sttp.client3.*
import sttp.model.Uri
import sttp.model.Part

import scala.annotation.unused

import java.io.File

export CanMultipart.given
Expand Down Expand Up @@ -54,50 +54,51 @@ object Unstructured4sRequestFields:
private val OcrLanguagesField = "ocr_languages"
private val PdfInferTableStructureField = "pdf_infer_table_structure"

// #general_request_fields
final case class GeneralRequestFields(
// #general_request_fields
outputFormat: OutputFormat = OutputFormat.Json,
xmlKeepTags: Boolean = false,
coordinates: Boolean = false,
encoding: Encoding = `UTF-8`,
skipInferTableTypes: Option[Vector[String]] = None,
ocrLanguages: Option[Seq[String]] = None,
includePageBreaks: Boolean = false,
ocrStrategy: Option[OCRStrategy] = None
ocrStrategy: OCRStrategy = OCRStrategy.Auto
// #general_request_fields
) extends Unstructured4sRequestFields:
// #general_request_fields
override def toMultipartSequence: List[Part[RequestBody[Any]]] =

override def toMultipartSequence: List[Part[RequestBody[Any]]] =
val requiredParts = List(
outputFormat.toMultipart,
multipart(XmlKeepTagsField, xmlKeepTags.toString),
multipart(CoordinatesField, coordinates.toString),
encoding.toMultipart,
multipart(IncludePageBreaksField, includePageBreaks.toString)
multipart(IncludePageBreaksField, includePageBreaks.toString),
ocrStrategy.toMultipart
)

val skipInferTableTypesPart =
skipInferTableTypes.map(types => multipart(SkipInferTableTypesField, types.mkString("[", ",", "]")))
val ocrLanguagesPart =
ocrLanguages.map(langs => multipart(OcrLanguagesField, langs.mkString))
val ocrStrategyPart = ocrStrategy.map(_.toMultipart)

val maybeParts = List(ocrLanguagesPart, skipInferTableTypesPart, ocrStrategyPart).flatten
val maybeParts = List(ocrLanguagesPart, skipInferTableTypesPart).flatten
requiredParts ++ maybeParts
end GeneralRequestFields

// #hires_request_fields
final case class HiResRequestFields(
// #hires_request_fields
outputFormat: OutputFormat = OutputFormat.Json,
encoding: Encoding = Encoding("utf-8"),
encoding: Encoding = `UTF-8`,
coordinates: Boolean = false,
pdfInferTableStructure: Boolean = false,
skipInferTableTypes: Option[Vector[String]] = None,
includePageBreaks: Boolean = false,
hiResModelName: Option[HiResModelName] = None,
ocrLanguages: Option[Seq[String]] = None
// #hires_request_fields
) extends Unstructured4sRequestFields:
// #hires_request_fields

override def toMultipartSequence: List[Part[RequestBody[Any]]] =
val ocrLanguagesPart =
ocrLanguages.map(langs => multipart(OcrLanguagesField, langs.mkString))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@ import org.twelvehart.unstructured4s.model.*
object BasicApp extends App:
private val backend = sttp.client3.HttpClientSyncBackend()

private val program = for
file <- pdfEither
apiKey <- apiKeyEnv
client = Unstructured4s.make(backend, ApiKey(apiKey))
response = client.partition(file)
result <- response.result
_ = println(result.mkString("\n"))
yield ()
private val program =
for
file <- pdfEither
apiKey <- apiKeyEnv
client = Unstructured4s.make(backend, ApiKey(apiKey))
response = client.partition(file)
result <- response.result
_ = backend.close()
yield result

program.fold(println, identity)
program.fold(
err => println(s"${err.getMessage}"),
res => println(s"$res")
)
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.twelvehart.unstructured4s.examples

import cats.effect.*
import cats.implicits.*
import org.twelvehart.unstructured4s.*
import org.twelvehart.unstructured4s.model.*
import sttp.capabilities
Expand All @@ -16,8 +17,9 @@ object CatsEffectApp extends IOApp.Simple:
.map(backend =>
Slf4jLoggingBackend(
backend,
logRequestHeaders = false,
logRequestBody = true
logRequestHeaders = true,
logRequestBody = true,
sensitiveHeaders = Set("unstructured-api-key")
)
)

Expand All @@ -28,6 +30,6 @@ object CatsEffectApp extends IOApp.Simple:
client <- Unstructured4s.make(backend, ApiKey(apiKey))
file <- IO.fromEither(pdfEither)
response <- client.partition(file, HiResRequestFields())
_ <- IO.fromEither(response.result).map(result => println(result.mkString("\n")))
_ <- IO.println(response.result.bimap(_.getMessage, _.mkString("\n")).merge)
yield ()
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,20 @@ import cats.implicits.*
import org.twelvehart.unstructured4s.model.*

import java.io.File
import scala.util.Try

export Common.*

object Common:
val pdfEither: Either[Throwable, UnstructuredFile] = {
val currentDir = new File(".").getCanonicalPath
Try(new File(currentDir, "data/sample.pdf")).map(UnstructuredFile(_)).toEither
val file = new File(currentDir, "data/sample.pdf")
Either.cond(file.exists, UnstructuredFile(file), new Exception(s"File not found at ${file.getCanonicalPath}"))
}

val pngEither: Either[Throwable, UnstructuredFile] = {
val currentDir = new File(".").getCanonicalPath
Try(new File(currentDir, "data/sample.png")).map(UnstructuredFile(_)).toEither
val file = new File(currentDir, "data/sample.png")
Either.cond(file.exists, UnstructuredFile(file), new Exception(s"File not found at ${file.getCanonicalPath}"))
}

val filesEither: Either[Throwable, Seq[UnstructuredFile]] =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.twelvehart.unstructured4s.examples

import cats.implicits.*
import org.twelvehart.unstructured4s.*
import org.twelvehart.unstructured4s.model.*
import sttp.capabilities.WebSockets
Expand All @@ -19,8 +20,9 @@ object ZIOApp extends ZIOAppDefault:
.map(backend =>
Slf4jLoggingBackend(
backend,
logRequestHeaders = false,
logRequestBody = true
logRequestHeaders = true,
logRequestBody = true,
sensitiveHeaders = Set("unstructured-api-key")
)
)
}
Expand All @@ -33,8 +35,8 @@ object ZIOApp extends ZIOAppDefault:
backend <- ZIO.service[SttpClient]
unstructuredClient <- Unstructured4s.make(backend, ApiKey(apiKey))
response <- unstructuredClient.partitionMultiple(files)
result <- ZIO.fromEither(response.result)
_ <- Console.printLine(result.mkString("\n"))
result = response.result.bimap(_.getMessage, _.mkString("\n")).merge
_ <- Console.printLine(result)
yield ()
}

Expand Down
19 changes: 11 additions & 8 deletions src/main/paradox/getting-started.md
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
# Getting Started

## Api Key
## ApiKey

You will need to obtain an [apikey] from [Unstructured.io]
You will need to obtain an [ApiKey] from [Unstructured]

[apikey]: https://unstructured.io/#get-api-key
[Unstructured.io]: https://unstructured.io
[ApiKey]: https://unstructured.io/#get-api-key
[Unstructured]: https://unstructured.io

By default, an apikey is needed to make the client; however, individual requests can
also be provided a separate apikey, which will override the default, when passed a header,
see further below.
also be given a separate apikey, passed as a header, which will override the default.

## Instantiating the Client

In addition to the `unstructured4s-core` module, you will need to provide an `sttp` backend, you can use any of the backends provided by [sttp] as long as you can provide a client with the effect capability `F[_]` that has a `Functor` instance (this will typical require some kind of interoperability module with the `cats` typeclass hierarchy)
In addition to the `unstructured4s-core` module, you will need to provide an `sttp` backend. You can use any of the backends provided by [sttp]
as long as you can provide a client with the effect capability `F[_]` that has a `Functor` instance (this will typically require some kind of
interoperability module with the `cats` typeclass hierarchy).

[sttp]: https://sttp.softwaremill.com/en/latest/backends/summary.html


<br/>
First obtain the necessary dependencies for `sttp`, here are the ones for `ZIO`:

First, obtain the necessary dependencies for sttp. Here are the ones for `ZIO`:


@@dependency[sbt,Maven,Gradle] {
Expand All @@ -28,6 +30,7 @@ First obtain the necessary dependencies for `sttp`, here are the ones for `ZIO`:
}

<br/>

Here is the one for the `fs2` backend:


Expand Down
5 changes: 2 additions & 3 deletions src/main/paradox/index.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Unstructured4s

A Scala 3 library that wraps the [Unstructured] API.
A Scala 3 library that provides a functional wrapper for the [Unstructured] API.

[Unstructured]: https://unstructured.io

Expand All @@ -16,8 +16,7 @@ The library is published to [Sonatype] and [Maven Central].

<br/>

See the @link:[Unstructured4s Examples](https://github.com/ASRagab/unstructured4s/tree/main/examples/src/main/scala/org/twelvehart/unstructured4s/examples) { open=new }
for an even quicker start.
See the @link:[Unstructured4s Examples](https://github.com/ASRagab/unstructured4s/tree/main/examples/src/main/scala/org/twelvehart/unstructured4s/examples) { open=new } for an even quicker start.

[Sonatype]: https://s01.oss.sonatype.org/content/repositories/releases/org/twelvehart/unstructured4s-core_3/
[Maven Central]: https://search.maven.org/artifact/org/twelvehart/unstructured4s-core_3
Expand Down
11 changes: 5 additions & 6 deletions src/main/paradox/making-requests.md
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
# Making Requests

The [Unstructured.io API] takes multipart form requests. There are currently two types of requests one can make with the
The [Unstructured] API takes multipart form requests. There are currently two types of requests one can make with the
Unstructured4s client:

[Unstructured.io API]: https://unstructured-io.github.io/unstructured/api.html
[Unstructured]: https://unstructured-io.github.io/unstructured/api.html

## partition

The `partition` request is used to send one file to be partitioned by Unstructured.io. The request takes three
parameters,
only one of which does not have a default value:
parameters, only one of which does not have a default value:

* `file` - The file to be partitioned, of type UnstructuredFile, just an `opaque type` for `java.io.File`
* `file` - The file to be partitioned, of type `UnstructuredFile`, just an `opaque type` for `java.io.File`
* `request` - `Unstructured4sRequestFields` - The request fields to be sent to Unstructured.io. These can be of two types:
* `GeneralRequestFields`
* `HiResRequestFields`
* `customHeaders` - List[Header] - A list of custom `sttp` headers to be sent with the request. These will override
* `customHeaders` - `List[Header]` - A list of custom `sttp` headers to be sent with the request. These will override
any default headers including the `unstructured-api-key` header.

A hopefully sensible default with all necessary parameters for each is provided in the API client.
Expand Down
Loading

0 comments on commit 77ba7bf

Please sign in to comment.