@@ -5,6 +5,7 @@ import dev.langchain4j.data.document.splitter.DocumentSplitters
55import dev .langchain4j .data .message .UserMessage
66import dev .langchain4j .data .segment .TextSegment
77import dev .langchain4j .memory .chat .MessageWindowChatMemory
8+ import dev .langchain4j .model .chat .request .ResponseFormat
89import dev .langchain4j .model .embedding .onnx .bgesmallenv15q .BgeSmallEnV15QuantizedEmbeddingModel
910import dev .langchain4j .model .ollama .OllamaChatModel
1011import dev .langchain4j .model .openai .OpenAiChatModel
@@ -47,10 +48,10 @@ import spray.json.RootJsonFormat
4748import java .io .FileInputStream
4849import java .net .URLEncoder
4950import java .nio .file .Paths
50- import java .time .{Instant , ZoneId }
51+ import java .time .{Duration , Instant , ZoneId }
5152import java .util .concurrent .atomic .AtomicBoolean
5253import scala .concurrent .Future
53- import scala .concurrent .duration .*
54+ import scala .concurrent .duration .DurationInt
5455import scala .sys .process .{Process , stringSeqToProcess }
5556import scala .util .control .NonFatal
5657import scala .util .{Failure , Success }
@@ -331,6 +332,8 @@ object WikipediaEditsAnalyser extends App {
331332 val model = OpenAiChatModel .builder()
332333 .apiKey(OPENAI_API_KEY )
333334 .modelName(GPT_4_O_MINI )
335+ .temperature(0 )
336+ .timeout(Duration .ofSeconds(30 ))
334337 .build()
335338
336339 val promptPersons =
@@ -403,20 +406,21 @@ object WikipediaEditsAnalyser extends App {
403406 val model = OllamaChatModel .builder
404407 .baseUrl(ollamaContainer.getBaseUrl)
405408 .modelName(" llama3.2:1b" )
406- .temperature(0.1 )
409+ .temperature(0 )
410+ .responseFormat(ResponseFormat .JSON )
411+ .timeout(Duration .ofSeconds(30 ))
407412 .build()
408413
409414 val promptPersons =
410415 """ Extract all names of persons from this text:
411416 |{{content}}
412417 |
413418 |Rules:
414- |- Only extract persons: Full names of individuals mentioned in the text -
415- |- Do not extract places: Any geographical locations including countries, cities, regions, landmarks, or specific addresses
416- |- Do not extract organizations: Names of companies, institutions, government bodies, or any other formal groups
417- |- Return extracted persons as list: one name per line, no leading bullet points/hyphens/numbers
418- |- If the list of extracted persons is empty just return: "NONE" without extra text
419- |- Instead of There are no names of persons in this text, just return "NONE"
419+ |- Only extract persons: Full names of individuals mentioned in the text
420+ |- Do NOT extract places: Geographical locations including countries, cities, regions, landmarks, or specific addresses
421+ |- Do NOT extract organizations: Names of companies, institutions, government bodies, or any other formal groups
422+ |- Return output as JSON with exactly this structure: {"names": ["name1", "name2", ...]}
423+ |- If no persons are found, return: {"names": []}
420424 """ .stripMargin
421425
422426 val message = UserMessage .from(promptPersons.replace(" {{content}}" , content))
@@ -425,16 +429,7 @@ object WikipediaEditsAnalyser extends App {
425429 val response = model.chat(message)
426430 val personsFoundText = response.aiMessage().text().trim()
427431
428- val personsFoundList = if (personsFoundText.isEmpty || personsFoundText.contains(" NONE" ) || personsFoundText.contains(" no names" )) {
429- List .empty[String ]
430- } else {
431- val rawNames = personsFoundText.split(" \n " )
432- .map(_.trim)
433- .filter(_.nonEmpty)
434- .filter(! _.equalsIgnoreCase(" NONE" ))
435- .toList
436- sanitizePersonNames(rawNames)
437- }
432+ val personsFoundList = parseJSONResponse(personsFoundText)
438433
439434 if (personsFoundList.isEmpty) {
440435 Future (ctx)
@@ -449,6 +444,25 @@ object WikipediaEditsAnalyser extends App {
449444 }
450445 }
451446
/**
 * Turns the raw text of a model reply into the list of person names it carries.
 *
 * The text is parsed as JSON and its top-level "names" field is decoded as a
 * list of strings, which is then handed to `sanitizePersonNames`.
 *
 * An empty list is returned when the text is empty, when it is not parseable
 * JSON, or when the "names" field cannot be decoded as a list of strings.
 *
 * @param personsFoundText raw reply text
 * @return the result of `sanitizePersonNames` on the decoded names, or an empty list
 */
private def parseJSONResponse(personsFoundText: String) = {
  import io.circe.parser.*

  if (personsFoundText.isEmpty) List.empty[String]
  else
    parse(personsFoundText)
      // Parsing and decoding failures both end up on the Left side.
      .flatMap(_.hcursor.downField("names").as[List[String]])
      .fold(
        _ => List.empty[String],
        decodedNames => sanitizePersonNames(decodedNames)
      )
}
465+
452466 /**
453467 * Formats data as an ASCII table using the layoutz library.
454468 *
0 commit comments