Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions translator/deepl-translate/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# TOCK Deepl Translation

Here are the configurable variables:

- `tock_translator_deepl_target_languages`: comma-separated list of supported target languages, e.g. `en,es`
- `tock_translator_deepl_api_url`: DeepL API URL (defaults to the Pro API URL: https://api.deepl.com/v2/translate).
  If you have problems with the Pro API, you can use the Free API instead: https://api-free.deepl.com/v2/translate
- `tock_translator_deepl_api_key`: DeepL API key to use (see your DeepL account)
- `tock_translator_deepl_glossary_id`: identifier of the glossary to use for translations

> Deepl documentation: https://developers.deepl.com/docs

To integrate the module into a custom Tock Admin, pass the module as a parameter to the `ai.tock.nlp.admin.startAdminServer()` function.

Example:

```kt
package ai.tock.bot.admin

import ai.tock.nlp.admin.startAdminServer
import ai.tock.translator.deepl.deeplTranslatorModule

fun main() {
startAdminServer(deeplTranslatorModule())
}
```

## Http Client Configuration

You can configure the Deepl client, including proxy settings, by passing a parameter to `deeplTranslatorModule`:

```kt
startAdminServer(deeplTranslatorModule(OkHttpDeeplClient {
proxyAuthenticator { _: Route?, response: Response ->
// https://square.github.io/okhttp/3.x/okhttp/index.html?okhttp3/Authenticator.html
if (response.challenges().any { it.scheme.equals("OkHttp-Preemptive", ignoreCase = true) }) {
response.request.newBuilder()
.header("Proxy-Authorization", credential)
.build()
} else {
null
}
}
}))
```
52 changes: 52 additions & 0 deletions translator/deepl-translate/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Copyright (C) 2017/2021 e-voyageurs technologies
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ai.tock</groupId>
<artifactId>tock-translator</artifactId>
<version>24.3.4-SNAPSHOT</version>
</parent>

<artifactId>tock-deepl-translate</artifactId>
<name>Tock Deepl Translator</name>
<description>Deepl translator implementation</description>

<dependencies>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
</dependency>
<dependency>
<groupId>ai.tock</groupId>
<artifactId>tock-translator-core</artifactId>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.12.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>
</dependencies>

</project>
119 changes: 119 additions & 0 deletions translator/deepl-translate/src/main/kotlin/DeeplClient.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Copyright (C) 2017/2021 e-voyageurs technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.tock.translator.deepl

import ai.tock.shared.jackson.mapper
import ai.tock.shared.property
import ai.tock.shared.propertyOrNull
import com.fasterxml.jackson.module.kotlin.readValue
import java.io.IOException
import java.util.regex.Pattern
import okhttp3.FormBody
import okhttp3.OkHttpClient
import okhttp3.Request

/**
 * Body of a Deepl translation response, as deserialized by the client.
 *
 * @property translations translations returned by the API; the client only reads the first entry.
 */
internal data class TranslationResponse(val translations: List<Translation>)

/**
 * One entry of a Deepl translation response.
 *
 * @property text the translated text.
 */
internal data class Translation(val text: String)

/** Value sent as the `tag_handling` form field of each translation request. */
const val TAG_HANDLING = "xml"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This constant could be private, and it could also be in DeeplClient's companion object


/**
 * Abstraction over the Deepl translation API, so the HTTP layer can be swapped or customized.
 */
interface DeeplClient {
    /**
     * Translates [text] from [sourceLang] to [targetLang].
     *
     * @param text the text to translate.
     * @param sourceLang language code of [text].
     * @param targetLang language code to translate into.
     * @param preserveFormatting value forwarded as the `preserve_formatting` option of the request.
     * @param glossaryId optional identifier of the glossary to apply, or `null` for none.
     * @return the translated text, or `null` when no translation is available.
     */
    fun translate(
        text: String,
        sourceLang: String,
        targetLang: String,
        preserveFormatting: Boolean,
        glossaryId: String?
    ): String?
}

/**
 * [DeeplClient] implementation that posts form-encoded requests to the Deepl API with OkHttp.
 *
 * Parameters of the form `{:name}` are swapped for a neutral token before the text is sent, then
 * restored in order of appearance in the translated text, so that their names are never translated.
 *
 * @param apiURL translation endpoint; read from `tock_translator_deepl_api_url` and defaulting to
 * `https://api.deepl.com/v2/translate`.
 * @param apiKey key sent in the `Authorization` header; read from `tock_translator_deepl_api_key`.
 * @param okHttpCustomizer hook applied to the [OkHttpClient.Builder] before the HTTP client is built,
 * for instance to configure a proxy.
 */
class OkHttpDeeplClient(
    private val apiURL: String = property("tock_translator_deepl_api_url", "https://api.deepl.com/v2/translate"),
    private val apiKey: String? = propertyOrNull("tock_translator_deepl_api_key"),
    okHttpCustomizer: OkHttpClient.Builder.() -> Unit = {}
) : DeeplClient {
    private val client = OkHttpClient.Builder().apply(okHttpCustomizer).build()

    /**
     * Replaces every `{:name}` placeholder of [text] with [PLACEHOLDER_TOKEN].
     *
     * @return the rewritten text, and the placeholder names in order of appearance.
     */
    private fun replaceSpecificPlaceholders(text: String): Pair<String, List<String>> {
        val matcher = PLACEHOLDER_PATTERN.matcher(text)

        // Store original placeholder names for later restoration
        val placeholders = mutableListOf<String>()
        while (matcher.find()) {
            placeholders.add(matcher.group(1))
        }

        // Matcher.replaceAll resets the matcher first, so the scan above does not affect the substitution
        return matcher.replaceAll(PLACEHOLDER_TOKEN) to placeholders
    }

    /**
     * Puts the original `{:name}` placeholders back into [text], replacing the occurrences of
     * [PLACEHOLDER_TOKEN] one at a time, in the order of [placeholders].
     */
    private fun revertSpecificPlaceholders(text: String, placeholders: List<String>): String =
        placeholders.fold(text) { current, name -> current.replaceFirst(PLACEHOLDER_TOKEN, "{:$name}") }

    /**
     * Sends [text] to the Deepl API and returns the first translation of the response.
     *
     * @return the translated text with its placeholders restored, or `null` when the response
     * contains no translation.
     * @throws IOException if no api key is configured, if the API answers with an unsuccessful
     * status, or if the response has no body.
     */
    override fun translate(
        text: String,
        sourceLang: String,
        targetLang: String,
        preserveFormatting: Boolean,
        glossaryId: String?
    ): String? {
        // Fail before any network call instead of sending a "DeepL-Auth-Key null" header
        if (apiKey.isNullOrBlank()) {
            throw IOException("Missing Deepl api key: set the tock_translator_deepl_api_key property")
        }

        val (textWithPlaceholders, originalPlaceholders) = replaceSpecificPlaceholders(text)

        val requestBody = FormBody.Builder()
            .add("text", textWithPlaceholders)
            .add("source_lang", sourceLang)
            .add("target_lang", targetLang)
            .add("preserve_formatting", preserveFormatting.toString())
            .add("tag_handling", TAG_HANDLING)
            // The glossary must be added before build(), and the API field name is "glossary_id"
            .apply { glossaryId?.let { add("glossary_id", it) } }
            .build()

        val request = Request.Builder()
            .url(apiURL)
            .addHeader("Authorization", "DeepL-Auth-Key $apiKey")
            .post(requestBody)
            .build()

        return client.newCall(request).execute().use { response ->
            if (!response.isSuccessful) throw IOException("Unexpected code $response")

            val responseBody = response.body?.string()
                ?: throw IOException("Empty response body for $response")

            mapper.readValue<TranslationResponse>(responseBody)
                .translations
                .firstOrNull()
                ?.text
                ?.let { revertSpecificPlaceholders(it, originalPlaceholders) }
        }
    }

    private companion object {
        /** Token substituted for each `{:name}` placeholder in the text sent to Deepl. */
        const val PLACEHOLDER_TOKEN = "_PLACEHOLDER_"

        /** Matches `{:name}` placeholders and captures `name`; compiled once and shared. */
        val PLACEHOLDER_PATTERN: Pattern = Pattern.compile("\\{:([^}]*)}")
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (C) 2017/2021 e-voyageurs technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.tock.translator.deepl

import ai.tock.shared.propertyOrNull
import ai.tock.translator.TranslatorEngine
import java.util.Locale
import org.apache.commons.text.StringEscapeUtils

/**
 * [TranslatorEngine] that delegates translations to a [DeeplClient].
 *
 * The optional `tock_translator_deepl_target_languages` property (comma-separated language codes)
 * restricts the target languages that are translated; when it is not set, every target is accepted.
 * The optional `tock_translator_deepl_glossary_id` property is forwarded to the client on each call.
 */
internal class DeeplTranslatorEngine(client: DeeplClient) : TranslatorEngine {
    private val deeplClient = client

    // null means "no restriction": every target language is sent to Deepl
    private val supportedLanguages: Set<String>? =
        propertyOrNull("tock_translator_deepl_target_languages")
            ?.split(",")
            ?.map { it.trim() }
            ?.toSet()

    private val glossaryId = propertyOrNull("tock_translator_deepl_glossary_id")

    override val supportAdminTranslation: Boolean = true

    /**
     * Translates [text] from the language of [source] to the language of [target].
     *
     * @return the translation with HTML entities unescaped, or an empty string when the target
     * language is filtered out or when the client returns no translation.
     */
    override fun translate(text: String, source: Locale, target: Locale): String {
        // Allows to filter translation on a specific language
        if (supportedLanguages != null && target.language !in supportedLanguages) return ""

        // A null result is mapped to the same empty string as a filtered-out language,
        // instead of being passed on to unescapeHtml4 (which returns null for null)
        val translatedText =
            deeplClient.translate(text, source.language, target.language, true, glossaryId) ?: return ""

        return StringEscapeUtils.unescapeHtml4(translatedText)
    }
}
26 changes: 26 additions & 0 deletions translator/deepl-translate/src/main/kotlin/DeeplTranslatorIoc.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (C) 2017/2021 e-voyageurs technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.tock.translator.deepl

import ai.tock.translator.TranslatorEngine
import com.github.salomonbrys.kodein.Kodein
import com.github.salomonbrys.kodein.bind
import com.github.salomonbrys.kodein.provider

/**
 * Builds the Kodein module that binds [TranslatorEngine] to a [DeeplTranslatorEngine],
 * overriding any binding already registered for it.
 *
 * @param client client passed to each engine created by the binding; defaults to a new [OkHttpDeeplClient].
 */
fun deeplTranslatorModule(client: DeeplClient = OkHttpDeeplClient()): Kodein.Module =
    Kodein.Module {
        bind<TranslatorEngine>(overrides = true) with provider { DeeplTranslatorEngine(client) }
    }
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright (C) 2017/2021 e-voyageurs technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
Comment on lines +1 to +15
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This copyright notice should be above the package declaration I think. Speaking of which, there is no package declaration in this file...?


package ai.tock.translator.deepl

import org.junit.jupiter.api.Disabled
import org.junit.jupiter.api.Test
import java.util.Locale
import kotlin.test.assertEquals

/**
 * Integration tests that call the real Deepl API through [OkHttpDeeplClient].
 *
 * The whole class is disabled because it uses the Deepl pro api, which can be expensive.
 */
@Disabled
class DeeplTranslateIntegrationTest {
    // DeeplTranslatorEngine is a class, not an object: it needs a client instance to be called
    private val engine = DeeplTranslatorEngine(OkHttpDeeplClient())

    @Test
    fun simpleTest() {
        val result = engine.translate(
            "Bonjour, je voudrais me rendre à New-York Mardi prochain",
            Locale.FRENCH,
            Locale.ENGLISH
        )
        assertEquals("Hello, I would like to go to New York next Tuesday.", result)
    }

    @Test
    fun testWithEmoticonAndAntislash() {
        val result = engine.translate(
            "Bonjour, je suis l'Agent virtuel SNCF Voyageurs! \uD83E\uDD16\n" +
                "Je vous informe sur l'état du trafic en temps réel.\n" +
                "Dites-moi par exemple \"Mon train 6111 est-il à l'heure ?\", \"Aller à Saint-Lazare\", \"Prochains départs Gare de Lyon\" ...",
            Locale.FRENCH,
            Locale.ENGLISH
        )

        assertEquals(
            "Hello, I'm the SNCF Voyageurs Virtual Agent! \uD83E\uDD16\n" +
                "I inform you about traffic conditions in real time.\n" +
                "Tell me for example \"Is my train 6111 on time?\", \"Going to Saint-Lazare\", \"Next departures Gare de Lyon\" ...",
            result
        )
    }

    @Test
    fun testWithParameters() {
        val result = engine.translate(
            "Bonjour, je voudrais me rendre à {:city} {:date}",
            Locale.FRENCH,
            Locale.GERMAN
        )
        assertEquals("Hallo, ich würde gerne nach {:city} {:date} fahren.", result)
    }

    @Test
    fun testWithHTML() {
        val result = engine.translate(
            "Bonjour, je voudrais me rendre à Paris <br><br/> demain soir",
            Locale.FRENCH,
            Locale.GERMAN
        )
        assertEquals("Hallo, ich möchte morgen Abend nach Paris <br><br/> fahren", result)
    }
}
1 change: 1 addition & 0 deletions translator/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
<module>core</module>
<module>noop</module>
<module>google-translate</module>
<module>deepl-translate</module>
</modules>

<dependencies>
Expand Down