Skip to content

Commit

Permalink
[ZEPPELIN-5970][ZEPPELIN-5971] bugs occur when zeppelin.livy.tableWit…
Browse files Browse the repository at this point in the history
…hUTFCharacters is enabled (apache#4675)

* fix the error that occurs when a non-select SQL statement is executed and tableWithUTFCharacters is true

* fix the error that occurs when the JSON response contains non-primitive data, such as an array, and tableWithUTFCharacters is true

* rename custom function

* fix bug and improve performance by iterating over the entrySet.

* remove nit

---------

Co-authored-by: Philipp Dallig <philipp.dallig@gmail.com>
  • Loading branch information
2 people authored and akoira committed Feb 1, 2024
1 parent b8a8131 commit ae7fe23
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
package org.apache.zeppelin.livy;

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

import java.io.IOException;
import java.io.StringWriter;
Expand Down Expand Up @@ -180,15 +181,20 @@ protected List<String> parseSQLJsonOutput(String output) {
List<String> rows = new ArrayList<>();

String[] rowsOutput = output.split("(?<!\\\\)\\n");

if (rowsOutput.length < 2){
return Arrays.asList(rowsOutput);
}

String[] header = rowsOutput[1].split("\t");
List<String> cells = new ArrayList<>(Arrays.asList(header));
rows.add(StringUtils.join(cells, "\t"));

for (int i = 2; i < rowsOutput.length; i++) {
Map<String, String> retMap = new Gson().fromJson(
rowsOutput[i], new TypeToken<HashMap<String, String>>() {
}.getType()
);
// deserialize field by field to handle the case when
// a value is non-primitive, such as: {"lang": ["java", "NodeJS"]}.
Map<String, String> retMap = deserialize(rowsOutput[i]);

cells = new ArrayList<>();
for (String s : header) {
cells.add(retMap.getOrDefault(s, "null")
Expand All @@ -200,6 +206,26 @@ protected List<String> parseSQLJsonOutput(String output) {
return rows;
}

/**
 * Flattens a single JSON object into a map from field name to its textual value.
 *
 * <p>Primitive fields are stored through {@code getAsString()}, so a JSON string is kept
 * without its surrounding quotes. Every other kind of field (for example an array) is stored
 * as the JSON text produced by {@code toString()}.
 *
 * @param jsonString the text of one JSON object
 * @return a new map holding one entry per field of the parsed object
 */
private Map<String, String> deserialize(String jsonString) {
  JsonObject row = new Gson().fromJson(jsonString, JsonElement.class).getAsJsonObject();
  Map<String, String> textByField = new HashMap<>();

  row.entrySet().forEach(field -> {
    JsonElement cell = field.getValue();
    // Non-primitive cells keep their raw JSON form so nested data stays readable.
    String text = cell.isJsonPrimitive() ? cell.getAsString() : cell.toString();
    textByField.put(field.getKey(), text);
  });

  return textByField;
}

protected List<String> parseSQLOutput(String str) {
// the regex is taken from org.apache.spark.util.Utils#fullWidthRegex
// in Spark, every Chinese character occupies two placeholders (one placeholder is one char)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,14 @@ void testParseSQLOutput() {

@Test
void parseSQLJsonOutput() {

// Empty output
List<String> rows = sqlInterpreter.parseSQLJsonOutput("\n");
assertEquals(0, rows.size());

// Empty sql output
// id name
List<String> rows = sqlInterpreter.parseSQLJsonOutput("\nid\tname\n");
rows = sqlInterpreter.parseSQLJsonOutput("\nid\tname\n");
assertEquals(1, rows.size());
assertEquals("id\tname", rows.get(0));

Expand Down Expand Up @@ -274,5 +279,12 @@ void parseSQLJsonOutput() {
assertEquals("1\t1a", rows.get(1));
assertEquals("2\tみんく", rows.get(2));
assertEquals("3\t3a", rows.get(3));


rows = sqlInterpreter.parseSQLJsonOutput("\nid\tarray\tname\n"
+ "{\"id\":1,\"array\":[\"1a\",\"2a\"],\"name\":\"1b\"}\n");
assertEquals(2, rows.size());
assertEquals("id\tarray\tname", rows.get(0));
assertEquals("1\t[\"1a\",\"2a\"]\t1b", rows.get(1));
}
}

0 comments on commit ae7fe23

Please sign in to comment.