Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions java-bigquery-jdbc/tools/client/JDBCClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public static void main(String[] args) throws Exception {
String driverClass = "com.google.cloud.bigquery.jdbc.BigQueryDriver";
String action = null;
String query = null;
String queryFile = null;
boolean noOutput = false;
int generateRows = 0;
int generateCols = 5;
Expand Down Expand Up @@ -53,6 +54,7 @@ public static void main(String[] args) throws Exception {
case "driver-class": driverClass = val; break;
case "action": action = val; break;
case "query": query = val; break;
case "query-file": queryFile = val; break;
case "no-output": noOutput = true; break;
case "generate-rows": generateRows = Integer.parseInt(val); break;
case "generate-cols": generateCols = Integer.parseInt(val); break;
Expand Down Expand Up @@ -88,11 +90,19 @@ public static void main(String[] args) throws Exception {
System.out.println("Connection successful.\n");

if ("query".equals(action)) {
if (generateRows > 0) {
if (query == null && queryFile != null) {
try {
query = readQueryFromFile(queryFile);
} catch (Exception e) {
System.err.println("Error reading query from file: " + e.getMessage());
System.exit(1);
}
}
if (query == null && generateRows > 0) {
query = generateDataQuery(generateRows, generateCols);
}
if (query == null) {
System.err.println("Error: --query or --generate-rows is required when action is 'query'");
System.err.println("Error: --query, --query-file, or --generate-rows is required when action is 'query'");
System.exit(1);
}
Comment on lines 104 to 107

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

If the user provides an empty query string (e.g., --query "") or an empty query file, the client will proceed to execute it and fail with a less descriptive JDBC driver error. Adding a check for empty/blank queries improves usability.

Suggested change
if (query == null) {
System.err.println("Error: --query or --generate-rows is required when action is 'query'");
System.err.println("Error: --query, --query-file, or --generate-rows is required when action is 'query'");
System.exit(1);
}
if (query == null) {
System.err.println("Error: --query, --query-file, or --generate-rows is required when action is 'query'");
System.exit(1);
}
if (query.trim().isEmpty()) {
System.err.println("Error: Query cannot be empty");
System.exit(1);
}

warmup(conn);
Expand Down Expand Up @@ -122,6 +132,10 @@ private static void warmup(Connection conn) {
System.out.println("Warmup complete.\n");
}

/**
 * Loads the entire contents of a file and returns it as a single string.
 *
 * <p>The file's bytes are decoded as UTF-8; no trimming or other normalization is applied.
 *
 * @param path location of the file to read
 * @return the decoded contents of the file
 * @throws Exception if the path is invalid or the file cannot be read
 */
private static String readQueryFromFile(String path) throws Exception {
  java.nio.file.Path source = java.nio.file.Paths.get(path);
  byte[] raw = java.nio.file.Files.readAllBytes(source);
  return new String(raw, java.nio.charset.StandardCharsets.UTF_8);
}

private static String generateDataQuery(int rows, int cols) {
int N = (int) Math.ceil(Math.sqrt(rows));
String idxExpr = "(i - 1) * " + N + " + j";
Expand Down
12 changes: 10 additions & 2 deletions java-bigquery-jdbc/tools/client/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,20 @@ PARAMS = ProjectId=bigquery-devtools-drivers;OAuthType=0;OAuthServiceAcctEmail=;
# Additional connection parameters
EXTRA_PARAMS ?=

METHOD ?= getTables

ROWS ?= 10
COLS ?= 5
METHOD ?= getTables

OUTPUT ?= false
QUERY ?= SELECT 1
QUERY_FILE ?=

ifneq ($(QUERY_FILE),)
QUERY_FLAG = --query-file "$(QUERY_FILE)"
else
QUERY_FLAG = --query "$(QUERY)"
endif

ifeq ($(OUTPUT),false)
OUTPUT_FLAG = --no-output
Expand All @@ -53,7 +61,7 @@ COMMON_FLAGS = --url "$(URL);$(DEFAULT_PARAMS);$(PARAMS);$(EXTRA_PARAMS)" \
--driver-class "$(DRIVER_CLASS)"

query: classes
$(J) $(JFR_FLAGS) -cp .:$(DRIVER_JAR) JDBCClient --action query $(COMMON_FLAGS) --query "$(QUERY)" $(OUTPUT_FLAG) $(EXTRA_ARGS)
$(J) $(JFR_FLAGS) -cp .:$(DRIVER_JAR) JDBCClient --action query $(COMMON_FLAGS) $(QUERY_FLAG) $(OUTPUT_FLAG) $(EXTRA_ARGS)

query-generated: classes
$(J) $(JFR_FLAGS) -cp .:$(DRIVER_JAR) JDBCClient --action query $(COMMON_FLAGS) $(OUTPUT_FLAG) --generate-rows $(ROWS) --generate-cols $(COLS) $(EXTRA_ARGS)
Expand Down
15 changes: 14 additions & 1 deletion java-bigquery-jdbc/tools/perf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

# Defaults
ITERATIONS ?= 5
QUERY ?=
QUERY_FILE ?=
ROWS ?= 1000
COLS ?= 5
VERSION ?= $(shell sed -n 's/.*<version>\([^<]*\)<\/version>.*/\1/p' ../../pom.xml | head -n 1)
Expand All @@ -25,7 +27,18 @@ HTAPI_OPTS = EnableHighThroughputAPI=1;HighThroughputActivationRatio=0;HighThrou
RUN_PERF = python3 run_perf.py

# Common flags for run_perf.py
COMMON_FLAGS = -n $(ITERATIONS) --generate-rows $(ROWS) --generate-cols $(COLS) --jar1 $(JAR1) --class1 $(CLASS1)
COMMON_FLAGS = -n $(ITERATIONS) --jar1 $(JAR1) --class1 $(CLASS1)

ifeq ($(QUERY)$(QUERY_FILE),)
COMMON_FLAGS += --generate-rows $(ROWS) --generate-cols $(COLS)
else
ifneq ($(QUERY),)
COMMON_FLAGS += --query "$(QUERY)"
endif
ifneq ($(QUERY_FILE),)
COMMON_FLAGS += --query-file "$(QUERY_FILE)"
endif
endif

ifneq ($(JAR2),)
COMMON_FLAGS += --jar2 $(JAR2) --class2 $(CLASS2)
Expand Down
31 changes: 23 additions & 8 deletions java-bigquery-jdbc/tools/perf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,32 @@ The easiest way to run tests is using the provided `Makefile`. It defines target
The Makefile uses the following defaults which can be overridden:

- `ITERATIONS`: 5
- `ROWS`: 1000
- `COLS`: 5
- `ROWS`: 1000 (default, used if no query/query_file is specified)
- `COLS`: 5 (default, used if no query/query_file is specified)
- `QUERY`: Optional custom query to run
- `QUERY_FILE`: Optional path to a SQL file containing the query to run
- `JAR1`: `../../drivers/google-cloud-bigquery-jdbc-0.9.0-all.jar`
- `PROJECT_ID`: `bigquery-devtools-drivers`
- `CREDENTIALS`: Value of `$GOOGLE_APPLICATION_CREDENTIALS`

### Examples

#### Run REST API tests with defaults
#### Run REST API tests with defaults (generates 1000 rows, 5 columns)

```bash
make run-rest
```

#### Run HTAPI tests with custom iterations and rows
#### Run REST API tests with custom generated data size

```bash
make run-htapi ITERATIONS=3 ROWS=50000
make run-rest ROWS=50000 COLS=10
```

#### Run HTAPI tests with custom iterations and query

```bash
make run-htapi ITERATIONS=3 QUERY="SELECT * FROM my_dataset.my_table LIMIT 50000"
```

#### Compare two drivers
Expand All @@ -64,14 +72,21 @@ For more control, you can run `run_perf.py` directly.
- `--class1`: Class name for the first driver (default: `com.google.cloud.bigquery.jdbc.BigQueryDriver`).
- `--class2`: Class name for the second driver (default: `com.google.cloud.bigquery.jdbc.BigQueryDriver`).
- `-n`, `--iterations`: Number of iterations to run (default: 5).
- `--generate-rows`: Number of rows to generate via query (default: 0).
- `--generate-cols`: Number of columns to generate via query (default: 5).
- `--query`: A specific query to run (if not using generated data).
- `--query`: The query to run.
- `--query-file`: Path to a SQL file containing the query to run (ignored if `--query` is also given).
- `--generate-rows`: Number of rows to generate (default: 0). Only used when neither `--query` nor `--query-file` is given; if all three are omitted, 1000 rows and 5 columns are generated.
- `--generate-cols`: Number of columns to generate (default: 5).
- `--output-md`: Append results as a markdown table to this file.
- `--filter-metrics`: Comma-separated list of metrics to include in the markdown table.

### Examples

#### Run a single driver with a custom query

```bash
python3 run_perf.py --url "jdbc:bigquery://https://www.googleapis.com/bigquery/v2:443;ProjectId=my-project;OAuthType=3" --jar1 path/to/driver.jar --query "SELECT * FROM my_dataset.my_table LIMIT 1000" -n 3
```

#### Run a single driver with generated data

```bash
Expand Down
39 changes: 27 additions & 12 deletions java-bigquery-jdbc/tools/perf/run_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# Base directory of the script
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

def run_test(url, driver_jar, driver_class, query=None, generate_rows=0, generate_cols=5, no_output=True):
def run_test(url, driver_jar, driver_class, query=None, query_file=None, generate_rows=0, generate_cols=5, no_output=True):
# Base client folder is tools/client. Relative to tools/perf it is ../client.
client_dir = os.path.join(os.path.dirname(BASE_DIR), "client")

Expand All @@ -41,7 +41,9 @@ def run_test(url, driver_jar, driver_class, query=None, generate_rows=0, generat

if query:
cmd.extend(["--query", query])
if generate_rows > 0:
elif query_file:
cmd.extend(["--query-file", query_file])
elif generate_rows > 0:
cmd.extend(["--generate-rows", str(generate_rows)])
cmd.extend(["--generate-cols", str(generate_cols)])
if no_output:
Expand Down Expand Up @@ -227,26 +229,38 @@ def main():
parser.add_argument("--class1", default="com.google.cloud.bigquery.jdbc.BigQueryDriver", help="Class name for first driver")
parser.add_argument("--class2", default="com.google.cloud.bigquery.jdbc.BigQueryDriver", help="Class name for second driver")
parser.add_argument("-n", "--iterations", type=int, default=5, help="Number of iterations to run (default 5)")
parser.add_argument("--query", help="Query to run")
parser.add_argument("--query-file", help="Path to a SQL file containing the query to run")
parser.add_argument("--generate-rows", type=int, default=0, help="Number of rows to generate")
parser.add_argument("--generate-cols", type=int, default=5, help="Number of columns to generate")
parser.add_argument("--query", help="Query to run (if not using generated data)")
parser.add_argument("--output-md", help="Append markdown table to this file containing the results")
parser.add_argument("--filter-metrics", help="Comma-separated list of metrics to include in markdown tables")

args = parser.parse_args()

query = args.query
query_file = args.query_file
generate_rows = args.generate_rows
generate_cols = args.generate_cols
Comment on lines +241 to +244

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

To ensure that relative paths for --query-file are resolved correctly regardless of the working directory of the subprocess, and to avoid running multiple benchmark iterations when the file is missing, we should convert query_file to an absolute path and validate its existence upfront.

Suggested change
query = args.query
query_file = args.query_file
generate_rows = args.generate_rows
generate_cols = args.generate_cols
query = args.query
query_file = args.query_file
if query_file:
query_file = os.path.abspath(query_file)
if not os.path.isfile(query_file):
import sys
print(f"Error: Query file '{query_file}' does not exist.", file=sys.stderr)
sys.exit(1)
generate_rows = args.generate_rows
generate_cols = args.generate_cols


if not query and not query_file and generate_rows == 0:
generate_rows = 1000
generate_cols = 5

print("=" * 70)
print(f"JDBC Performance Runner")
print(f"URL : {args.url}")
print(f"Iterations : {args.iterations}")
print(f"Jar 1 : {args.jar1} ({args.class1})")
if args.jar2:
print(f"Jar 2 : {args.jar2} ({args.class2})")
if args.generate_rows > 0:
print(f"Generate Rows: {args.generate_rows}")
print(f"Generate Cols: {args.generate_cols}")
elif args.query:
print(f"Query : {args.query}")
if query:
print(f"Query : {query}")
elif query_file:
print(f"Query File : {query_file}")
elif generate_rows > 0:
print(f"Generate Rows: {generate_rows}")
print(f"Generate Cols: {generate_cols}")
print("=" * 70)

driver_results = {}
Expand All @@ -270,9 +284,10 @@ def main():
url=args.url,
driver_jar=driver_jar,
driver_class=driver_class,
query=args.query,
generate_rows=args.generate_rows,
generate_cols=args.generate_cols,
query=query,
query_file=query_file,
generate_rows=generate_rows,
generate_cols=generate_cols,
no_output=True
)
if res:
Expand All @@ -284,7 +299,7 @@ def main():
base_label=base_label,
new_label=new_label,
diff_label=diff_label,
spec_name=f"Rows: {args.generate_rows}, Cols: {args.generate_cols}" if args.generate_rows > 0 else args.query,
spec_name=query if query else (f"File: {query_file}" if query_file else f"Rows: {generate_rows}, Cols: {generate_cols}"),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Since query_file is now resolved to an absolute path, using the full path in spec_name can make the generated markdown table columns excessively wide. It is cleaner to use only the filename via os.path.basename.

Suggested change
spec_name=query if query else (f"File: {query_file}" if query_file else f"Rows: {generate_rows}, Cols: {generate_cols}"),
spec_name=query if query else (f"File: {os.path.basename(query_file)}" if query_file else f"Rows: {generate_rows}, Cols: {generate_cols}"),

output_md=args.output_md,
filter_metrics=args.filter_metrics
)
Expand Down
Loading