diff --git a/.gitignore b/.gitignore
index e43b0f9..effc315 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-.DS_Store
+*~
+/.metadata/
diff --git a/ql_demos/.gitignore b/ql_demos/.gitignore
new file mode 100644
index 0000000..c3ed10e
--- /dev/null
+++ b/ql_demos/.gitignore
@@ -0,0 +1 @@
+*.cache
diff --git a/ql_demos/cpp/.project b/ql_demos/cpp/.project
new file mode 100644
index 0000000..e9aa19d
--- /dev/null
+++ b/ql_demos/cpp/.project
@@ -0,0 +1,12 @@
+
+
+ ql-demos-cpp
+
+
+
+
+
+
+ com.semmle.plugin.qdt.core.qlnature
+
+
diff --git a/ql_demos/cpp/.qlpath b/ql_demos/cpp/.qlpath
new file mode 100644
index 0000000..61ec3c4
--- /dev/null
+++ b/ql_demos/cpp/.qlpath
@@ -0,0 +1,10 @@
+
+
+
+ com.semmle.code.cpp.library
+
+ com.semmle.code.cpp.dbscheme
+
+ cpp
+
+
diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/BadOverflowCheck.ql b/ql_demos/cpp/ChakraCore-bad-overflow-check/BadOverflowCheck.ql
new file mode 100644
index 0000000..35b393d
--- /dev/null
+++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/BadOverflowCheck.ql
@@ -0,0 +1,13 @@
+import cpp
+
+predicate isSmall(Expr e) {
+ e.getType().getSize() < 4
+}
+
+from AddExpr a, Variable v, RelationalOperation cmp
+where a.getAnOperand() = v.getAnAccess()
+ and cmp.getAnOperand() = a
+ and cmp.getAnOperand() = v.getAnAccess()
+ and forall(Expr op | op = a.getAnOperand() | isSmall(op))
+ and not isSmall(a.getExplicitlyConverted())
+select cmp, "Bad overflow check"
diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/README.md b/ql_demos/cpp/ChakraCore-bad-overflow-check/README.md
new file mode 100644
index 0000000..8097be5
--- /dev/null
+++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/README.md
@@ -0,0 +1,3 @@
+Use [this snapshot](https://downloads.lgtm.com/snapshots/cpp/microsoft/chakracore/ChakraCore-revision-2017-April-12--18-13-26.zip)
+
+We now also have this query in our default suite: https://lgtm.com/rules/2156560627/
diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/01_overflow_checks.ql b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/01_overflow_checks.ql
new file mode 100644
index 0000000..6af7831
--- /dev/null
+++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/01_overflow_checks.ql
@@ -0,0 +1,12 @@
+import cpp
+
+/** Matches `var < var + ???`. */
+predicate overflowCheck(LocalScopeVariable var, AddExpr add, RelationalOperation compare) {
+ compare.getAnOperand() = var.getAnAccess() and
+ compare.getAnOperand() = add and
+ add.getAnOperand() = var.getAnAccess()
+}
+
+from LocalScopeVariable var, AddExpr add
+where overflowCheck(var, add, _)
+select add, "Overflow check on variable of type " + var.getUnderlyingType()
diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/02_var_size.ql b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/02_var_size.ql
new file mode 100644
index 0000000..6a57367
--- /dev/null
+++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/02_var_size.ql
@@ -0,0 +1,13 @@
+import cpp
+
+/** Matches `var < var + ???`. */
+predicate overflowCheck(LocalScopeVariable var, AddExpr add, RelationalOperation compare) {
+ compare.getAnOperand() = var.getAnAccess() and
+ compare.getAnOperand() = add and
+ add.getAnOperand() = var.getAnAccess()
+}
+
+from LocalScopeVariable var, AddExpr add
+where overflowCheck(var, add, _)
+ and var.getType().getSize() < 4
+select add, "Overflow check on variable of type " + var.getUnderlyingType()
diff --git a/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/03_bad_overflow_check.ql b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/03_bad_overflow_check.ql
new file mode 100644
index 0000000..d927ef1
--- /dev/null
+++ b/ql_demos/cpp/ChakraCore-bad-overflow-check/steps/03_bad_overflow_check.ql
@@ -0,0 +1,14 @@
+import cpp
+
+/** Matches `var < var + ???`. */
+predicate overflowCheck(LocalScopeVariable var, AddExpr add, RelationalOperation compare) {
+ compare.getAnOperand() = var.getAnAccess() and
+ compare.getAnOperand() = add and
+ add.getAnOperand() = var.getAnAccess()
+}
+
+from LocalScopeVariable var, AddExpr add
+where overflowCheck(var, add, _)
+ and var.getType().getSize() < 4
+ and not add.getConversion+().getType().getSize() < 4
+select add, "Bad overflow check on variable of type " + var.getUnderlyingType()
diff --git a/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql
new file mode 100644
index 0000000..9039d2f
--- /dev/null
+++ b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/FizzOverflow.ql
@@ -0,0 +1,57 @@
+/**
+ * @name Fizz Overflow
+ * @description Narrowing conversions on untrusted data could enable
+ * an attacker to trigger an integer overflow.
+ * @kind path-problem
+ * @problem.severity warning
+ */
+
+import cpp
+import semmle.code.cpp.ir.dataflow.TaintTracking
+import semmle.code.cpp.ir.IR
+import DataFlow::PathGraph
+
+/**
+ * The endianness conversion function `Endian::big()`.
+ * It is Folly's replacement for `ntohs` and `ntohl`.
+ */
+class EndianConvert extends Function {
+ EndianConvert() {
+ this.getName() = "big" and
+ this.getDeclaringType().getName().matches("Endian")
+ }
+}
+
+class Cfg extends TaintTracking::Configuration {
+ Cfg() { this = "FizzOverflowIR" }
+
+ /** Holds if `source` is a call to `Endian::big()`. */
+ override predicate isSource(DataFlow::Node source) {
+ source
+ .asInstruction()
+ .(CallInstruction)
+ .getCallTarget()
+ .(FunctionInstruction)
+ .getFunctionSymbol() instanceof EndianConvert
+ }
+
+ /** Holds if `sink` is a narrowing conversion. */
+ override predicate isSink(DataFlow::Node sink) {
+ sink.asInstruction().getResultSize() < sink
+ .asInstruction()
+ .(ConvertInstruction)
+ .getUnary()
+ .getResultSize()
+ }
+}
+
+from
+ Cfg cfg, DataFlow::PathNode source, DataFlow::PathNode sink, ConvertInstruction conv,
+ Type inputType, Type outputType
+where
+ cfg.hasFlowPath(source, sink) and
+ conv = sink.getNode().asInstruction() and
+ inputType = conv.getUnary().getResultType() and
+ outputType = conv.getResultType()
+select sink, source, sink,
+ "Conversion of untrusted data from " + inputType + " to " + outputType + "."
diff --git a/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md
new file mode 100644
index 0000000..f930223
--- /dev/null
+++ b/ql_demos/cpp/Facebook_Fizz_CVE-2019-3560/README.md
@@ -0,0 +1,5 @@
+# Facebook Fizz integer overflow vulnerability (CVE-2019-3560)
+
+Use [this snapshot](https://downloads.lgtm.com/snapshots/cpp/facebook/fizz/facebookincubator_fizz_cpp-srcVersion_c69ad1baf3f04620393ebadc3eedd130b74f4023-dist_odasa-lgtm-2019-01-13-f9dca2a-universal.zip) for the demo.
+
+[Fizz](https://github.com/facebookincubator/fizz) contained a remotely triggerable infinite loop. For more details about the bug, see this [blog post](https://lgtm.com/blog/facebook_fizz_CVE-2019-3560). A proof-of-concept exploit is available [here](https://github.com/Semmle/SecurityExploits/tree/446048470633bf0f8da9570d008d056dbaa28ea9/Facebook/Fizz/CVE-2019-3560).
diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql
new file mode 100644
index 0000000..bb97da8
--- /dev/null
+++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/00_copy_from_user.ql
@@ -0,0 +1,12 @@
+/**
+ * @name Calls to copy_from_user
+ * @description Find all calls to copy_from_user.
+ */
+
+import cpp
+
+// This first query is essentially equivalent to `grep -r copy_from_user`.
+// It has almost 1300 results.
+from FunctionCall call
+where call.getTarget().getName() = "copy_from_user"
+select call
diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql
new file mode 100644
index 0000000..78cf5da
--- /dev/null
+++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/01_copy_from_user_annotated.ql
@@ -0,0 +1,25 @@
+/**
+ * @name Annotate with types and bounds
+ * @description Find all calls to copy_from_user and annotates them with their
+ * type and inferred size bounds.
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+// Let's add some extra columns, so that we can see a bit more information
+// about the calls to copy_from_user.
+//
+// This shows that there are two fairly common patterns:
+// 1. copy_from_user into a statically sized buffer, and the
+// upper bound of `sizeArg` shows that it is safe.
+// 2. copy_from_user into a buffer that was allocated with kzalloc,
+// and the size argument of the kzalloc is the same as the
+// size argument of copy_from_user. These calls are safe.
+from FunctionCall call, Expr destArg, Expr sizeArg
+where
+ call.getTarget().getName() = "copy_from_user" and
+ destArg = call.getArgument(0) and
+ sizeArg = call.getArgument(2)
+select call, destArg.getType(), lowerBound(sizeArg), upperBound(sizeArg),
+ call.getFile().getRelativePath()
diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql
new file mode 100644
index 0000000..04f23a7
--- /dev/null
+++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/02_filter_with_upperbound.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Filter with upper bound
+ * @description This query excludes results that are safe because the upper
+ * bound of the size argument is less than or equal to the size of
+ * the destination variable.
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+// Let's exclude filter out results that look like this:
+//
+// ```
+// struct MyStruct s;
+// copy_from_user(&s, usrptr, sizeof(s));
+// ```
+from FunctionCall call, Expr destArg, Expr sizeArg
+where
+ call.getTarget().getName() = "copy_from_user" and
+ destArg = call.getArgument(0) and
+ sizeArg = call.getArgument(2) and
+ not destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg)
+select call, destArg.getType(), lowerBound(sizeArg), upperBound(sizeArg),
+ call.getFile().getRelativePath()
diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql
new file mode 100644
index 0000000..1fb3bab
--- /dev/null
+++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/03_filter_with_upperbound.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Filter with upper bound, also for arrays
+ * @description This query excludes results that are safe because the upper
+ * bound of the size argument is less than or equal to the size of
+ * the destination variable or array.
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+// It turns out that the filter in the previous query does
+// not work for array types, so let's add a second filter which
+// excludes examples like this:
+//
+// ```
+// struct MyStruct s[2];
+// copy_from_user(s, usrptr, sizeof(s));
+// ```
+from FunctionCall call, Expr destArg, Expr sizeArg
+where
+ call.getTarget().getName() = "copy_from_user" and
+ destArg = call.getArgument(0) and
+ sizeArg = call.getArgument(2) and
+ not destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg) and
+ not destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg)
+select call, destArg.getType(), lowerBound(sizeArg), upperBound(sizeArg),
+ call.getFile().getRelativePath()
diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql
new file mode 100644
index 0000000..ede596a
--- /dev/null
+++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/04_safe_malloc.ql
@@ -0,0 +1,34 @@
+/**
+ * @name kzalloc only
+ * @description If the copy_from_user is preceded by a kzalloc of the correct
+ * size, then it is safe. To demonstrate, find only those results.
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.dataflow.DataFlow
+
+// Let's see if we can detect this pattern:
+//
+// ```
+// buf = kzalloc(size, GFP_KERNEL);
+// ...
+// copy_from_user(buf, usrptr, size);
+// ```
+//
+// In the next query, we'll use `safe_malloc` to filter those
+// calls out, because they are safe.
+predicate safe_malloc(FunctionCall allocCall, FunctionCall copy_from_user) {
+ exists(DataFlow::Node source, DataFlow::Node sink |
+ allocCall.getTarget().getName() = "kzalloc" and
+ copy_from_user.getTarget().getName() = "copy_from_user" and
+ source.asExpr() = allocCall and
+ sink.asExpr() = copy_from_user.getArgument(0) and
+ DataFlow::localFlow(source, sink) and
+ globalValueNumber(allocCall.getArgument(0)) = globalValueNumber(copy_from_user.getArgument(2))
+ )
+}
+
+from FunctionCall allocCall, FunctionCall copy_from_user
+where safe_malloc(allocCall, copy_from_user)
+select allocCall, copy_from_user
diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql
new file mode 100644
index 0000000..eccddfa
--- /dev/null
+++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/05_filter_with_upperbound_and_safe_malloc.ql
@@ -0,0 +1,37 @@
+/**
+ * @name Filter with upper bound and kzalloc
+ * @description This query excludes results that are safe because the upper
+ * bound of the size argument is less than or equal to the size of
+ * the destination variable or array. It also excludes results
+ * that are safe because the right amount of memory was allocated
+ * with kzalloc.
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.dataflow.DataFlow
+
+// We wrote this predicate in the previous query.
+predicate safe_malloc(FunctionCall allocCall, FunctionCall copy_from_user) {
+ exists(DataFlow::Node source, DataFlow::Node sink |
+ allocCall.getTarget().getName() = "kzalloc" and
+ copy_from_user.getTarget().getName() = "copy_from_user" and
+ source.asExpr() = allocCall and
+ sink.asExpr() = copy_from_user.getArgument(0) and
+ DataFlow::localFlow(source, sink) and
+ globalValueNumber(allocCall.getArgument(0)) = globalValueNumber(copy_from_user.getArgument(2))
+ )
+}
+
+// Add a filter to remove results that match the `safe_malloc` pattern.
+from FunctionCall call, Expr destArg, Expr sizeArg
+where
+ call.getTarget().getName() = "copy_from_user" and
+ destArg = call.getArgument(0) and
+ sizeArg = call.getArgument(2) and
+ not destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg) and
+ not destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg) and
+ not safe_malloc(_, call)
+select call, destArg.getType(), lowerBound(sizeArg), upperBound(sizeArg),
+ call.getFile().getRelativePath()
diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql
new file mode 100644
index 0000000..5d62091
--- /dev/null
+++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/06_stackaddress_dataflow.ql
@@ -0,0 +1,43 @@
+/**
+ * @name Data flow from stack variable address
+ * @description This restricts results to those that are most likely to be
+ * dangerous: copying directly into a stack variable.
+ * @kind path-problem
+ * @problem.severity warning
+ * @id demo/msm/06-stackaddress-dataflow
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import semmle.code.cpp.dataflow.DataFlow
+import DataFlow::PathGraph
+
+class Config extends DataFlow::Configuration {
+ Config() { this = "copy_from_user" }
+
+ override predicate isSource(DataFlow::Node source) {
+ exists(LocalVariable v | source.asExpr().(AddressOfExpr).getOperand() = v.getAnAccess())
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ // This is the logic that was previously in the select clause of the query.
+ exists(FunctionCall call, Expr destArg, Expr sizeArg |
+ call.getTarget().getName() = "copy_from_user" and
+ destArg = sink.asExpr() and
+ destArg = call.getArgument(0) and
+ sizeArg = call.getArgument(2) and
+ not destArg.getType().(PointerType).getBaseType().getSize() >= upperBound(sizeArg) and
+ not destArg.getType().(ArrayType).getSize() >= upperBound(sizeArg)
+ )
+ }
+}
+
+// This query looks specifically for cases where the address of a local
+// variable is used as the target address of the `copy_from_user`. It also
+// uses the DataFlow library, so that you can use the path viewer to see
+// where the stack address comes from.
+//
+// The vulnerabilities are the final two results in `msm_cpp.c`.
+from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink, source, sink, "possibly unsafe copy_from_user"
diff --git a/ql_demos/cpp/Qualcomm-MSM-copy_from_user/README.md b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/README.md
new file mode 100644
index 0000000..545706d
--- /dev/null
+++ b/ql_demos/cpp/Qualcomm-MSM-copy_from_user/README.md
@@ -0,0 +1,5 @@
+[Blog post](https://lgtm.com/blog/qualcomm_copy_from_user)
+
+[Snapshot for this demo](https://downloads.lgtm.com/snapshots/cpp/qualcomm/msm/msm-4.4-revision-2017-May-07--08-33-56.zip)
+
+The blog post was written before we had the C++ dataflow library, so these demo queries are a bit different than the blog post.
diff --git a/ql_demos/cpp/README.md b/ql_demos/cpp/README.md
new file mode 100644
index 0000000..47dd67a
--- /dev/null
+++ b/ql_demos/cpp/README.md
@@ -0,0 +1 @@
+[snapshot](https://downloads.lgtm.com/snapshots/cpp/libssh2/libssh2_libssh2_C_C++_38bf7ce.zip)
diff --git a/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/DTraceUnsafeIndex.ql b/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/DTraceUnsafeIndex.ql
new file mode 100644
index 0000000..73b2f21
--- /dev/null
+++ b/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/DTraceUnsafeIndex.ql
@@ -0,0 +1,52 @@
+/**
+ * @name DTrace unsafe index
+ * @description DTrace registers are user-controllable, so they must not be
+ * used to index an array without a bounds check.
+ * @kind path-problem
+ * @problem.severity warning
+ * @id apple-xnu/cpp/dtrace-unsafe-index
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import DataFlow::PathGraph
+
+class RegisterAccess extends ArrayExpr {
+ RegisterAccess() {
+ exists (LocalScopeVariable regs, Function emulate
+ | regs.getName() = "regs" and
+ emulate.getName() = "dtrace_dif_emulate" and
+ regs.getFunction() = emulate and
+ this.getArrayBase() = regs.getAnAccess())
+ }
+}
+
+class PointerUse extends Expr {
+ PointerUse() {
+ exists (ArrayExpr ae | this = ae.getArrayOffset()) or
+ exists (PointerDereferenceExpr deref | this = deref.getOperand()) or
+ exists (PointerAddExpr add | this = add.getAnOperand())
+ }
+}
+
+class DTraceUnsafeIndexConfig extends DataFlow::Configuration {
+ DTraceUnsafeIndexConfig() {
+ this = "DTraceUnsafeIndexConfig"
+ }
+
+ override predicate isSource(DataFlow::Node node) {
+ node.asExpr() instanceof RegisterAccess
+ }
+
+ override predicate isSink(DataFlow::Node node) {
+ node.asExpr() instanceof PointerUse
+ }
+}
+
+from DTraceUnsafeIndexConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink, source, sink, "DTrace unsafe index"
+
+/*
+ * This query has 16 results. The 16th result is the vulnerability: dtrace_isa.c:817
+ */
diff --git a/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/README.md b/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/README.md
new file mode 100644
index 0000000..b9f2ed7
--- /dev/null
+++ b/ql_demos/cpp/XNU_DTrace_CVE-2017-13782/README.md
@@ -0,0 +1,5 @@
+[Blog post](https://lgtm.com/blog/apple_xnu_dtrace_CVE-2017-13782)
+
+Bug was fixed in [macOS High Sierra 10.13.1](https://support.apple.com/en-us/HT208221).
+
+[This snapshot](https://downloads.lgtm.com/snapshots/cpp/apple/xnu/XNU-revision-2017-June-13--15-52-38.zip) (macOS 10.13) has the bug.
diff --git a/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql
new file mode 100644
index 0000000..94ae60f
--- /dev/null
+++ b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/BCopyNegativeSize.ql
@@ -0,0 +1,34 @@
+/**
+ * @name bcopy with negative size
+ * @description Calling bcopy with a negative size argument will crash the
+ * kernel due to a negative integer overflow.
+ * @kind path-problem
+ * @problem.severity warning
+ * @id apple-xnu/cpp/bcopy-negative-size
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import DataFlow::PathGraph
+
+class MyCfg extends TaintTracking::Configuration {
+ MyCfg() {
+ this = "MyCfg"
+ }
+
+ override predicate isSource(DataFlow::Node source) {
+ source.asExpr().(FunctionCall).getTarget().getName() = "mbuf_data"
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists (FunctionCall call
+ | sink.asExpr() = call.getArgument(2) and
+ call.getTarget().getName() = "__builtin___memmove_chk" and
+ lowerBound(sink.asExpr()) < 0)
+ }
+}
+
+from DataFlow::PathNode sink, DataFlow::PathNode source, MyCfg cfg
+where cfg.hasFlowPath(source, sink)
+select sink, source, sink, "The size argument of bcopy might be negative."
diff --git a/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/README.md b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/README.md
new file mode 100644
index 0000000..7ca34fd
--- /dev/null
+++ b/ql_demos/cpp/XNU_NFS_Boot_CVE-2018-4136_CVE-2018-4160/README.md
@@ -0,0 +1,5 @@
+[Blog post](https://lgtm.com/blog/apple_xnu_nfs_boot_CVE-2018-4136_CVE-2018-4160)
+
+Bug was fixed in [macOS High Sierra 10.13.4](https://support.apple.com/en-gb/HT208692).
+
+[This snapshot](https://downloads.lgtm.com/snapshots/cpp/apple/xnu/xnu-4570.41.2_macOS-10.13.3_Semmle-1.16.1.zip) has the bug.
diff --git a/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/00_mbuf_copydata_tainted_size.ql b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/00_mbuf_copydata_tainted_size.ql
new file mode 100644
index 0000000..b34679d
--- /dev/null
+++ b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/00_mbuf_copydata_tainted_size.ql
@@ -0,0 +1,43 @@
+/**
+ * @name 00 mbuf copydata with tainted size
+ * @description Calling m_copydata with an untrusted size argument
+ * could cause a buffer overflow.
+ * @kind path-problem
+ * @problem.severity warning
+ * @id apple-xnu/cpp/mbuf-copydata-with-tainted-size
+ */
+
+/*
+ * This query is explained in detail in this blog post:
+ *
+ * https://lgtm.com/blog/apple_xnu_icmp_error_CVE-2018-4407
+ *
+ * It is based on the assumption that the function `m_mtod`, which returns
+ * a pointer to the data stored in an `mbuf`, often returns a buffer
+ * containing untrusted data.
+ *
+ * The query has multiple results. The interesting result is the one in
+ * `ip_icmp.c`.
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+import DataFlow::PathGraph
+
+class Config extends TaintTracking::Configuration {
+ Config() { this = "mbuf copydata with tainted size" }
+
+ override predicate isSource(DataFlow::Node source) {
+ source.asExpr().(FunctionCall).getTarget().getName() = "m_mtod"
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists (FunctionCall call
+ | call.getArgument(2) = sink.asExpr() and
+ call.getTarget().getName().matches("%copydata"))
+ }
+}
+
+from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink, source, sink, "m_copydata with tainted size."
diff --git a/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/01_paths_to_icmp_error.ql b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/01_paths_to_icmp_error.ql
new file mode 100644
index 0000000..9514ed1
--- /dev/null
+++ b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/01_paths_to_icmp_error.ql
@@ -0,0 +1,40 @@
+/**
+ * @name 01 Paths from ip_input to icmp_error
+ * @description Find data-flow paths that lead from ip_input to the first parameter of icmp_error.
+ * @kind path-problem
+ * @problem.severity warning
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import DataFlow::PathGraph
+
+/*
+ * The previous query, 00_mbuf_copydata_tainted_size.ql, discovered some
+ * dodgy looking code in `icmp_error`. But is it exploitable? To find out
+ * we need to figure if the zero'th parameter of `icmp_error`, an `mbuf`
+ * named `n`, is attacker-controllable.
+ *
+ * This initial query looks for expression that flows to parameter `n`.
+ */
+
+class Config extends DataFlow::Configuration {
+ Config() { this = "Paths from ip_input to icmp_error" }
+
+ override predicate isSource(DataFlow::Node source) {
+ // Any expression is a valid source.
+ exists (source.asExpr())
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ // The sink is the zero'th parameter of `icmp_error`: `struct mbuf *n`.
+ exists (Parameter p
+ | p = sink.asParameter() and
+ p.getFunction().getName() = "icmp_error" and
+ p.getIndex() = 0)
+ }
+}
+
+from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select source, source, sink, "Expression flows to icmp_error."
diff --git a/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/02_paths_to_icmp_error.ql b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/02_paths_to_icmp_error.ql
new file mode 100644
index 0000000..2ecb5e1
--- /dev/null
+++ b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/02_paths_to_icmp_error.ql
@@ -0,0 +1,39 @@
+/**
+ * @name 02 Paths from ip_input to icmp_error
+ * @description Find data-flow paths that lead from ip_input to the first parameter of icmp_error.
+ * @kind path-problem
+ * @problem.severity warning
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import DataFlow::PathGraph
+
+/*
+ * The previous iteration of this query found every expression that flows
+ * to parameter `n` of `icmp_error`. The most interesting looking results
+ * were the ones that started in the function `ip_input` because that is
+ * where incoming IP packets are handled. So we restrict `isSource` to only
+ * expressions from `ip_input`.
+ */
+
+class Config extends DataFlow::Configuration {
+ Config() { this = "Paths from ip_input to icmp_error" }
+
+ override predicate isSource(DataFlow::Node source) {
+ exists (source.asExpr()) and
+ source.getFunction().getName() = "ip_input"
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ // The sink is the zero'th parameter of `icmp_error`: `struct mbuf *n`.
+ exists (Parameter p
+ | p = sink.asParameter() and
+ p.getFunction().getName() = "icmp_error" and
+ p.getIndex() = 0)
+ }
+}
+
+from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select source, source, sink, "Expression flows to icmp_error."
diff --git a/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/03_paths_to_icmp_error.ql b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/03_paths_to_icmp_error.ql
new file mode 100644
index 0000000..a4c3bb9
--- /dev/null
+++ b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/03_paths_to_icmp_error.ql
@@ -0,0 +1,44 @@
+/**
+ * @name 03 Paths from ip_input to icmp_error
+ * @description Find data-flow paths that lead from ip_input to the first parameter of icmp_error.
+ * @kind path-problem
+ * @problem.severity warning
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import DataFlow::PathGraph
+
+/*
+ * The previous query found many dataflow paths that went via the function
+ * `ip_forward`. Those paths look a little complicated, so lets see if we
+ * can find something simpler. In this query, we use the `isBarrier` method
+ * to exclude any path that goes via `ip_forward`. This reduces the number
+ * of results to a much smaller number and we quickly find that there is an
+ * easy exploitation path via the function `ip_dooptions`.
+ */
+
+class Config extends DataFlow::Configuration {
+ Config() { this = "Paths from ip_input to icmp_error" }
+
+ override predicate isSource(DataFlow::Node source) {
+ exists (source.asExpr()) and
+ source.getFunction().getName() = "ip_input"
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ // The sink is the zero'th parameter of `icmp_error`: `struct mbuf *n`.
+ exists (Parameter p
+ | p = sink.asParameter() and
+ p.getFunction().getName() = "icmp_error" and
+ p.getIndex() = 0)
+ }
+
+ override predicate isBarrier(DataFlow::Node node) {
+ node.getFunction().getName() = "ip_forward"
+ }
+}
+
+from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select source, source, sink, "Expression flows to icmp_error."
diff --git a/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/README.md b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/README.md
new file mode 100644
index 0000000..cae2e9c
--- /dev/null
+++ b/ql_demos/cpp/XNU_icmp_error_CVE-2018-4407/README.md
@@ -0,0 +1,5 @@
+# Apple XNU icmp_error CVE-2018-4407
+
+Use [this snapshot](https://downloads.lgtm.com/snapshots/cpp/apple/xnu/xnu-4570.71.2_macOS-10.13.6_Semmle-1.18.0.zip) for the demo.
+
+There are two parts to this demo. The first part is `00_mbuf_copydata_tainted_size.ql`, which is the dataflow query that found the bug. It is explained in detail in [this blog post](https://lgtm.com/blog/apple_xnu_icmp_error_CVE-2018-4407). The problem with this query is that it does not find the true source of the untrusted data. This is because it assumes that any call to the function named `m_mtod` can return untrusted data. But not every `mbuf` contains untrusted data. So the second part of the demo, corresponding to [this blog post](https://lgtm.com/blog/apple_xnu_icmp_nfs_pocs), is to use dataflow analysis to find a path that gets an untrusted `mbuf` into `icmp_error`. The second part of the demo is developed in steps, starting with `01_paths_to_icmp_error.ql`.
diff --git a/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/ArrayIndexMightOverflow.ql b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/ArrayIndexMightOverflow.ql
new file mode 100644
index 0000000..94299c7
--- /dev/null
+++ b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/ArrayIndexMightOverflow.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Array index might overflow
+ * @description An array indexing expression of the form
+ * x[i+j] could cause an out-of-bounds write.
+ * @kind problem
+ * @problem.severity warning
+ * @id apple-xnu/cpp/array-index-might-overflow
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+// Find an assignment like this: x[i+j] = v
+from ArrayExpr ae, BinaryArithmeticOperation idx, Assignment assign
+where ae = assign.getLValue()
+ and idx = ae.getArrayOffset()
+ and convertedExprMightOverflow(idx)
+select idx, "Array index might overflow"
diff --git a/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/InfiniteLoop.ql b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/InfiniteLoop.ql
new file mode 100644
index 0000000..cd78f5b
--- /dev/null
+++ b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/InfiniteLoop.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Infinite loop
+ * @description Updating a loop index with a compound assignment
+ * could cause non-termination.
+ * @kind problem
+ * @problem.severity warning
+ * @id apple-xnu/cpp/infinite-loop
+ */
+
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+// Find loops like this:
+// while (x) { ...; x -= n; }
+from Loop loop, Variable v, AssignArithmeticOperation assign
+where (loop.getCondition() = v.getAnAccess() or
+ loop.getCondition().(ComparisonOperation).getAnOperand() = v.getAnAccess())
+ and assign.getLValue() = v.getAnAccess()
+ // Compound assignment is in the body of the loop:
+ and assign = loop.getStmt().getAChild*()
+ and lowerBound(assign.getRValue()) <= 0
+ and upperBound(assign.getRValue()) >= 0
+select loop, "Loop might not terminate due to this $@.", assign, "assignment"
diff --git a/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/README.md b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/README.md
new file mode 100644
index 0000000..58bc6be
--- /dev/null
+++ b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/README.md
@@ -0,0 +1,13 @@
+https://lgtm.com/blog/apple_xnu_packet_mangler_CVE-2017-13904
+
+There were multiple bugs in `packet_mangler.c`. One of the infinite loop bugs was fixed in macOS High Sierra 10.13.2. The other bugs were fixed in macOS High Sierra 10.13.5.
+
+For a demo, the best query to show is `tcphdr_mbuf_copydata.ql`, because it shows uses taint tracking to show the stack buffer overflow.
+
+`ArrayIndexMightOverflow.ql` is a simplified version of the query that originally led us to look at this code. It looks for array indices that might be negative.
+
+`InfiniteLoop.ql` is a query inspired by one of the bugs in this code: the loop might not terminate because the loop counter is updated with a compound assignment (`+=`). We wrote an exploit which causes the right hand side of the assignment to be zero, which means that the loop runs forever.
+
+All three queries find results in [this snapshot](https://downloads.lgtm.com/snapshots/cpp/apple/xnu/XNU-revision-2017-June-13--15-52-38.zip) (macOS 10.13).
+
+The queries also find results in [this newer snapshot for 10.13.3](https://downloads.lgtm.com/snapshots/cpp/apple/xnu/xnu-4570.41.2_macOS-10.13.3_Semmle-1.16.1.zip). Apple thought they had fixed the infinite loop bug in 10.13.2, by changing the loop condition to a `>`. They were wrong.
diff --git a/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/tcphdr_mbuf_copydata.ql b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/tcphdr_mbuf_copydata.ql
new file mode 100644
index 0000000..bbf401c
--- /dev/null
+++ b/ql_demos/cpp/XNU_packet-mangler_CVE-2018-4249/tcphdr_mbuf_copydata.ql
@@ -0,0 +1,32 @@
+/**
+ * @name tcphdr flow to mbuf_copydata
+ * @description Expressions of type tcphdr usually contain values that can
+ * be controlled by an attacker. Therefore, it is dangerous to
+ * use any of those values as the size argument of
+ * mbuf_copydata.
+ * @kind path-problem
+ * @problem.severity warning
+ * @id apple-xnu/cpp/tcphdr_mbuf_copydata
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+import DataFlow::PathGraph
+
+class Config extends TaintTracking::Configuration {
+ Config() { this = "tcphdr_flow" }
+
+ override predicate isSource(DataFlow::Node source) {
+ source.asExpr().getType().stripType().getName() = "tcphdr"
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists (FunctionCall call
+ | call.getArgument(2) = sink.asExpr() and
+ call.getTarget().getName() = "mbuf_copydata")
+ }
+}
+
+from Config cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink, source, sink, "tcp"
diff --git a/ql_demos/cpp/libjpeg-turbo-oob/01_find_colormap_index.ql b/ql_demos/cpp/libjpeg-turbo-oob/01_find_colormap_index.ql
new file mode 100644
index 0000000..d10ec56
--- /dev/null
+++ b/ql_demos/cpp/libjpeg-turbo-oob/01_find_colormap_index.ql
@@ -0,0 +1,8 @@
+import cpp
+
+// Find expressions of the form `base->colormap[i][j]`
+
+from ArrayExpr outer, ArrayExpr inner
+where inner = outer.getArrayBase() and
+ inner.getArrayBase().(FieldAccess).getTarget().getName() = "colormap"
+select outer, "Indexing into colormap."
diff --git a/ql_demos/cpp/libjpeg-turbo-oob/02a_find_guarded_colormap_index.ql b/ql_demos/cpp/libjpeg-turbo-oob/02a_find_guarded_colormap_index.ql
new file mode 100644
index 0000000..cc5f1eb
--- /dev/null
+++ b/ql_demos/cpp/libjpeg-turbo-oob/02a_find_guarded_colormap_index.ql
@@ -0,0 +1,30 @@
+import cpp
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.controlflow.Guards
+import Options
+
+// Find expressions of the form `base->colormap[i][j]` where
+// `j` is checked against `base->cmap_length`
+
+/**
+ * Gets an expression of the form `base->fieldName`, where `base`
+ * is of type `_bmp_source_struct`.
+ */
+Expr bmpSourceStructField(GVN base, string fieldName) {
+ exists (FieldAccess fa |
+ fa.getTarget().getName() = fieldName and
+ fa.getTarget().getDeclaringType().hasName("_bmp_source_struct") and
+ base = globalValueNumber(fa.getQualifier()) and
+ globalValueNumber(result) = globalValueNumber(fa)
+ )
+}
+
+from ArrayExpr outer, ArrayExpr inner, GVN base, GVN index
+where inner = outer.getArrayBase() and
+ inner.getArrayBase() = bmpSourceStructField(base, "colormap") and
+ outer.getArrayOffset() = index.getAnExpr() and
+ exists (GuardCondition gc, Expr bound |
+ bound = bmpSourceStructField(base, "cmap_length") and
+ gc.ensuresLt(index.getAnExpr(), bound, 0, inner.getBasicBlock(), true)
+ )
+select outer, "Guarded indexing into colormap."
diff --git a/ql_demos/cpp/libjpeg-turbo-oob/02b_find_guarded_colormap_index_working.ql b/ql_demos/cpp/libjpeg-turbo-oob/02b_find_guarded_colormap_index_working.ql
new file mode 100644
index 0000000..d61d3b2
--- /dev/null
+++ b/ql_demos/cpp/libjpeg-turbo-oob/02b_find_guarded_colormap_index_working.ql
@@ -0,0 +1,40 @@
+import cpp
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.controlflow.Guards
+import Options
+
+// Find expressions of the form `base->colormap[i][j]` where
+// `j` is checked against `base->cmap_length`
+
+/** Recognize `error_exit` as a non-returning function. */
+class JpegTurboCustomOptions extends CustomOptions {
+ override predicate exprExits(Expr e) {
+ super.exprExits(e) or
+ exists (ExprCall c | e = c |
+ c.getExpr().(PointerDereferenceExpr).getOperand().(FieldAccess).getTarget().hasName("error_exit")
+ )
+ }
+}
+
+/**
+ * Gets an expression of the form `base->fieldName`, where `base`
+ * is of type `_bmp_source_struct`.
+ */
+Expr bmpSourceStructField(GVN base, string fieldName) {
+ exists (FieldAccess fa |
+ fa.getTarget().getName() = fieldName and
+ fa.getTarget().getDeclaringType().hasName("_bmp_source_struct") and
+ base = globalValueNumber(fa.getQualifier()) and
+ globalValueNumber(result) = globalValueNumber(fa)
+ )
+}
+
+from ArrayExpr outer, ArrayExpr inner, GVN base, GVN index
+where inner = outer.getArrayBase() and
+ inner.getArrayBase() = bmpSourceStructField(base, "colormap") and
+ outer.getArrayOffset() = index.getAnExpr() and
+ exists (GuardCondition gc, Expr bound |
+ bound = bmpSourceStructField(base, "cmap_length") and
+ gc.ensuresLt(index.getAnExpr(), bound, 0, inner.getBasicBlock(), true)
+ )
+select outer, "Guarded indexing into colormap."
diff --git a/ql_demos/cpp/libjpeg-turbo-oob/03_find_unguarded_colormap_index.ql b/ql_demos/cpp/libjpeg-turbo-oob/03_find_unguarded_colormap_index.ql
new file mode 100644
index 0000000..10b35d1
--- /dev/null
+++ b/ql_demos/cpp/libjpeg-turbo-oob/03_find_unguarded_colormap_index.ql
@@ -0,0 +1,46 @@
+/**
+ * @problem.severity error
+ * @kind problem
+ * @id libjpeg/unguarded-colormap-index
+ */
+
+import cpp
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.controlflow.Guards
+import Options
+
+// Find expressions of the form `base->colormap[i][j]` where
+// `j` is checked against `base->cmap_length`
+
+/** Recognize `error_exit` as a non-returning function. */
+class JpegTurboCustomOptions extends CustomOptions {
+ override predicate exprExits(Expr e) {
+ super.exprExits(e) or
+ exists (ExprCall c | e = c |
+ c.getExpr().(PointerDereferenceExpr).getOperand().(FieldAccess).getTarget().hasName("error_exit")
+ )
+ }
+}
+
+/**
+ * Gets an expression of the form `base->fieldName`, where `base`
+ * is of type `_bmp_source_struct`.
+ */
+Expr bmpSourceStructField(GVN base, string fieldName) {
+ exists (FieldAccess fa |
+ fa.getTarget().getName() = fieldName and
+ fa.getTarget().getDeclaringType().hasName("_bmp_source_struct") and
+ base = globalValueNumber(fa.getQualifier()) and
+ globalValueNumber(result) = globalValueNumber(fa)
+ )
+}
+
+from ArrayExpr outer, ArrayExpr inner, GVN base, GVN index
+where inner = outer.getArrayBase() and
+ inner.getArrayBase() = bmpSourceStructField(base, "colormap") and
+ outer.getArrayOffset() = index.getAnExpr() and
+ not exists (GuardCondition gc, Expr bound |
+ bound = bmpSourceStructField(base, "cmap_length") and
+ gc.ensuresLt(index.getAnExpr(), bound, 0, inner.getBasicBlock(), true)
+ )
+select outer, "Unguarded indexing into colormap."
diff --git a/ql_demos/cpp/libjpeg-turbo-oob/04_find_unguarded_colormap_no_fps.ql b/ql_demos/cpp/libjpeg-turbo-oob/04_find_unguarded_colormap_no_fps.ql
new file mode 100644
index 0000000..dd631f2
--- /dev/null
+++ b/ql_demos/cpp/libjpeg-turbo-oob/04_find_unguarded_colormap_no_fps.ql
@@ -0,0 +1,130 @@
+/**
+ * @problem.severity error
+ * @kind problem
+ * @id libjpeg/unguarded-colormap-index
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.controlflow.Guards
+import Options
+
+// Find expressions of the form `base->colormap[i][j]` where
+// `j` is checked against `base->cmap_length`
+
+/** Recognize `error_exit` as a non-returning function. */
+class JpegTurboCustomOptions extends CustomOptions {
+ override predicate exprExits(Expr e) {
+ super.exprExits(e) or
+ exists (ExprCall c | e = c |
+ c.getExpr().(PointerDereferenceExpr).getOperand().(FieldAccess).getTarget().hasName("error_exit")
+ )
+ }
+}
+
+/**
+ * Gets an expression of the form `base->fieldName`, where `base`
+ * is of type `_bmp_source_struct`.
+ */
+Expr bmpSourceStructField(GVN base, string fieldName) {
+ exists (FieldAccess fa |
+ fa.getTarget().getName() = fieldName and
+ fa.getTarget().getDeclaringType().hasName("_bmp_source_struct") and
+ base = globalValueNumber(fa.getQualifier()) and
+ globalValueNumber(result) = globalValueNumber(fa)
+ )
+}
+
+/**
+ * Holds if `assgn` allocates a new colour map of the given `size` and
+ * stores it into `base->colormap`.
+ */
+predicate colormapInit(AssignExpr assgn, Expr base, Expr size) {
+ exists (FieldAccess cmap, ExprCall alloc, PointerDereferenceExpr pd |
+ assgn.getLValue() = cmap and
+ base = cmap.getQualifier() and
+ cmap.getTarget().hasQualifiedName("_bmp_source_struct::colormap") and
+ assgn.getRValue() = alloc and
+ pd = alloc.getExpr() and
+ pd.getOperand().(FieldAccess).getTarget().getName() = "alloc_sarray" and
+ size = alloc.getArgument(2)
+ )
+}
+
+/**
+ * A flow configuration for tracking structs that contain colour maps.
+ */
+class ColorMapTracker extends DataFlow::Configuration {
+ ColorMapTracker() { this = "ColorMapTracker" }
+
+ predicate isSource(AssignExpr assgn, DataFlow::Node nd) {
+ exists (Expr s | colormapInit(assgn, s, _) | useUsePair(_, s, nd.asExpr()))
+ }
+
+ override predicate isSource(DataFlow::Node nd) {
+ isSource(_, nd)
+ }
+
+ override predicate isSink(DataFlow::Node nd) {
+ any()
+ }
+}
+
+/**
+ * A flow configuration for tracking expressions that denote the size of
+ * colour maps.
+ */
+class ColorMapSizeTracker extends DataFlow::Configuration {
+ ColorMapSizeTracker() { this = "ColorMapSizeTracker" }
+
+ predicate isSource(AssignExpr assgn, DataFlow::Node nd) {
+ exists (Expr s | colormapInit(assgn, _, s) | useUsePair(_, s, nd.asExpr()))
+ }
+
+ override predicate isSource(DataFlow::Node nd) {
+ isSource(_, nd)
+ }
+
+ override predicate isSink(DataFlow::Node nd) {
+ any()
+ }
+}
+
+/**
+ * Holds if `ae` indexes into the colour map allocated at `origin`.
+ */
+predicate isColorMapIndex(ArrayExpr ae, AssignExpr origin) {
+ exists (ColorMapTracker cfg, FieldAccess fa, DataFlow::Node source |
+ fa = ae.getArrayBase().(ArrayExpr).getArrayBase() and
+ fa.getTarget().getName() = "colormap" and
+ cfg.isSource(origin, source) and
+ cfg.hasFlow(source, DataFlow::exprNode(fa.getQualifier()))
+ )
+}
+
+/**
+ * Holds if `sz` refers to the size of the colour map allocated at `origin`.
+ */
+predicate isColorMapSize(Expr sz, AssignExpr origin) {
+ exists (ColorMapSizeTracker cfg, DataFlow::Node source |
+ cfg.isSource(origin, source) and
+ cfg.hasFlow(source, DataFlow::exprNode(sz))
+ )
+}
+
+from ArrayExpr outer, ArrayExpr inner, GVN base, GVN index
+where inner = outer.getArrayBase() and
+ inner.getArrayBase() = bmpSourceStructField(base, "colormap") and
+ outer.getArrayOffset() = index.getAnExpr() and
+ not exists (GuardCondition gc, Expr bound |
+ bound = bmpSourceStructField(base, "cmap_length")
+ or
+ exists (AssignExpr origin |
+ isColorMapIndex(outer, origin) and
+ isColorMapSize(bound, origin)
+ )
+ |
+ gc.ensuresLt(index.getAnExpr(), bound, 0, inner.getBasicBlock(), true)
+ )
+select outer, "Unguarded indexing into colormap."
diff --git a/ql_demos/cpp/libjpeg-turbo-oob/05_find_unguarded_colormap_generalised.ql b/ql_demos/cpp/libjpeg-turbo-oob/05_find_unguarded_colormap_generalised.ql
new file mode 100644
index 0000000..ee4da6b
--- /dev/null
+++ b/ql_demos/cpp/libjpeg-turbo-oob/05_find_unguarded_colormap_generalised.ql
@@ -0,0 +1,128 @@
+/**
+ * @problem.severity error
+ * @kind problem
+ * @id libjpeg/unguarded-colormap-index
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.valuenumbering.GlobalValueNumbering
+import semmle.code.cpp.controlflow.Guards
+import Options
+
+// Find expressions of the form `base->colormap[i][j]` where
+// `j` is checked against `base->cmap_length`
+
+/** Recognize `error_exit` as a non-returning function. */
+class JpegTurboCustomOptions extends CustomOptions {
+ override predicate exprExits(Expr e) {
+ super.exprExits(e) or
+ exists (ExprCall c | e = c |
+ c.getExpr().(PointerDereferenceExpr).getOperand().(FieldAccess).getTarget().hasName("error_exit")
+ )
+ }
+}
+
+/**
+ * Gets an expression of the form `base->fieldName`.
+ */
+Expr bmpSourceStructField(GVN base, string fieldName) {
+ exists (FieldAccess fa |
+ fa.getTarget().getName() = fieldName and
+ base = globalValueNumber(fa.getQualifier()) and
+ globalValueNumber(result) = globalValueNumber(fa)
+ )
+}
+
+/**
+ * Holds if `assgn` allocates a new colour map of the given `size` and
+ * stores it into `base->colormap`.
+ */
+predicate colormapInit(AssignExpr assgn, Expr base, Expr size) {
+ exists (FieldAccess cmap, ExprCall alloc, PointerDereferenceExpr pd |
+ assgn.getLValue() = cmap and
+ base = cmap.getQualifier() and
+ cmap.getTarget().hasName("colormap") and
+ assgn.getRValue() = alloc and
+ pd = alloc.getExpr() and
+ pd.getOperand().(FieldAccess).getTarget().getName() = "alloc_sarray" and
+ size = alloc.getArgument(2)
+ )
+}
+
+/**
+ * A flow configuration for tracking structs that contain colour maps.
+ */
+class ColorMapTracker extends DataFlow::Configuration {
+ ColorMapTracker() { this = "ColorMapTracker" }
+
+ predicate isSource(AssignExpr assgn, DataFlow::Node nd) {
+ exists (Expr s | colormapInit(assgn, s, _) | useUsePair(_, s, nd.asExpr()))
+ }
+
+ override predicate isSource(DataFlow::Node nd) {
+ isSource(_, nd)
+ }
+
+ override predicate isSink(DataFlow::Node nd) {
+ any()
+ }
+}
+
+/**
+ * A flow configuration for tracking expressions that denote the size of
+ * colour maps.
+ */
+class ColorMapSizeTracker extends DataFlow::Configuration {
+ ColorMapSizeTracker() { this = "ColorMapSizeTracker" }
+
+ predicate isSource(AssignExpr assgn, DataFlow::Node nd) {
+ exists (Expr s | colormapInit(assgn, _, s) | useUsePair(_, s, nd.asExpr()))
+ }
+
+ override predicate isSource(DataFlow::Node nd) {
+ isSource(_, nd)
+ }
+
+ override predicate isSink(DataFlow::Node nd) {
+ any()
+ }
+}
+
+/**
+ * Holds if `ae` indexes into the colour map allocated at `origin`.
+ */
+predicate isColorMapIndex(ArrayExpr ae, AssignExpr origin) {
+ exists (ColorMapTracker cfg, FieldAccess fa, DataFlow::Node source |
+ fa = ae.getArrayBase().(ArrayExpr).getArrayBase() and
+ fa.getTarget().getName() = "colormap" and
+ cfg.isSource(origin, source) and
+ cfg.hasFlow(source, DataFlow::exprNode(fa.getQualifier()))
+ )
+}
+
+/**
+ * Holds if `sz` refers to the size of the colour map allocated at `origin`.
+ */
+predicate isColorMapSize(Expr sz, AssignExpr origin) {
+ exists (ColorMapSizeTracker cfg, DataFlow::Node source |
+ cfg.isSource(origin, source) and
+ cfg.hasFlow(source, DataFlow::exprNode(sz))
+ )
+}
+
+from ArrayExpr outer, ArrayExpr inner, GVN base, GVN index
+where inner = outer.getArrayBase() and
+ inner.getArrayBase() = bmpSourceStructField(base, "colormap") and
+ outer.getArrayOffset() = index.getAnExpr() and
+ not exists (GuardCondition gc, Expr bound |
+ bound = bmpSourceStructField(base, "cmap_length")
+ or
+ exists (AssignExpr origin |
+ isColorMapIndex(outer, origin) and
+ isColorMapSize(bound, origin)
+ )
+ |
+ gc.ensuresLt(index.getAnExpr(), bound, 0, inner.getBasicBlock(), true)
+ )
+select outer, "Unguarded indexing into colormap."
diff --git a/ql_demos/cpp/libjpeg-turbo-oob/README.md b/ql_demos/cpp/libjpeg-turbo-oob/README.md
new file mode 100644
index 0000000..8e08a09
--- /dev/null
+++ b/ql_demos/cpp/libjpeg-turbo-oob/README.md
@@ -0,0 +1,16 @@
+This is demo is an example of variant analysis on a recent [bugfix](https://github.com/libjpeg-turbo/libjpeg-turbo/commit/9c78a04df4e44ef6487eee99c4258397f4fdca55) in [libjpeg-turbo](https://github.com/libjpeg-turbo/libjpeg-turbo), an open-source image processing library.
+
+The fix prevents an out-of-bounds access when processing malformed BMP files: when reading a BMP file, the library allocates a colour map based on the number of colours declared in the BMP header. Later on, individual bytes are read from the file and used as indices into this colour map. Previously, this was done without checking whether the byte actually represented a valid colour, which could cause an out-of-bounds access. The fix introduces a field in the same struct as the colour map that records its size, and checks the index against it, aborting with an error if the index is out of range.
+
+A snapshot of libjpeg-turbo from before the fix is [here](https://downloads.lgtm.com/snapshots/cpp/libjpeg-turbo/libjpeg-turbo-revision-0fa7850aeb273204acd57be11f328b2be5d97dc6.zip), and one that contains the fix is [here](https://downloads.lgtm.com/snapshots/cpp/libjpeg-turbo/libjpeg-turbo-revision-d5f281b734425fc1d930ff2c3f8441aad731343e.zip).
+
+The first five QL files develop a query that flags exactly the fixed accesses on the former snapshot, and nothing on the latter; the last query is a generalisation that finds a new instance of the same problem. All queries are run on the fixed snapshot, except when stated otherwise.
+
+ - 01_find_colormap_index.ql: A simple query to get started; flags all expressions of the form `base->colormap[i][j]`.
+ - 02a_find_guarded_colormap_index.ql: Refines the previous query to only flag indices into `_bmp_source_struct::colormap`, and among those only the ones that are guarded by a comparison against `_bmp_source_struct::cmap_length`. Uses global value numbering to correlate the references to `colormap` and `cmap_length` to make sure they are on the same base object, and the guards library to reason about guarding.
+ - 02b_find_guarded_colormap_index_working.ql: The previous query doesn't actually work, since `ERREXIT` isn't recognised as being a non-returning macro. This query fixes that.
+ - 03_find_unguarded_colormap_index.ql: Flipping the logic around, we now look for _unguarded_ indexing. This gives a few false positives in cases where `cmap_length` isn't used. There is still a guard in these cases, but it's against a parameter that happens to contain the size of the colour map.
+ - 04_find_unguarded_colormap_no_fps.ql: Add inter-procedural tracking to reason about the flow of colour maps and their sizes. This eliminates the remaining FPs on the fixed snapshot, and gives the expected results on the original snapshot.
+ - 05_find_unguarded_colormap_generalised.ql: By removing the hardcoded references to `_bmp_source_struct`, we get a more general query that looks for other unguarded indexes into colour maps. This gives yet more false positives, since there are a few other guarding patterns, but the first three results are actually true positives, which we [reported](https://github.com/libjpeg-turbo/libjpeg-turbo/issues/295). A snapshot with these results fixed is available [here](https://downloads.lgtm.com/snapshots/cpp/libjpeg-turbo/libjpeg-turbo-revision-d00d7d8c194e587ed10a395e0f307ce9dddf5687.zip).
+
+Note that the final query is somewhat non-trivial (>100 LoC, uses global value numbering, guards and inter-procedural flow), so it's perhaps best used with an audience that has seen some simple QL before.
diff --git a/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql b/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql
new file mode 100644
index 0000000..22b3a3c
--- /dev/null
+++ b/ql_demos/cpp/libssh2_eating_error_codes/00_error_codes.ql
@@ -0,0 +1,15 @@
+/**
+ * @name 00_error_codes
+ */
+
+import cpp
+
+// Look for return statements that return a negative integer constant.
+// For example:
+//
+// return -1;
+//
+// The negative return value might be an error code.
+from ReturnStmt ret
+where ret.getExpr().getValue().toInt() < 0
+select ret
diff --git a/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql
new file mode 100644
index 0000000..b5756c4
--- /dev/null
+++ b/ql_demos/cpp/libssh2_eating_error_codes/01_error_codes_call.ql
@@ -0,0 +1,13 @@
+/**
+ * @name 01_error_codes_call
+ */
+
+import cpp
+
+// Extend the previous query to also find calls to functions that sometimes
+// return a negative integer constant.
+from FunctionCall call, ReturnStmt ret
+where
+ ret.getExpr().getValue().toInt() < 0 and
+ call.getTarget() = ret.getEnclosingFunction()
+select ret, call
diff --git a/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql
new file mode 100644
index 0000000..2df3bc3
--- /dev/null
+++ b/ql_demos/cpp/libssh2_eating_error_codes/02_eating_error_codes.ql
@@ -0,0 +1,14 @@
+/**
+ * @name 02_eating_error_codes
+ */
+
+import cpp
+
+// Look for calls that are cast to unsigned, which means that the error
+// code might be accidentally ignored.
+from FunctionCall call, ReturnStmt ret
+where
+ ret.getExpr().getValue().toInt() < 0 and
+ call.getTarget() = ret.getEnclosingFunction() and
+ call.getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned()
+select call, ret
diff --git a/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql
new file mode 100644
index 0000000..5cf6201
--- /dev/null
+++ b/ql_demos/cpp/libssh2_eating_error_codes/03_eating_error_codes_localflow.ql
@@ -0,0 +1,24 @@
+/**
+ * @name 03_eating_error_codes_localflow
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+
+// The previous query only handled cases where the result of the function
+// call is immediately cast to unsigned. So it will fail to detect examples
+// like this, where the cast doesn't happen immediately:
+//
+// int r = f();
+// unsigned int x = r;
+//
+// In this query, we add local dataflow so that we can also handle such
+// cases.
+from FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink
+where
+ ret.getExpr().getValue().toInt() < 0 and
+ call.getTarget() = ret.getEnclosingFunction() and
+ source.asExpr() = call and
+ DataFlow::localFlow(source, sink) and
+ sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned()
+select source, sink
diff --git a/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql
new file mode 100644
index 0000000..43877d3
--- /dev/null
+++ b/ql_demos/cpp/libssh2_eating_error_codes/04_eating_error_codes_localflow_rangeanalysis.ql
@@ -0,0 +1,29 @@
+/**
+ * @name 04_eating_error_codes_localflow_rangeanalysis
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.DataFlow
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+
+// The previous query produced some weird results. The problem is that it
+// treats any expression with an unsigned type as a potential sink. What we
+// really want is to find where the cast from signed to unsigned happens,
+// because that's where the integer overflow occurs. So we want the sink to
+// be a potentially negative expression that gets cast to unsigned.
+//
+// Note that by using range analysis, we can avoid producing false positive
+// results for examples like this:
+//
+// int r = f();
+// if (r < 0) return -1;
+// unsigned int x = r;
+from FunctionCall call, ReturnStmt ret, DataFlow::Node source, DataFlow::Node sink
+where
+ ret.getExpr().getValue().toInt() < 0 and
+ call.getTarget() = ret.getEnclosingFunction() and
+ source.asExpr() = call and
+ DataFlow::localFlow(source, sink) and
+ sink.asExpr().getFullyConverted().getType().getUnderlyingType().(IntegralType).isUnsigned() and
+ lowerBound(sink.asExpr()) < 0
+select source, sink
diff --git a/ql_demos/cpp/libssh2_eating_error_codes/README.md b/ql_demos/cpp/libssh2_eating_error_codes/README.md
new file mode 100644
index 0000000..2c2a630
--- /dev/null
+++ b/ql_demos/cpp/libssh2_eating_error_codes/README.md
@@ -0,0 +1,9 @@
+# Eating error codes in libssh2
+
+Download this [snapshot](https://downloads.lgtm.com/snapshots/cpp/libssh2/libssh2_libssh2_C_C++_38bf7ce.zip) for the demo.
+
+This demo shows how to develop, step-by-step, the query from the [blog post](https://blog.semmle.com/libssh2-integer-overflow/) about libssh2 CVE-2019-13115. This query did not find the bug that caused the CVE. It is instead about doing variant analysis on a bug that we noticed on the development branch of libssh2. We sent the query results to the libssh2 development team and they were able to fix all the variants before the next version of libssh2 was released.
+
+[This](https://lgtm.com/projects/g/libssh2/libssh2/snapshot/6e2f5563c80521b3cde72a6fcdb675c2e085f9cf/files/src/hostkey.c?sort=name&dir=ASC&mode=heatmap&__hstc=70225743.5fa8704c8874c6eafaef219923a26734.1534954774206.1564532078978.1564925733575.72&__hssc=70225743.2.1565139962633&__hsfp=997709570#L677) is an example of the bug. The problem is that `_libssh2_get_c_string` returns a negative integer as an error code, but the type of `r_len` is `unsigned int`, so the error code is accidentally ignored.
+
+For a shorter demo, stop at step 02. Steps 03 and 04 make the query more sophisticated by adding local data flow and range analysis.
diff --git a/ql_demos/cpp/qlpack.yml b/ql_demos/cpp/qlpack.yml
new file mode 100644
index 0000000..12a2c1b
--- /dev/null
+++ b/ql_demos/cpp/qlpack.yml
@@ -0,0 +1,3 @@
+name: codeql-demos-cpp
+version: 0.0.0
+libraryPathDependencies: codeql-cpp
diff --git a/ql_demos/cpp/queries.xml b/ql_demos/cpp/queries.xml
new file mode 100644
index 0000000..99f4a72
--- /dev/null
+++ b/ql_demos/cpp/queries.xml
@@ -0,0 +1 @@
+
diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/01_find_snprintf.ql b/ql_demos/cpp/rsyslog_CVE-2018-1000140/01_find_snprintf.ql
new file mode 100644
index 0000000..e26ac99
--- /dev/null
+++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/01_find_snprintf.ql
@@ -0,0 +1,14 @@
+import cpp
+
+/*
+ * Find all calls to `snprintf`.
+ *
+ * Note: you could do this first step with grep. However,
+ * grep is less good because it doesn't know about macros.
+ * For example, curl does this:
+ *
+ * https://github.com/curl/curl/blob/87501e57f1c166cb250111af54e0470ab8b2099c/lib/curl_printf.h#L42
+ */
+from FunctionCall call
+where call.getTarget().getName() = "snprintf"
+select call
diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/02_find_snprintf_with_result.ql b/ql_demos/cpp/rsyslog_CVE-2018-1000140/02_find_snprintf_with_result.ql
new file mode 100644
index 0000000..98f432e
--- /dev/null
+++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/02_find_snprintf_with_result.ql
@@ -0,0 +1,11 @@
+import cpp
+
+/*
+ * Only code that uses the result of snprintf might be vulnerable.
+ * So restrict the results to those where snprintf is not used
+ * in a "void context".
+ */
+from FunctionCall call
+where call.getTarget().getName() = "snprintf"
+and not call instanceof ExprInVoidContext
+select call
diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/03_find_snprintf_with_result_and_string.ql b/ql_demos/cpp/rsyslog_CVE-2018-1000140/03_find_snprintf_with_result_and_string.ql
new file mode 100644
index 0000000..23fc448
--- /dev/null
+++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/03_find_snprintf_with_result_and_string.ql
@@ -0,0 +1,15 @@
+import cpp
+
+/*
+ * Only calls to `snprintf` with `%s` in the format specifier
+ * are likely to be vulnerable. This is because other format
+ * specifiers, like `%d` can only change the length of the output
+ * string by a few character, but `%s` can change it a lot.
+ * A `%s` specifier is also much more likely to enable an attacker
+ * to overwrite the stack or heap with working shellcode.
+ */
+from FunctionCall call
+where call.getTarget().getName() = "snprintf"
+and not call instanceof ExprInVoidContext
+and call.getArgument(2).getValue().regexpMatch("(?s).*%s.*")
+select call
diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/04_find_snprintf_with_result_and_string_and_local_taint.ql b/ql_demos/cpp/rsyslog_CVE-2018-1000140/04_find_snprintf_with_result_and_string_and_local_taint.ql
new file mode 100644
index 0000000..2d23863
--- /dev/null
+++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/04_find_snprintf_with_result_and_string_and_local_taint.ql
@@ -0,0 +1,16 @@
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+
+/*
+ * Look for dataflow from the result of `snprintf` back to
+ * its size argument. Note that we no longer need the
+ * `call instanceof ExprInVoidContext` clause, because this
+ * is implied by the dataflow.
+ */
+from FunctionCall call, DataFlow::Node source, DataFlow::Node sink
+where call.getTarget().getName() = "snprintf"
+and call.getArgument(2).getValue().regexpMatch("(?s).*%s.*")
+and TaintTracking::localTaint(source, sink)
+and source.asExpr() = call
+and sink.asExpr() = call.getArgument(1)
+select call
diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/05_find_snprintf_with_result_and_string_and_local_taint_ub.ql b/ql_demos/cpp/rsyslog_CVE-2018-1000140/05_find_snprintf_with_result_and_string_and_local_taint_ub.ql
new file mode 100644
index 0000000..1a48670
--- /dev/null
+++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/05_find_snprintf_with_result_and_string_and_local_taint_ub.ql
@@ -0,0 +1,21 @@
+import cpp
+import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import semmle.code.cpp.dataflow.TaintTracking
+
+/*
+ * Use `SimpleRangeAnalysis` to find an upper bound for the size
+ * argument. Here, we have just added the upperbound to the output,
+ * but we could also use it to rule out code that does proper bounds
+ * checking.
+ *
+ * Note: it can also be interesting to add the upperbound to the
+ * query earlier in the sequence of queries, so that you can see
+ * that it infers quite tight bounds for some of the calls.
+ */
+from FunctionCall call, DataFlow::Node source, DataFlow::Node sink
+where call.getTarget().getName() = "snprintf"
+and call.getArgument(2).getValue().regexpMatch("(?s).*%s.*")
+and TaintTracking::localTaint(source, sink)
+and source.asExpr() = call
+and sink.asExpr() = call.getArgument(1)
+select call, upperBound(call.getArgument(1).getFullyConverted())
diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/README.md b/ql_demos/cpp/rsyslog_CVE-2018-1000140/README.md
new file mode 100644
index 0000000..36c17f5
--- /dev/null
+++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/README.md
@@ -0,0 +1,5 @@
+[Blog post](https://lgtm.com/blog/rsyslog_snprintf_CVE-2018-1000140).
+
+This bug was found by one of our [default queries](https://lgtm.com/rules/1505913226124/). However, it also makes a good example of using QL interactively. The queries in this directory show how you can interactively develop the query.
+
+Use [this snapshot](https://downloads.lgtm.com/snapshots/cpp/rsyslog/rsyslog/rsyslog-all-revision-2018-April-27--14-12-31.zip).
diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/README.md b/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/README.md
new file mode 100644
index 0000000..d71a6e1
--- /dev/null
+++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/README.md
@@ -0,0 +1,5 @@
+# Rsyslog demo video
+
+A recording of this demo can be found [here](https://youtu.be/gfaCZoxH_u4).
+
+`rsyslog.srt` (in this directory) is the subtitles file for the video.
diff --git a/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/rsyslog.srt b/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/rsyslog.srt
new file mode 100644
index 0000000..f18de68
--- /dev/null
+++ b/ql_demos/cpp/rsyslog_CVE-2018-1000140/Video/rsyslog.srt
@@ -0,0 +1,1200 @@
+1
+00:00:00,847 --> 00:00:04,148
+Hello, my name is Kevin Backhouse,
+and I'm a security researcher
+
+2
+00:00:04,177 --> 00:00:04,860
+at Semmle.
+
+3
+00:00:04,884 --> 00:00:08,139
+This video is about a vulnerability that was found
+
+4
+00:00:08,173 --> 00:00:09,665
+by my colleague Bas van Schaik
+
+5
+00:00:09,757 --> 00:00:11,628
+in the rsyslog project.
+
+6
+00:00:12,742 --> 00:00:15,456
+What I'm going to do in this video is explain a bit
+
+7
+00:00:15,511 --> 00:00:18,049
+about what the vulnerability was.
+
+8
+00:00:18,083 --> 00:00:19,736
+I'm also going to show you
+
+9
+00:00:20,341 --> 00:00:25,353
+how you can use QL to search
+
+10
+00:00:25,398 --> 00:00:27,231
+for the vulnerability--
+
+11
+00:00:27,277 --> 00:00:29,560
+the coding pattern that caused this vulnerability.
+
+12
+00:00:30,264 --> 00:00:32,338
+I'm going to show how easy that is to do in QL.
+
+13
+00:00:34,338 --> 00:00:41,480
+Rsyslog is an open source logging system maintained
+
+14
+00:00:41,526 --> 00:00:42,827
+by Rainer Gerhards.
+
+15
+00:00:45,337 --> 00:00:51,288
+We reported this vulnerability to him
+and he fixed it incredibly quickly.
+
+16
+00:00:51,318 --> 00:00:52,280
+I think it was only
+
+17
+00:00:52,304 --> 00:00:54,808
+about 24 hours between the time
+
+18
+00:00:54,846 --> 00:00:56,745
+that we reported the vulnerability to him
+
+19
+00:00:56,791 --> 00:00:59,575
+and the time that he had posted a fix
+
+20
+00:00:59,631 --> 00:01:01,041
+for it on GitHub.
+
+21
+00:01:02,034 --> 00:01:03,262
+Something I'd also
+like to point out
+
+22
+00:01:03,280 --> 00:01:07,011
+about rsyslog, is that Rainer takes security
+
+23
+00:01:07,057 --> 00:01:09,710
+or quality incredibly seriously.
+
+24
+00:01:10,357 --> 00:01:12,639
+He runs all the static analysis
+
+25
+00:01:12,668 --> 00:01:14,150
+and dynamic analysis tools
+
+26
+00:01:14,648 --> 00:01:16,032
+that he can get his hands on on this project.
+
+27
+00:01:16,740 --> 00:01:20,756
+This was a bug that had not been detected
+
+28
+00:01:20,796 --> 00:01:22,220
+by any of those other tools.
+
+29
+00:01:27,144 --> 00:01:30,633
+That shows the value of Semmle's technology
+
+30
+00:01:30,761 --> 00:01:34,320
+that we can find problems that get missed
+
+31
+00:01:34,375 --> 00:01:36,818
+by other tools.
+
+32
+00:01:39,563 --> 00:01:46,722
+Now, the backstory behind this vulnerability
+is that towards the end of 2017,
+
+33
+00:01:46,775 --> 00:01:47,888
+I was looking at this code
+
+34
+00:01:47,923 --> 00:01:49,073
+in the curl project.
+
+35
+00:01:52,560 --> 00:01:54,993
+I'd never seen this coding pattern before.
+
+36
+00:01:55,038 --> 00:01:58,666
+I didn't even know that snprintf had a return value,
+
+37
+00:01:58,697 --> 00:02:01,654
+so I was quite interested to see this coding pattern.
+
+38
+00:02:02,891 --> 00:02:05,427
+It's quite clever the way that it's being used here.
+
+39
+00:02:05,481 --> 00:02:08,555
+What they're doing is they're printing
+
+40
+00:02:08,746 --> 00:02:11,748
+a list of strings into a fixed size buffer,
+
+41
+00:02:12,699 --> 00:02:14,155
+and they're using
+
+42
+00:02:14,209 --> 00:02:15,858
+the return value of snprintf
+
+43
+00:02:16,145 --> 00:02:19,792
+to keep track of what offset
+
+44
+00:02:19,813 --> 00:02:22,181
+in the buffer they need to print to next.
+
+45
+00:02:23,898 --> 00:02:27,807
+They're using the size argument of snprintf
+
+46
+00:02:27,849 --> 00:02:30,809
+to make sure that they don't write
+
+47
+00:02:31,229 --> 00:02:32,689
+beyond the end of the buffer.
+
+48
+00:02:34,282 --> 00:02:38,444
+Because I'd never seen snprintf used
+
+49
+00:02:38,475 --> 00:02:40,683
+like this before, I pulled up the man page
+
+50
+00:02:40,731 --> 00:02:45,363
+for snprintf to see how it works.
+
+51
+00:02:50,241 --> 00:02:52,487
+Now, if we search for RETURN here,
+
+52
+00:02:52,525 --> 00:02:54,711
+we get the documentation on the return value.
+
+53
+00:02:55,532 --> 00:02:56,982
+There's a very interesting thing here.
+
+54
+00:02:59,052 --> 00:03:01,409
+It says that the return value
+
+55
+00:03:01,447 --> 00:03:02,695
+is the number of characters
+
+56
+00:03:02,742 --> 00:03:04,385
+which would have been written
+
+57
+00:03:04,437 --> 00:03:07,323
+to the final string if enough space had been available.
+
+58
+00:03:10,686 --> 00:03:13,830
+What that means is that this code
+
+59
+00:03:13,881 --> 00:03:16,236
+that we just saw in curl looks unsafe.
+
+60
+00:03:16,959 --> 00:03:21,668
+Because if there's a string that is too big,
+
+61
+00:03:22,791 --> 00:03:29,451
+it exceeds this value, then,
+if say the string is 100 bytes
+
+62
+00:03:29,497 --> 00:03:34,446
+but there's only 10 bytes left in the buffer,
+then ptr is still going to get incremented
+
+63
+00:03:34,477 --> 00:03:36,164
+by 100 bytes
+
+64
+00:03:36,731 --> 00:03:40,292
+and so on the next iteration of the loop, we can continue,
+
+65
+00:03:40,888 --> 00:03:42,419
+and we're going to continue printing.
+
+66
+00:03:42,608 --> 00:03:52,317
+What's worse is that this sizeof subtraction here
+
+67
+00:03:52,349 --> 00:03:56,230
+is actually going to compute
+the answer minus 90,
+
+68
+00:03:56,500 --> 00:04:00,731
+but the input to snprintf,
+the size input to snprintf,
+
+69
+00:04:00,761 --> 00:04:02,537
+is a size_t, it's an unsigned value.
+
+70
+00:04:03,140 --> 00:04:05,724
+We're going to get a negative integer overflow here.
+
+71
+00:04:07,506 --> 00:04:09,093
+That means that there's going to be effectively
+
+72
+00:04:09,122 --> 00:04:11,146
+no limit on the number of bytes
+
+73
+00:04:11,342 --> 00:04:13,270
+that we can write to the buffer.
+
+74
+00:04:14,108 --> 00:04:17,861
+Now, it turns out that
+this is not actually a problem in curl,
+
+75
+00:04:17,907 --> 00:04:20,720
+because they've got
+their own implementation of snprintf,
+
+76
+00:04:20,755 --> 00:04:21,755
+which you can see here.
+
+77
+00:04:23,520 --> 00:04:27,530
+This version of snprintf works the way
+
+78
+00:04:27,530 --> 00:04:31,479
+that you need it to in order
+to use this coding pattern.
+
+79
+00:04:32,453 --> 00:04:34,285
+So, it returns the number of bytes
+
+80
+00:04:34,316 --> 00:04:36,303
+that it actually wrote rather than the number of bytes
+
+81
+00:04:36,351 --> 00:04:37,776
+that it wanted to write.
+
+82
+00:04:38,800 --> 00:04:39,932
+There's no problem in curl,
+
+83
+00:04:40,066 --> 00:04:42,961
+but after having seen this coding pattern,
+
+84
+00:04:43,007 --> 00:04:44,007
+I thought,
+
+85
+00:04:44,333 --> 00:04:45,318
+"Well, I wonder if anybody else
+
+86
+00:04:45,342 --> 00:04:48,184
+has done the same thing
+but using the real snprintf.
+
+87
+00:04:48,205 --> 00:04:49,205
+Because if they have,
+
+88
+00:04:49,657 --> 00:04:52,623
+then they might have a buffer overflow in their code."
+
+89
+00:04:54,869 --> 00:04:58,695
+Let's now go to Eclipse and use QL to search
+
+90
+00:04:58,742 --> 00:05:00,662
+for this vulnerability.
+
+91
+00:05:02,677 --> 00:05:04,543
+What I'm going to do first is something
+
+92
+00:05:04,572 --> 00:05:06,727
+that you could do equally well
+
+93
+00:05:06,926 --> 00:05:07,952
+with grep,
+
+94
+00:05:07,987 --> 00:05:11,476
+but we're very quickly going to refine
+
+95
+00:05:11,517 --> 00:05:12,517
+this query to something
+
+96
+00:05:12,573 --> 00:05:14,320
+that is much more sophisticated
+
+97
+00:05:14,373 --> 00:05:15,111
+than what you can do
+
+98
+00:05:15,135 --> 00:05:16,135
+with grep.
+
+99
+00:05:16,704 --> 00:05:18,518
+To begin with, I'm just going to search
+
+100
+00:05:18,551 --> 00:05:22,034
+for all calls to snprintf in the project.
+
+101
+00:05:32,752 --> 00:05:33,875
+108 results.
+
+102
+00:05:35,123 --> 00:05:41,282
+Now, 108 results is not that many.
+
+103
+00:05:41,707 --> 00:05:44,095
+You could realistically audit all of these
+
+104
+00:05:44,134 --> 00:05:45,199
+by hand.
+
+105
+00:05:48,111 --> 00:05:51,302
+One of our customers actually had
+
+106
+00:05:51,350 --> 00:05:57,018
+this exact same vulnerability on some proprietary code.
+
+107
+00:05:59,587 --> 00:06:02,190
+If you were to run this query on their code base,
+
+108
+00:06:02,234 --> 00:06:04,692
+then you'd get over 600 results.
+
+109
+00:06:06,042 --> 00:06:08,568
+That's going to be quite tedious to look through
+
+110
+00:06:08,614 --> 00:06:09,614
+by hand.
+
+111
+00:06:10,135 --> 00:06:13,291
+QL can really help you, save you some time
+
+112
+00:06:13,464 --> 00:06:16,270
+by quickly ruling out the cases
+
+113
+00:06:16,334 --> 00:06:18,508
+that are not interesting to look at.
+
+114
+00:06:18,545 --> 00:06:24,296
+The most common reason that the call to snprintf
+
+115
+00:06:24,343 --> 00:06:26,143
+is safe is that you're just not using
+
+116
+00:06:26,188 --> 00:06:27,974
+the return value of snprintf.
+
+117
+00:06:28,565 --> 00:06:32,051
+Let's update the query to rule out those cases.
+
+118
+00:06:37,864 --> 00:06:42,013
+We just want to say that the call is not in a void context.
+
+119
+00:06:46,533 --> 00:06:53,463
+If we run the query that way,
+then we're now down to 45 results.
+
+120
+00:06:54,612 --> 00:06:55,803
+Now, let's look at one of these.
+
+121
+00:06:58,149 --> 00:07:04,102
+This is another common reason
+that it's unlikely to be a problem,
+
+122
+00:07:04,590 --> 00:07:09,918
+this format specifier contains only a %d.
+
+123
+00:07:11,623 --> 00:07:14,164
+Now, a %d is going to be quite difficult
+
+124
+00:07:14,199 --> 00:07:18,197
+for an attacker to do anything with,
+
+125
+00:07:18,956 --> 00:07:23,398
+because the number of bytes
+
+126
+00:07:23,429 --> 00:07:24,376
+that you can generate
+
+127
+00:07:24,400 --> 00:07:25,902
+with a %d is limited
+
+128
+00:07:25,902 --> 00:07:30,317
+and also the content of the string
+
+129
+00:07:30,349 --> 00:07:31,349
+that you can generate
+
+130
+00:07:31,373 --> 00:07:33,365
+with it is quite limited,
+
+131
+00:07:33,415 --> 00:07:36,326
+because you can only generate ASCII digits
+
+132
+00:07:36,813 --> 00:07:39,693
+rather than an arbitrary list of characters.
+
+133
+00:07:40,212 --> 00:07:43,934
+What an attacker is really
+going to want is a %s,
+
+134
+00:07:43,934 --> 00:07:47,802
+because that allows them to control both
+
+135
+00:07:47,900 --> 00:07:49,746
+the size of the string that's generated
+
+136
+00:07:49,794 --> 00:07:51,343
+and also the exact content
+
+137
+00:07:51,343 --> 00:07:52,848
+of the string that's generated.
+
+138
+00:07:53,994 --> 00:07:55,678
+Something that I would also like to point out
+
+139
+00:07:55,710 --> 00:08:01,583
+about this example is that we're now looking
+at a call to snprintf that is not all in one line.
+
+140
+00:08:01,941 --> 00:08:05,405
+That's quickly going to become a pain to search
+
+141
+00:08:05,452 --> 00:08:07,033
+for with regular expressions.
+
+142
+00:08:07,196 --> 00:08:09,855
+When you're searching with QL,
+
+143
+00:08:10,199 --> 00:08:11,388
+that's not a problem at all,
+
+144
+00:08:11,439 --> 00:08:13,341
+because we're looking
+
+145
+00:08:13,387 --> 00:08:15,405
+at the abstract syntax tree of the code
+
+146
+00:08:15,440 --> 00:08:17,271
+rather than just doing text based searching.
+
+147
+00:08:19,769 --> 00:08:21,716
+What we're going to do here is we're going to add
+
+148
+00:08:21,748 --> 00:08:24,715
+a condition that the format specifier
+
+149
+00:08:24,777 --> 00:08:26,578
+should contain a %s.
+
+150
+00:08:30,815 --> 00:08:35,082
+We use zero-based indexing
+for the arguments of a call.
+
+151
+00:08:35,131 --> 00:08:38,075
+The format specifier is going to be argument number two,
+
+152
+00:08:38,602 --> 00:08:40,710
+and we can get the string
+
+153
+00:08:40,763 --> 00:08:41,530
+like this
+
+154
+00:08:41,554 --> 00:08:43,820
+and then we can do regular expression matching on it
+
+155
+00:08:43,911 --> 00:08:44,911
+like this.
+
+156
+00:08:46,911 --> 00:08:49,762
+We want it to contain some stuff followed
+
+157
+00:08:49,792 --> 00:08:51,959
+by a %s, followed
+
+158
+00:08:51,990 --> 00:08:53,023
+by some more stuff.
+
+159
+00:08:54,836 --> 00:08:55,537
+Now, by default,
+
+160
+00:08:55,561 --> 00:09:00,942
+regular expressions don't match strings
+
+161
+00:09:00,942 --> 00:09:02,578
+that contain a newline character.
+
+162
+00:09:05,687 --> 00:09:07,505
+Format strings very frequently
+
+163
+00:09:07,553 --> 00:09:09,304
+do contain newline characters.
+
+164
+00:09:09,751 --> 00:09:14,387
+We're going to add this special syntax here.
+
+165
+00:09:14,435 --> 00:09:17,973
+We're using Java's regular expression syntax.
+
+166
+00:09:18,413 --> 00:09:20,345
+That's how you tell it to
+
+167
+00:09:20,400 --> 00:09:23,277
+also match strings that contain newline characters.
+
+168
+00:09:24,725 --> 00:09:25,725
+Let's run that now.
+
+169
+00:09:27,125 --> 00:09:28,863
+Now we're down to 18 results.
+
+170
+00:09:30,807 --> 00:09:32,197
+Let's have a look at some of these.
+
+171
+00:09:33,059 --> 00:09:36,781
+Now, that one is in fact, the vulnerability.
+
+172
+00:09:37,302 --> 00:09:38,616
+We'll get back to that later.
+
+173
+00:09:38,663 --> 00:09:39,406
+Let's first look
+
+174
+00:09:39,430 --> 00:09:40,496
+at some others to see
+
+175
+00:09:42,498 --> 00:09:44,140
+which other ones we might want to rule out.
+
+176
+00:09:44,167 --> 00:09:47,422
+This is an example of a result
+
+177
+00:09:47,465 --> 00:09:48,715
+that we're not interested in
+
+178
+00:09:48,762 --> 00:09:53,146
+because here the result of the return value
+of snprintf is being used,
+
+179
+00:09:53,179 --> 00:09:54,603
+but it's being used in a safe way
+
+180
+00:09:54,662 --> 00:09:57,065
+because we're using it to check that
+
+181
+00:09:57,102 --> 00:10:02,068
+it didn't want to write
+more characters than we expected.
+
+182
+00:10:03,325 --> 00:10:09,279
+Really, the vulnerability that we're looking
+for is the case where you have a loop,
+
+183
+00:10:10,125 --> 00:10:15,276
+and the output of snprintf is getting fed back
+
+184
+00:10:15,316 --> 00:10:17,840
+into the size parameter of snprintf.
+
+185
+00:10:17,876 --> 00:10:24,831
+That's what allows the buffer overflow to happen,
+
+186
+00:10:24,880 --> 00:10:28,059
+and also the negative integer overflow here,
+
+187
+00:10:28,172 --> 00:10:32,182
+which is essential in order to have the vulnerability.
+
+188
+00:10:34,018 --> 00:10:35,347
+What I'm going to do now is
+
+189
+00:10:35,403 --> 00:10:37,876
+I'm going to use one of our libraries,
+
+190
+00:10:37,911 --> 00:10:39,466
+I'm going to use the taint tracking library
+
+191
+00:10:39,483 --> 00:10:41,700
+to see if there's data flow
+
+192
+00:10:42,313 --> 00:10:45,658
+from the output back into the size argument.
+
+193
+00:10:49,160 --> 00:10:51,094
+I have to have an import statement first.
+
+194
+00:10:56,484 --> 00:11:00,078
+I'm going to import dataflow's TaintTracking library.
+
+195
+00:11:03,790 --> 00:11:08,162
+Then we're going to add a source and a sink.
+
+196
+00:11:09,222 --> 00:11:11,531
+Normally, when you use the TaintTracking library,
+
+197
+00:11:11,832 --> 00:11:13,110
+the source would be something
+
+198
+00:11:13,175 --> 00:11:15,180
+that you believe to be tainted
+
+199
+00:11:15,211 --> 00:11:17,804
+because it's controllable by an attacker,
+
+200
+00:11:18,649 --> 00:11:22,101
+and the sink would be a place
+
+201
+00:11:22,157 --> 00:11:23,645
+where it's used in a dangerous way.
+
+202
+00:11:25,241 --> 00:11:26,241
+QL is flexible,
+
+203
+00:11:26,473 --> 00:11:27,517
+and so we can use it
+
+204
+00:11:27,896 --> 00:11:31,753
+for this kind of situation
+as well which is slightly different,
+
+205
+00:11:31,829 --> 00:11:32,922
+where we're going to just say,
+
+206
+00:11:33,483 --> 00:11:36,065
+"I want the source to be the call to snprintf,
+
+207
+00:11:36,110 --> 00:11:41,137
+and I want the sink to be the size argument of snprintf."
+
+208
+00:11:42,214 --> 00:11:43,538
+It will do that equally well.
+
+209
+00:11:48,139 --> 00:11:49,298
+"call" is the source,
+
+210
+00:11:50,066 --> 00:12:00,507
+and the sink is the size argument.
+
+211
+00:12:05,733 --> 00:12:06,733
+In fact,
+
+212
+00:12:06,998 --> 00:12:10,367
+because the call is the source of the data flow,
+
+213
+00:12:12,068 --> 00:12:13,059
+this thing
+
+214
+00:12:13,083 --> 00:12:16,273
+about it being in a void context is now redundant,
+
+215
+00:12:16,303 --> 00:12:18,431
+so we can remove that
+from the query if we want.
+
+216
+00:12:19,629 --> 00:12:21,040
+It would be harmless if we left it in.
+
+217
+00:12:22,772 --> 00:12:23,632
+Now, we need to actually
+
+218
+00:12:23,656 --> 00:12:25,618
+use the TaintTracking library
+
+219
+00:12:25,671 --> 00:12:26,713
+to connect up the source
+
+220
+00:12:26,762 --> 00:12:27,786
+and the sink.
+
+221
+00:12:31,730 --> 00:12:35,276
+We only need to look for local data flow,
+
+222
+00:12:35,325 --> 00:12:38,469
+that means within the function.
+For this particular pattern,
+
+223
+00:12:39,108 --> 00:12:41,790
+it's always going to be within a single function.
+
+224
+00:12:42,074 --> 00:12:43,909
+In general, our TaintTracking library
+
+225
+00:12:44,522 --> 00:12:48,845
+supports interprocedural taint tracking as well,
+
+226
+00:12:49,177 --> 00:12:50,000
+we just don't need it here,
+
+227
+00:12:50,023 --> 00:12:52,202
+so that's why we're using the localTaint function.
+
+228
+00:12:56,102 --> 00:12:57,500
+Let's run that.
+
+229
+00:12:58,193 --> 00:13:04,690
+Now we have exactly one result,
+which is the vulnerability right here.
+
+230
+00:13:05,637 --> 00:13:07,283
+Now, there's one final thing
+
+231
+00:13:07,283 --> 00:13:08,642
+that I'd like to show you.
+
+232
+00:13:09,184 --> 00:13:16,074
+It's often useful to use range analysis
+
+233
+00:13:16,113 --> 00:13:20,330
+to see what sizes we're able to deduce.
+
+234
+00:13:21,064 --> 00:13:25,794
+In this particular case, we're interested
+in the size argument into snprintf.
+
+235
+00:13:25,794 --> 00:13:29,793
+If we were to apply range analysis to it
+
+236
+00:13:29,838 --> 00:13:35,206
+and find out that it has an upper bound of 10,
+then there's probably nothing wrong.
+
+237
+00:13:36,580 --> 00:13:39,646
+I'm just going to show you
+how to use the range analysis library.
+
+238
+00:13:40,435 --> 00:13:41,685
+We need to import it like this,
+
+239
+00:13:46,968 --> 00:13:52,405
+and then we can just add
+an extra column to the results list.
+
+240
+00:13:53,083 --> 00:13:54,253
+We're going to include
+
+241
+00:13:54,290 --> 00:13:57,858
+the upper bound of the size argument.
+
+242
+00:14:04,174 --> 00:14:09,012
+What I'm also going to do is I'm going to apply
+getFullyConverted to the size argument.
+
+243
+00:14:12,062 --> 00:14:13,089
+Just run that now.
+
+244
+00:14:15,174 --> 00:14:22,655
+When you call a function like snprintf,
+the argument that it expects is a size_t,
+
+245
+00:14:22,688 --> 00:14:25,525
+so an unsigned integer.
+
+246
+00:14:27,172 --> 00:14:29,790
+Now, if the argument
+
+247
+00:14:29,821 --> 00:14:30,750
+that you pass in is something
+
+248
+00:14:30,774 --> 00:14:32,121
+like an int, then there's going
+
+249
+00:14:32,121 --> 00:14:33,334
+to be an implicit conversion,
+
+250
+00:14:33,931 --> 00:14:35,308
+and those implicit conversions
+
+251
+00:14:35,347 --> 00:14:38,742
+can often be the source of integer overflows.
+
+252
+00:14:39,222 --> 00:14:41,125
+In this particular case,
+
+253
+00:14:42,859 --> 00:14:44,381
+the reason that there's a problem is
+
+254
+00:14:44,428 --> 00:14:49,835
+because we subtract something from this size,
+
+255
+00:14:50,521 --> 00:14:53,446
+and we get a negative integer overflow,
+
+256
+00:14:53,821 --> 00:14:56,900
+which means that the size parameter
+that we call snprintf with
+
+257
+00:14:56,900 --> 00:14:59,917
+is actually a huge 64 bit number.
+
+258
+00:15:02,924 --> 00:15:04,629
+It's more interesting here to look
+
+259
+00:15:04,675 --> 00:15:10,505
+at the size of the fully converted argument
+
+260
+00:15:12,593 --> 00:15:13,627
+so that we can make sure
+
+261
+00:15:13,663 --> 00:15:17,613
+that we're seeing what's going to happen
+
+262
+00:15:17,645 --> 00:15:19,624
+after the conversion.
+
+263
+00:15:19,673 --> 00:15:24,256
+Sure enough, range analysis knows
+
+264
+00:15:24,334 --> 00:15:26,992
+that there's a possibility
+of a negative integer overflow,
+
+265
+00:15:27,069 --> 00:15:30,106
+and so it's saying that the upper bound
+
+266
+00:15:30,526 --> 00:15:40,003
+is the full range of 64 bit unsigned numbers.
+
+267
+00:15:41,613 --> 00:15:42,023
+Okay.
+
+268
+00:15:42,047 --> 00:15:43,249
+That's the end of the demo.
+
+269
+00:15:43,809 --> 00:15:49,349
+What I've done is shown you that, using QL,
+
+270
+00:15:49,400 --> 00:15:52,273
+you can very quickly search for problems
+
+271
+00:15:52,320 --> 00:15:53,539
+like this.
+
+272
+00:15:53,840 --> 00:15:56,050
+I started out
+
+273
+00:15:56,113 --> 00:15:57,930
+with something that is equivalent to grep,
+
+274
+00:15:57,976 --> 00:15:59,146
+but then I very quickly moved on
+
+275
+00:15:59,208 --> 00:16:00,890
+to something that is much more powerful.
+
+276
+00:16:02,318 --> 00:16:05,872
+Now, of course, what we do
+
+277
+00:16:05,974 --> 00:16:09,144
+at Semmle is we also take queries
+
+278
+00:16:09,182 --> 00:16:09,904
+like this,
+
+279
+00:16:09,928 --> 00:16:12,394
+and we turn them into much more polished queries
+
+280
+00:16:12,412 --> 00:16:15,052
+which can then be included in our default suite.
+
+281
+00:16:15,386 --> 00:16:18,033
+My colleague Geoffrey White took
+
+282
+00:16:18,078 --> 00:16:21,206
+this rough query which I wrote in 2017
+
+283
+00:16:21,241 --> 00:16:23,504
+and turned it into a much more polished version,
+
+284
+00:16:23,551 --> 00:16:24,454
+which is now included
+
+285
+00:16:24,478 --> 00:16:28,858
+in our default suite on lgtm.com.
+
+286
+00:16:29,340 --> 00:16:32,231
+In fact, it was Geoffrey's version of this query
+
+287
+00:16:32,782 --> 00:16:36,434
+that found the vulnerability in rsyslog,
+
+288
+00:16:36,751 --> 00:16:38,579
+and Bas was the one
+
+289
+00:16:38,579 --> 00:16:40,639
+who was looking through the results
+
+290
+00:16:40,719 --> 00:16:41,873
+and noticed it,
+
+291
+00:16:42,208 --> 00:16:44,833
+and that then led to us reporting
+
+292
+00:16:44,978 --> 00:16:46,590
+the vulnerability to Rainer.
+
diff --git a/ql_demos/csharp/.project b/ql_demos/csharp/.project
new file mode 100644
index 0000000..9ac1023
--- /dev/null
+++ b/ql_demos/csharp/.project
@@ -0,0 +1,12 @@
+
+
+ ql-demos-csharp
+
+
+
+
+
+
+ com.semmle.plugin.qdt.core.qlnature
+
+
diff --git a/ql_demos/csharp/.qlpath b/ql_demos/csharp/.qlpath
new file mode 100644
index 0000000..ded3be5
--- /dev/null
+++ b/ql_demos/csharp/.qlpath
@@ -0,0 +1,10 @@
+
+
+
+ com.semmle.code.csharp.library
+
+ com.semmle.code.csharp.dbscheme
+
+ csharp
+
+
diff --git a/ql_demos/csharp/ZipSlip/01_Sources.ql b/ql_demos/csharp/ZipSlip/01_Sources.ql
new file mode 100644
index 0000000..04c9d35
--- /dev/null
+++ b/ql_demos/csharp/ZipSlip/01_Sources.ql
@@ -0,0 +1,5 @@
+import csharp
+
+from Property p
+where p.hasName("FullName")
+select p.getAnAccess()
diff --git a/ql_demos/csharp/ZipSlip/02_Sources.ql b/ql_demos/csharp/ZipSlip/02_Sources.ql
new file mode 100644
index 0000000..4c22f9f
--- /dev/null
+++ b/ql_demos/csharp/ZipSlip/02_Sources.ql
@@ -0,0 +1,7 @@
+import csharp
+
+from Property p
+where
+ p.hasName("FullName") and
+ p.getDeclaringType().hasName("ZipArchiveEntry")
+select p.getAnAccess()
diff --git a/ql_demos/csharp/ZipSlip/03_Sinks.ql b/ql_demos/csharp/ZipSlip/03_Sinks.ql
new file mode 100644
index 0000000..ce9ac02
--- /dev/null
+++ b/ql_demos/csharp/ZipSlip/03_Sinks.ql
@@ -0,0 +1,5 @@
+import csharp
+
+from MethodCall c
+where c.getTarget().hasName("ExtractToFile")
+select c
diff --git a/ql_demos/csharp/ZipSlip/04_SinkArgument.ql b/ql_demos/csharp/ZipSlip/04_SinkArgument.ql
new file mode 100644
index 0000000..5164334
--- /dev/null
+++ b/ql_demos/csharp/ZipSlip/04_SinkArgument.ql
@@ -0,0 +1,5 @@
+import csharp
+
+from MethodCall c
+where c.getTarget().hasName("ExtractToFile")
+select c.getArgument(1)
diff --git a/ql_demos/csharp/ZipSlip/05_LocalFlow.ql b/ql_demos/csharp/ZipSlip/05_LocalFlow.ql
new file mode 100644
index 0000000..9f80c98
--- /dev/null
+++ b/ql_demos/csharp/ZipSlip/05_LocalFlow.ql
@@ -0,0 +1,12 @@
+import csharp
+import semmle.code.csharp.dataflow.TaintTracking
+
+from DataFlow::Node source, DataFlow::Node sink, MethodCall c, Property p
+where
+ c.getTarget().hasName("ExtractToFile") and
+ p.hasName("FullName") and
+ p.getDeclaringType().hasName("ZipArchiveEntry") and
+ sink.asExpr() = c.getAnArgument() and
+ source.asExpr() = p.getAnAccess() and
+ TaintTracking::localTaint(source, sink)
+select sink, "ZipSlip from $@.", source, source.toString()
diff --git a/ql_demos/csharp/ZipSlip/06_GlobalFlow.ql b/ql_demos/csharp/ZipSlip/06_GlobalFlow.ql
new file mode 100644
index 0000000..d46d234
--- /dev/null
+++ b/ql_demos/csharp/ZipSlip/06_GlobalFlow.ql
@@ -0,0 +1,24 @@
+import csharp
+import semmle.code.csharp.dataflow.TaintTracking
+
+class ZipSlipTaintTracking extends TaintTracking::Configuration {
+ ZipSlipTaintTracking() { this = "Zip Slip taint tracking" }
+
+ override predicate isSource(DataFlow::Node node) {
+ exists(Property p |
+ p.hasName("FullName") and
+ p.getDeclaringType().hasName("ZipArchiveEntry") and
+ node.asExpr() = p.getAnAccess()
+ )
+ }
+
+ override predicate isSink(DataFlow::Node node) {
+ exists(MethodCall call | call.getTarget().hasName("ExtractToFile") |
+ node.asExpr() = call.getAnArgument()
+ )
+ }
+}
+
+from ZipSlipTaintTracking config, DataFlow::Node source, DataFlow::Node sink
+where config.hasFlow(source, sink)
+select sink, "Zip Slip vulnerability from $@.", source, source.toString()
diff --git a/ql_demos/csharp/ZipSlip/README.md b/ql_demos/csharp/ZipSlip/README.md
new file mode 100644
index 0000000..03822bb
--- /dev/null
+++ b/ql_demos/csharp/ZipSlip/README.md
@@ -0,0 +1,52 @@
+# C# Zip Slip demo
+
+## Snapshot
+
+Use [this snapshot](http://downloads.lgtm.com/snapshots/csharp/microsoft/powershell/PowerShell_PowerShell_csharp-srcVersion_450d884668ca477c6581ce597958f021fac30bff-dist_odasa-lgtm-2018-09-11-e5cbe16-linux64.zip)
+of PowerShell.
+
+## Introduction
+
+The "Zip Slip" vulnerability was announced on June 5th 2018, by [Snyk](https://snyk.io/research/zip-slip-vulnerability).
+You can see on [this page](https://snyk.io/research/zip-slip-vulnerability#dot-net) some sample code that shows the vulnerable code.
+
+Microsoft immediately wanted to search their codebase to see if any of their own code was vulnerable. Within a few days,
+they had written a basic query and run it against a number of critical codebases, turning up multiple valuable results.
+Because Semmle has a close working relationship with Microsoft, we then helped Microsoft to refine
+that query further and submit it as a [pull request](https://github.com/Semmle/ql/pull/54) against our open source QL repository.
+
+It was deployed to [LGTM.com](https://lgtm.com) within 2 weeks where it was run over thousands of open source C# projects.
+
+Here are some [sample results](https://lgtm.com/rules/1506511188430/alerts/) for the ZipSlip query.
+One of those projects was Microsoft PowerShell.
+
+As a result of this query, [a senior Microsoft engineer](https://github.com/TravisEz13)
+fixed this vulnerability in November 2018 in
+[this PR](https://lgtm.com/projects/g/PowerShell/PowerShell/rev/b39a41109d86d9ba75f966e2d7b52b81fa629150).
+
+So how did they do it?
+
+## Query 1-4: Exploring sources and sinks
+
+Open the snapshot in QL4E, and show QL as a simple query language for identifying sources and sinks. Look for sinks
+(calls to `ExtractToFile`), noting that we actually want to identify the vulnerable argument.
+
+Sources are identified by for example the `FullName` property of `ZipArchiveEntry`. If we omit the
+name of the declaring type, we get too many results, and the full query would use the qualified name of
+the property.
+
+Already we've found the vulnerable code.
+
+## Query 5
+
+This query uses local dataflow to find data flow from the source to the sink. We actually need to use taint tracking
+due to the use of `Path.Combine`.
+
+## Query 6
+
+This uses a global taint tracking configuration.
+
+# Final query
+
+The [final query](https://lgtm.com/rules/1506511188430/) includes query help, and identifies various other sources and sinks,
+but uses the same general structure. It also includes metadata for LGTM.
diff --git a/ql_demos/csharp/qlpack.yml b/ql_demos/csharp/qlpack.yml
new file mode 100644
index 0000000..a6e22f5
--- /dev/null
+++ b/ql_demos/csharp/qlpack.yml
@@ -0,0 +1,3 @@
+name: codeql-demos-csharp
+version: 0.0.0
+libraryPathDependencies: codeql-csharp
diff --git a/ql_demos/csharp/queries.xml b/ql_demos/csharp/queries.xml
new file mode 100644
index 0000000..56abe77
--- /dev/null
+++ b/ql_demos/csharp/queries.xml
@@ -0,0 +1 @@
+
diff --git a/ql_demos/java/.project b/ql_demos/java/.project
new file mode 100644
index 0000000..dd7052d
--- /dev/null
+++ b/ql_demos/java/.project
@@ -0,0 +1,12 @@
+
+
+ ql-demos-java
+
+
+
+
+
+
+ com.semmle.plugin.qdt.core.qlnature
+
+
diff --git a/ql_demos/java/.qlpath b/ql_demos/java/.qlpath
new file mode 100644
index 0000000..f7ad1ce
--- /dev/null
+++ b/ql_demos/java/.qlpath
@@ -0,0 +1,10 @@
+
+
+
+ com.semmle.code.java.library
+
+ com.semmle.code.java.dbscheme
+
+ java
+
+
diff --git a/ql_demos/java/Apache_Struts_CVE-2017-9805/README.md b/ql_demos/java/Apache_Struts_CVE-2017-9805/README.md
new file mode 100644
index 0000000..99db29e
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2017-9805/README.md
@@ -0,0 +1,7 @@
+[Blog post](https://lgtm.com/blog/apache_struts_CVE-2017-9805)
+
+[This snapshot](https://downloads.lgtm.com/snapshots/java/apache/struts/apache-struts-91ae344-CVE-2017-9805.zip) has the bug. Also, Mo has greated a copy of the project so that you can see [the result](https://lgtm.com/projects/g/mmosemmle/struts_9805/alerts/?mode=list&id=java%2Funsafe-deserialization) on [lgtm.com](https://lgtm.com/projects/g/mmosemmle/struts_9805).
+
+This directory contains a copy of `UnsafeDeserialization.qll`, because I get a syntax error when I try to do `import Security.CWE.CWE-502.UnsafeDeserialization`.
+
+The query is based on an earlier version of one of our default queries: `UnsafeDeserialization.ql`. When Mo discovered the vulnerability, the standard query did not detect the problem. But Mo realized by studying previous vulnerabilities in Struts that [ContentTypeHandler](http://struts.apache.org/maven/struts2-plugins/struts2-rest-plugin/apidocs/org/apache/struts2/rest/handler/ContentTypeHandler.html) is a source of untrusted input in Struts, so he modified the query to make it a taint source. With that modification, the query found the RCE vulnerability. Our Java team have since improved `UnsafeDeserialization.ql` so that it is able to detect this vulnerability, so this is a great example of how the work of the Semmle Security Team helps to improve our queries for all our users. It is interesting to compare the result of Mo's query with the new default query, which you can find in the directory `Security/CWE/CWE-502/`. The source found by the default query is buried deeper in the library than the one found by Mo's query.
diff --git a/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserialization.qll b/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserialization.qll
new file mode 100644
index 0000000..d01dc3e
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserialization.qll
@@ -0,0 +1,79 @@
+/**
+ * This is a copy of UnsafeDeserialization.qll from the QL standard library
+ * for Java. It is copied here for the purpose of recreating the scenario
+ * when Mo first discovered the vulnerability. At the time, the standard
+ * unsafe deserialization query (UnsafeDeserialization.ql) did not detect the
+ * bug. Mo discovered the vulnerability by customizing the standard query,
+ * using knowledge that he had about the design of Struts. (The standard query
+ * has since been improved so that it will automatically detect similar
+ * vulnerabilities in future.)
+ */
+
+import semmle.code.java.frameworks.Kryo
+import semmle.code.java.frameworks.XStream
+import semmle.code.java.dataflow.DataFlow
+
+class ObjectInputStreamReadObjectMethod extends Method {
+ ObjectInputStreamReadObjectMethod() {
+ this.getDeclaringType().getASourceSupertype*().hasQualifiedName("java.io", "ObjectInputStream") and
+ (this.hasName("readObject") or this.hasName("readUnshared"))
+ }
+}
+
+class XMLDecoderReadObjectMethod extends Method {
+ XMLDecoderReadObjectMethod() {
+ this.getDeclaringType().hasQualifiedName("java.beans", "XMLDecoder") and
+ this.hasName("readObject")
+ }
+}
+
+class SafeXStream extends DataFlow::Configuration {
+ SafeXStream() { this = "UnsafeDeserialization::SafeXStream" }
+ override predicate isSource(DataFlow::Node src) {
+ any(XStreamEnableWhiteListing ma).getQualifier().(VarAccess).getVariable().getAnAccess() = src.asExpr()
+ }
+ override predicate isSink(DataFlow::Node sink) {
+ exists(MethodAccess ma |
+ sink.asExpr() = ma.getQualifier() and
+ ma.getMethod() instanceof XStreamReadObjectMethod
+ )
+ }
+}
+
+class SafeKryo extends DataFlow::Configuration {
+ SafeKryo() { this = "UnsafeDeserialization::SafeKryo" }
+ override predicate isSource(DataFlow::Node src) {
+ any(KryoEnableWhiteListing ma).getQualifier().(VarAccess).getVariable().getAnAccess() = src.asExpr()
+ }
+ override predicate isSink(DataFlow::Node sink) {
+ exists(MethodAccess ma |
+ sink.asExpr() = ma.getQualifier() and
+ ma.getMethod() instanceof KryoReadObjectMethod
+ )
+ }
+}
+
+predicate unsafeDeserialization(MethodAccess ma, Expr sink) {
+ exists(Method m | m = ma.getMethod() |
+ m instanceof ObjectInputStreamReadObjectMethod and
+ sink = ma.getQualifier()
+ or
+ m instanceof XMLDecoderReadObjectMethod and
+ sink = ma.getQualifier()
+ or
+ m instanceof XStreamReadObjectMethod and
+ sink = ma.getAnArgument() and
+ not exists(SafeXStream sxs | sxs.hasFlowToExpr(ma.getQualifier()))
+ or
+ m instanceof KryoReadObjectMethod and
+ sink = ma.getAnArgument() and
+ not exists(SafeKryo sk | sk.hasFlowToExpr(ma.getQualifier()))
+ )
+}
+
+class UnsafeDeserializationSink extends DataFlow::ExprNode {
+ UnsafeDeserializationSink() {
+ unsafeDeserialization(_, this.getExpr())
+ }
+ MethodAccess getMethodAccess() { unsafeDeserialization(result, this.getExpr()) }
+}
diff --git a/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserializationStruts.ql b/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserializationStruts.ql
new file mode 100644
index 0000000..27437fd
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2017-9805/UnsafeDeserializationStruts.ql
@@ -0,0 +1,55 @@
+/**
+ * @name Deserialization of user-controlled data (Apache Struts)
+ * @description Deserializing user-controlled data may allow attackers to
+ * execute arbitrary code.
+ * @kind path-problem
+ * @problem.severity error
+ * @precision high
+ * @id java/unsafe-deserialization
+ * @tags security
+ * external/cwe/cwe-502
+ */
+import java
+import UnsafeDeserialization
+import semmle.code.java.dataflow.DataFlow3::DataFlow3 as DF
+import DF::PathGraph
+
+/** The interface `org.apache.struts2.rest.handler.ContentTypeHandler`. */
+class ContentTypeHandler extends RefType {
+ ContentTypeHandler() {
+ this.hasQualifiedName("org.apache.struts2.rest.handler", "ContentTypeHandler")
+ }
+}
+
+/** A `toObject` method on a subtype of `org.apache.struts2.rest.handler.ContentTypeHandler`. */
+class ContentTypeHandlerDeserialization extends Method {
+ ContentTypeHandlerDeserialization() {
+ this.getDeclaringType().getASupertype*() instanceof ContentTypeHandler and
+ this.hasName("toObject")
+ }
+}
+
+/**
+ * The first parameter of a `toObject` method on a `ContentTypeHandler`, which is
+ * a source of tainted user data.
+ */
+class ContentTypeHandlerInput extends DF::Node {
+ ContentTypeHandlerInput() {
+ this.asParameter() = any(ContentTypeHandlerDeserialization des).getParameter(0)
+ }
+}
+
+/**
+ * Configuration that tracks the flow of taint from the first parameter of
+ * `ContentTypeHandler.toObject` to an instance of unsafe deserialization.
+ */
+class StrutsUnsafeDeserializationConfig extends DF::Configuration {
+ StrutsUnsafeDeserializationConfig() { this = "StrutsUnsafeDeserializationConfig" }
+ override predicate isSource(DF::Node source) { source instanceof ContentTypeHandlerInput }
+ override predicate isSink(DF::Node sink) { sink instanceof UnsafeDeserializationSink }
+}
+
+from DF::PathNode source, DF::PathNode sink, StrutsUnsafeDeserializationConfig conf
+where conf.hasFlowPath(source, sink)
+ and sink.getNode() instanceof UnsafeDeserializationSink
+select sink.getNode().(UnsafeDeserializationSink).getMethodAccess(), source, sink, "Unsafe deserialization of $@.", source, "user input"
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/01_compileAndExecute.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/01_compileAndExecute.ql
new file mode 100644
index 0000000..9e18fc5
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/01_compileAndExecute.ql
@@ -0,0 +1,13 @@
+/**
+ * @name 01_compileAndExecute
+ */
+
+import java
+
+/* Find the method named "compileAndExecute". This method is used
+ * to execute OGNL, so it is going to be the sink of our dataflow
+ * analysis.
+ */
+from Method m
+where m.getName() = "compileAndExecute"
+select m
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/02_compileAndExecute.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/02_compileAndExecute.ql
new file mode 100644
index 0000000..747b3e9
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/02_compileAndExecute.ql
@@ -0,0 +1,12 @@
+/**
+ * @name 02_compileAndExecute
+ */
+
+import java
+
+/* Find calls to "compileAndExecute". */
+from Method m, MethodAccess ma
+where
+ m.getName() = "compileAndExecute" and
+ ma.getMethod() = m
+select m, ma
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/03_compileAndExecute.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/03_compileAndExecute.ql
new file mode 100644
index 0000000..e96b143
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/03_compileAndExecute.ql
@@ -0,0 +1,14 @@
+/**
+ * @name 03_compileAndExecute
+ */
+
+import java
+
+/* We are actually interested in argument 0 of compileAndExecute,
+ * because that's the string that will get executed.
+ */
+from Method m, MethodAccess ma
+where
+ m.getName() = "compileAndExecute" and
+ ma.getMethod() = m
+select ma.getArgument(0)
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/04_compileAndExecute.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/04_compileAndExecute.ql
new file mode 100644
index 0000000..242d5c5
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/04_compileAndExecute.ql
@@ -0,0 +1,20 @@
+/**
+ * @name 04_compileAndExecute
+ */
+
+import java
+
+/* Refactor the logic into a predicate. */
+predicate isOgnlSink(Expr arg) {
+ exists (Method m, MethodAccess ma
+ | m.getName() = "compileAndExecute" and
+ ma.getMethod() = m and
+ arg = ma.getArgument(0))
+}
+
+/* This query produces identical results to the previous one. We have just
+ * refactored the logic into a separate predicate.
+ */
+from Expr arg
+where isOgnlSink(arg)
+select arg
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/05_getNamespace.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/05_getNamespace.ql
new file mode 100644
index 0000000..d1d7fa7
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/05_getNamespace.ql
@@ -0,0 +1,18 @@
+/**
+ * @name 05_getNamespace
+ */
+
+import java
+
+/* This predicate is currently unused, but we will need it again later. */
+predicate isOgnlSink(Expr arg) {
+ exists (Method m, MethodAccess ma
+ | m.getName() = "compileAndExecute" and
+ ma.getMethod() = m and
+ arg = ma.getArgument(0))
+}
+
+/* Find methods named "getNamespace". */
+from Method m
+where m.getName() = "getNamespace"
+select m
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/06_getNamespace.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/06_getNamespace.ql
new file mode 100644
index 0000000..eb92ba9
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/06_getNamespace.ql
@@ -0,0 +1,20 @@
+/**
+ * @name 06_getNamespace
+ */
+
+import java
+
+predicate isOgnlSink(Expr arg) {
+ exists (Method m, MethodAccess ma
+ | m.getName() = "compileAndExecute" and
+ ma.getMethod() = m and
+ arg = ma.getArgument(0))
+}
+
+/* We are only interested in methods that override ActionProxy::getNamespace. */
+from Method m, Method n
+where
+ m.getName() = "getNamespace" and
+ m.getDeclaringType().getName() = "ActionProxy" and
+ n.overrides*(m)
+select n
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/07_getNamespace.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/07_getNamespace.ql
new file mode 100644
index 0000000..f62a892
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/07_getNamespace.ql
@@ -0,0 +1,21 @@
+/**
+ * @name 07_getNamespace
+ */
+
+import java
+
+predicate isOgnlSink(Expr arg) {
+ exists (Method m, MethodAccess ma
+ | m.getName() = "compileAndExecute" and
+ ma.getMethod() = m and
+ arg = ma.getArgument(0))
+}
+
+/* Find calls to getNamespace. */
+from Method m, Method n, MethodAccess ma
+where
+ m.getName() = "getNamespace" and
+ m.getDeclaringType().getName() = "ActionProxy" and
+ n.overrides*(m) and
+ ma.getMethod() = n
+select ma
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/08_getNamespace.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/08_getNamespace.ql
new file mode 100644
index 0000000..d66d0b1
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/08_getNamespace.ql
@@ -0,0 +1,28 @@
+/**
+ * @name 08_getNamespace
+ */
+
+import java
+
+predicate isOgnlSink(Expr arg) {
+ exists (Method m, MethodAccess ma
+ | m.getName() = "compileAndExecute" and
+ ma.getMethod() = m and
+ arg = ma.getArgument(0))
+}
+
+/* Refactor the logic into a predicate. */
+predicate isActionProxySource(MethodAccess ma) {
+ exists (Method m, Method n
+ | m.getName() = "getNamespace" and
+ m.getDeclaringType().getName() = "ActionProxy" and
+ n.overrides*(m) and
+ ma.getMethod() = n)
+}
+
+/* This query produces identical results to the previous one. We have just
+ * refactored the logic into a separate predicate.
+ */
+from MethodAccess ma
+where isActionProxySource(ma)
+select ma
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/09_dataflow.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/09_dataflow.ql
new file mode 100644
index 0000000..9d3173f
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/09_dataflow.ql
@@ -0,0 +1,42 @@
+/**
+ * @name 09_dataflow
+ * @kind path-problem
+ */
+
+import java
+import semmle.code.java.dataflow.DataFlow
+import DataFlow::PathGraph
+
+predicate isOgnlSink(Expr arg) {
+ exists (Method m, MethodAccess ma
+ | m.getName() = "compileAndExecute" and
+ ma.getMethod() = m and
+ arg = ma.getArgument(0))
+}
+
+predicate isActionProxySource(MethodAccess ma) {
+ exists (Method m, Method n
+ | m.getName() = "getNamespace" and
+ m.getDeclaringType().getName() = "ActionProxy" and
+ n.overrides*(m) and
+ ma.getMethod() = n)
+}
+
+class OgnlCfg extends DataFlow::Configuration {
+ OgnlCfg() { this = "ognl" }
+
+ override predicate isSource(DataFlow::Node source) {
+ isActionProxySource(source.asExpr())
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ isOgnlSink(sink.asExpr())
+ }
+}
+
+/* First version of the dataflow query. We use isActionProxySource
+ * as the source and isOgnlSink as the sink.
+ */
+from OgnlCfg cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select source, source, sink, "ognl"
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/10_dataflow_with_barrier.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/10_dataflow_with_barrier.ql
new file mode 100644
index 0000000..16332e2
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/10_dataflow_with_barrier.ql
@@ -0,0 +1,50 @@
+/**
+ * @name 10_dataflow_with_barrier
+ * @kind path-problem
+ */
+
+import java
+import semmle.code.java.dataflow.DataFlow
+import DataFlow::PathGraph
+
+predicate isOgnlSink(Expr arg) {
+ exists (Method m, MethodAccess ma
+ | m.getName() = "compileAndExecute" and
+ ma.getMethod() = m and
+ arg = ma.getArgument(0))
+}
+
+predicate isActionProxySource(MethodAccess ma) {
+ exists (Method m, Method n
+ | m.getName() = "getNamespace" and
+ m.getDeclaringType().getName() = "ActionProxy" and
+ n.overrides*(m) and
+ ma.getMethod() = n)
+}
+
+class OgnlCfg extends DataFlow::Configuration {
+ OgnlCfg() { this = "ognl" }
+
+ override predicate isSource(DataFlow::Node source) {
+ isActionProxySource(source.asExpr())
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ isOgnlSink(sink.asExpr())
+ }
+
+ override predicate isBarrier(DataFlow::Node node) {
+ node.getEnclosingCallable().getDeclaringType().getName() = "ValueStackShadowMap"
+ }
+}
+
+/* If you look at the results of the previous query in the path viewer
+ * then you will see that a lot of the results are not interesting
+ * because they go via the class named "ValueStackShadowMap". This class
+ * is rarely used in practice, so we want to exclude paths that go
+ * through it. In this version of the query, we have overridden
+ * `isBarrier` to exclude those paths.
+ */
+from OgnlCfg cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select source, source, sink, "ognl"
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/11_dataflow_with_additional_flow_step.ql b/ql_demos/java/Apache_Struts_CVE-2018-11776/11_dataflow_with_additional_flow_step.ql
new file mode 100644
index 0000000..8cecdf1
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/11_dataflow_with_additional_flow_step.ql
@@ -0,0 +1,65 @@
+/**
+ * @name 11_dataflow_with_additional_flow_step
+ * @kind path-problem
+ */
+
+import java
+import semmle.code.java.dataflow.DataFlow
+import semmle.code.java.dataflow.TaintTracking
+import DataFlow::PathGraph
+
+predicate isOgnlSink(Expr arg) {
+ exists (Method m, MethodAccess ma
+ | m.getName() = "compileAndExecute" and
+ ma.getMethod() = m and
+ arg = ma.getArgument(0))
+}
+
+predicate isActionProxySource(MethodAccess ma) {
+ exists (Method m, Method n
+ | m.getName() = "getNamespace" and
+ m.getDeclaringType().getName() = "ActionProxy" and
+ n.overrides*(m) and
+ ma.getMethod() = n)
+}
+
+class OgnlCfg extends DataFlow::Configuration {
+ OgnlCfg() { this = "ognl" }
+
+ override predicate isSource(DataFlow::Node source) {
+ isActionProxySource(source.asExpr())
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ isOgnlSink(sink.asExpr())
+ }
+
+ override predicate isBarrier(DataFlow::Node node) {
+ node.getEnclosingCallable().getDeclaringType().getName() = "ValueStackShadowMap"
+ }
+
+ override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+ TaintTracking::localTaintStep(node1, node2)
+ }
+}
+
+/* The previous query found one good result. The query described in
+ * Mo's blog post finds more results than that. What's the difference?
+ *
+ * The difference is that Mo's query includes more dataflow steps.
+ * The default DataFlow configuration is very conservative. It only
+ * tracks dataflow when the value is passed unmodified from one
+ * method to the next. But if the value is modified in any way, even
+ * something very simple like adding 1 to an integer, then it will
+ * stop tracking. Often we want to continue tracking the flow in
+ * such cases. We can do that by adding extra flow steps in the
+ * configuration. In this query, we have added "local taint" edges.
+ * Now we get a second result (in `ActionChainResult.java`), which
+ * is one of the serious RCE vulnerabilities.
+ *
+ * The full query adds even more additional dataflow steps, which is
+ * why it finds 10 results, rather than just 2.
+ */
+from OgnlCfg cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select source, source, sink, "ognl"
diff --git a/ql_demos/java/Apache_Struts_CVE-2018-11776/README.md b/ql_demos/java/Apache_Struts_CVE-2018-11776/README.md
new file mode 100644
index 0000000..eb5e7bd
--- /dev/null
+++ b/ql_demos/java/Apache_Struts_CVE-2018-11776/README.md
@@ -0,0 +1,14 @@
+# Apache Struts CVE-2018-11776
+
+[Blog post](https://lgtm.com/blog/apache_struts_CVE-2018-11776)
+
+[This snapshot](https://downloads.lgtm.com/snapshots/java/apache/struts/apache-struts-7fd1622-CVE-2018-11776.zip) has the bug.
+
+The queries in this directory are slightly simplified to make the demo easier to follow. As a result, they don't find as many variants as the query described in the blog post. The full query can be found [here](https://github.com/Semmle/SecurityQueries/blob/e5c2be7d5eec46cd5a4a8ebdbe8cb63be2e36665/semmle-security-java/queries/struts/cve_2018_11776/final.ql).
+
+# Suggested workflow
+
+* First run the [final query](https://github.com/Semmle/SecurityQueries/blob/e5c2be7d5eec46cd5a4a8ebdbe8cb63be2e36665/semmle-security-java/queries/struts/cve_2018_11776/final.ql).
+** Show the result in the path viewer.
+* Show how to build a similar query step by step.
+** We will build a slightly simplified version of the query, so it won't find as many results, but it still finds one of the RCEs.
diff --git a/ql_demos/java/qlpack.yml b/ql_demos/java/qlpack.yml
new file mode 100644
index 0000000..42457d1
--- /dev/null
+++ b/ql_demos/java/qlpack.yml
@@ -0,0 +1,3 @@
+name: codeql-demos-java
+version: 0.0.0
+libraryPathDependencies: codeql-java
diff --git a/ql_demos/java/queries.xml b/ql_demos/java/queries.xml
new file mode 100644
index 0000000..0d33187
--- /dev/null
+++ b/ql_demos/java/queries.xml
@@ -0,0 +1 @@
+
diff --git a/ql_demos/javascript/.project b/ql_demos/javascript/.project
new file mode 100644
index 0000000..fea4fb6
--- /dev/null
+++ b/ql_demos/javascript/.project
@@ -0,0 +1,12 @@
+
+
+ ql-demos-javascript
+
+
+
+
+
+
+ com.semmle.plugin.qdt.core.qlnature
+
+
diff --git a/ql_demos/javascript/.qlpath b/ql_demos/javascript/.qlpath
new file mode 100644
index 0000000..d705efa
--- /dev/null
+++ b/ql_demos/javascript/.qlpath
@@ -0,0 +1,10 @@
+
+
+
+ com.semmle.code.javascript.library
+
+ com.semmle.code.javascript.dbscheme
+
+ javascript
+
+
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/01_HTTP_handlers.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/01_HTTP_handlers.ql
new file mode 100644
index 0000000..586316a
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/01_HTTP_handlers.ql
@@ -0,0 +1,20 @@
+import javascript
+
+/**
+ * A function with `req` and `res` parameters, and hence most likely an
+ * HTTP route handler.
+ */
+class LikelyRouteHandler extends DataFlow::FunctionNode {
+ DataFlow::ParameterNode req;
+ DataFlow::ParameterNode res;
+
+ LikelyRouteHandler() {
+ req = getParameter(0) and req.getName() = "req" and
+ res = getParameter(1) and res.getName() = "res"
+ }
+}
+
+// Find HTTP route handlers, using the heuristic of looking for parameters
+// named `req` and `res`.
+from LikelyRouteHandler l
+select l
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/02_getASendMethodCall.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/02_getASendMethodCall.ql
new file mode 100644
index 0000000..b38edd5
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/02_getASendMethodCall.ql
@@ -0,0 +1,42 @@
+import javascript
+
+/**
+ * A function with `req` and `res` parameters, and hence most likely an
+ * HTTP route handler.
+ */
+class LikelyRouteHandler extends DataFlow::FunctionNode {
+ DataFlow::ParameterNode req;
+ DataFlow::ParameterNode res;
+
+ LikelyRouteHandler() {
+ req = getParameter(0) and req.getName() = "req" and
+ res = getParameter(1) and res.getName() = "res"
+ }
+
+ /** Gets a method of `res` that sends an HTTP response. */
+ string getASendMethodName() {
+ // res.send
+ result = "send"
+ or
+ // or a method `m` such that there is an assignment `res.m = res.n` where `n`
+ // is already known to be a send method
+ exists (DataFlow::PropWrite pwn |
+ pwn = res.getAPropertyWrite(result) and
+ pwn.getRhs() = getASendMethodReference()
+ )
+ }
+
+ /** Gets a reference to `res.send` or some other known send method. */
+ DataFlow::PropRead getASendMethodReference() {
+ result = res.getAPropertyRead(getASendMethodName())
+ }
+
+ /** Gets a call to the send method. */
+ DataFlow::CallNode getASendMethodCall() {
+ result = getASendMethodReference().getACall()
+ }
+}
+
+// Find `send` calls, which is where the code is sending a reply message.
+from LikelyRouteHandler l
+select l.getASendMethodCall()
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/03_LikelySendArgument.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/03_LikelySendArgument.ql
new file mode 100644
index 0000000..d15eac0
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/03_LikelySendArgument.ql
@@ -0,0 +1,51 @@
+import javascript
+import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss
+
+/**
+ * A function with `req` and `res` parameters, and hence most likely an
+ * HTTP route handler.
+ */
+class LikelyRouteHandler extends DataFlow::FunctionNode {
+ DataFlow::ParameterNode req;
+ DataFlow::ParameterNode res;
+
+ LikelyRouteHandler() {
+ req = getParameter(0) and req.getName() = "req" and
+ res = getParameter(1) and res.getName() = "res"
+ }
+
+ /** Gets a method of `res` that sends an HTTP response. */
+ string getASendMethodName() {
+ // res.send
+ result = "send"
+ or
+ // or a method `m` such that there is an assignment `res.m = res.n` where `n`
+ // is already known to be a send method
+ exists (DataFlow::PropWrite pwn |
+ pwn = res.getAPropertyWrite(result) and
+ pwn.getRhs() = getASendMethodReference()
+ )
+ }
+
+ /** Gets a reference to `res.send` or some other known send method. */
+ DataFlow::PropRead getASendMethodReference() {
+ result = res.getAPropertyRead(getASendMethodName())
+ }
+
+ /** Gets a call to the send method. */
+ DataFlow::CallNode getASendMethodCall() {
+ result = getASendMethodReference().getACall()
+ }
+}
+
+/** An argument passed to `res.send`, marked as an XSS sink. */
+class LikelySendArgument extends Sink {
+ LikelySendArgument() {
+ this = any(LikelyRouteHandler rh).getASendMethodCall().getAnArgument()
+ }
+}
+
+// We will treat arguments of the `send` functions as the sinks in our
+// data flow analysis.
+from LikelySendArgument arg
+select arg
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/04_LikelyRequestParameter.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/04_LikelyRequestParameter.ql
new file mode 100644
index 0000000..6d80d79
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/04_LikelyRequestParameter.ql
@@ -0,0 +1,68 @@
+import javascript
+import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss
+
+/**
+ * A function with `req` and `res` parameters, and hence most likely an
+ * HTTP route handler.
+ */
+class LikelyRouteHandler extends DataFlow::FunctionNode {
+ DataFlow::ParameterNode req;
+ DataFlow::ParameterNode res;
+
+ LikelyRouteHandler() {
+ req = getParameter(0) and req.getName() = "req" and
+ res = getParameter(1) and res.getName() = "res"
+ }
+
+ DataFlow::ParameterNode getRequestParameter() {
+ result = req
+ }
+
+ /** Gets a method of `res` that sends an HTTP response. */
+ string getASendMethodName() {
+ // res.send
+ result = "send"
+ or
+ // or a method `m` such that there is an assignment `res.m = res.n` where `n`
+ // is already known to be a send method
+ exists (DataFlow::PropWrite pwn |
+ pwn = res.getAPropertyWrite(result) and
+ pwn.getRhs() = getASendMethodReference()
+ )
+ }
+
+ /** Gets a reference to `res.send` or some other known send method. */
+ DataFlow::PropRead getASendMethodReference() {
+ result = res.getAPropertyRead(getASendMethodName())
+ }
+
+ /** Gets a call to the send method. */
+ DataFlow::CallNode getASendMethodCall() {
+ result = getASendMethodReference().getACall()
+ }
+}
+
+/** An argument passed to `res.send`, marked as an XSS sink. */
+class LikelySendArgument extends Sink {
+ LikelySendArgument() {
+ this = any(LikelyRouteHandler rh).getASendMethodCall().getAnArgument()
+ }
+}
+
+/** An access to a request parameter, marked as an XSS source. */
+class LikelyRequestParameter extends Source, DataFlow::SourceNode {
+ LikelyRequestParameter() {
+ exists (DataFlow::SourceNode base | this = base.getAPropertyRead() |
+ // either a property access on `req` itself
+ base = any(LikelyRouteHandler rh).getRequestParameter()
+ or
+ // or a more deeply nested property access
+ base instanceof LikelyRequestParameter
+ )
+ }
+}
+
+// We will treat the parameters of the route handler functions as the
+// sources in our data flow analysis.
+from LikelyRequestParameter p
+select p
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/05_DataFlow.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/05_DataFlow.ql
new file mode 100644
index 0000000..d6b1369
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/05_DataFlow.ql
@@ -0,0 +1,78 @@
+/**
+ * @name Etherpad Reflected File Download CVE-2018-6835
+ * @description Returning an unsanitized string in a HTTP response
+ * could cause a Reflected File Download (RFD) vulnerability.
+ * @kind path-problem
+ * @problem.severity warning
+ * @id etherpad/javascript/rfd-cve-2018-6835
+ */
+
+import javascript
+import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss
+import DataFlow::PathGraph
+
+/**
+ * A function with `req` and `res` parameters, and hence most likely an
+ * HTTP route handler.
+ */
+class LikelyRouteHandler extends DataFlow::FunctionNode {
+ DataFlow::ParameterNode req;
+ DataFlow::ParameterNode res;
+
+ LikelyRouteHandler() {
+ req = getParameter(0) and req.getName() = "req" and
+ res = getParameter(1) and res.getName() = "res"
+ }
+
+ DataFlow::ParameterNode getRequestParameter() {
+ result = req
+ }
+
+ /** Gets a method of `res` that sends an HTTP response. */
+ string getASendMethodName() {
+ // res.send
+ result = "send"
+ or
+ // or a method `m` such that there is an assignment `res.m = res.n` where `n`
+ // is already known to be a send method
+ exists (DataFlow::PropWrite pwn |
+ pwn = res.getAPropertyWrite(result) and
+ pwn.getRhs() = getASendMethodReference()
+ )
+ }
+
+ /** Gets a reference to `res.send` or some other known send method. */
+ DataFlow::PropRead getASendMethodReference() {
+ result = res.getAPropertyRead(getASendMethodName())
+ }
+
+ /** Gets a call to the send method. */
+ DataFlow::CallNode getASendMethodCall() {
+ result = getASendMethodReference().getACall()
+ }
+}
+
+/** An argument passed to `res.send`, marked as an XSS sink. */
+class LikelySendArgument extends Sink {
+ LikelySendArgument() {
+ this = any(LikelyRouteHandler rh).getASendMethodCall().getAnArgument()
+ }
+}
+
+/** An access to a request parameter, marked as an XSS source. */
+class LikelyRequestParameter extends Source, DataFlow::SourceNode {
+ LikelyRequestParameter() {
+ exists (DataFlow::SourceNode base | this = base.getAPropertyRead() |
+ // either a property access on `req` itself
+ base = any(LikelyRouteHandler rh).getRequestParameter()
+ or
+ // or a more deeply nested property access
+ base instanceof LikelyRequestParameter
+ )
+ }
+}
+
+// We are now ready to run the data flow analysis.
+from Configuration xss, DataFlow::PathNode source, DataFlow::PathNode sink
+where xss.hasFlowPath(source, sink)
+select sink, source, sink, "Reflected File Download (RFD) vulnerability"
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/06_DataFlow_With_Sanitizer.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/06_DataFlow_With_Sanitizer.ql
new file mode 100644
index 0000000..55e2e08
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/06_DataFlow_With_Sanitizer.ql
@@ -0,0 +1,99 @@
+/**
+ * @name Etherpad Reflected File Download CVE-2018-6835
+ * @description Returning an unsanitized string in a HTTP response
+ * could cause a Reflected File Download (RFD) vulnerability.
+ * @kind path-problem
+ * @problem.severity warning
+ * @id etherpad/javascript/rfd-cve-2018-6835
+ */
+
+import javascript
+import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss
+import DataFlow::PathGraph
+
+/**
+ * A function with `req` and `res` parameters, and hence most likely an
+ * HTTP route handler.
+ */
+class LikelyRouteHandler extends DataFlow::FunctionNode {
+ DataFlow::ParameterNode req;
+ DataFlow::ParameterNode res;
+
+ LikelyRouteHandler() {
+ req = getParameter(0) and req.getName() = "req" and
+ res = getParameter(1) and res.getName() = "res"
+ }
+
+ DataFlow::ParameterNode getRequestParameter() {
+ result = req
+ }
+
+ /** Gets a method of `res` that sends an HTTP response. */
+ string getASendMethodName() {
+ // res.send
+ result = "send"
+ or
+ // or a method `m` such that there is an assignment `res.m = res.n` where `n`
+ // is already known to be a send method
+ exists (DataFlow::PropWrite pwn |
+ pwn = res.getAPropertyWrite(result) and
+ pwn.getRhs() = getASendMethodReference()
+ )
+ }
+
+ /** Gets a reference to `res.send` or some other known send method. */
+ DataFlow::PropRead getASendMethodReference() {
+ result = res.getAPropertyRead(getASendMethodName())
+ }
+
+ /** Gets a call to the send method. */
+ DataFlow::CallNode getASendMethodCall() {
+ result = getASendMethodReference().getACall()
+ }
+}
+
+/** An argument passed to `res.send`, marked as an XSS sink. */
+class LikelySendArgument extends Sink {
+ LikelySendArgument() {
+ this = any(LikelyRouteHandler rh).getASendMethodCall().getAnArgument()
+ }
+}
+
+/** An access to a request parameter, marked as an XSS source. */
+class LikelyRequestParameter extends Source, DataFlow::SourceNode {
+ LikelyRequestParameter() {
+ exists (DataFlow::SourceNode base | this = base.getAPropertyRead() |
+ // either a property access on `req` itself
+ base = any(LikelyRouteHandler rh).getRequestParameter()
+ or
+ // or a more deeply nested property access
+ base instanceof LikelyRequestParameter
+ )
+ }
+}
+
+/** A call to `is-var-name`, considered as a sanitizer for untrusted user input. */
+class IsVarNameSanitizer extends TaintTracking::AdditionalSanitizerGuardNode, DataFlow::CallNode {
+ IsVarNameSanitizer() {
+ this = DataFlow::moduleImport("is-var-name").getACall() or
+ this = DataFlow::moduleMember("./isValidJSONPName", "check").getACall()
+ }
+
+ override predicate appliesTo(TaintTracking::Configuration cfg) {
+ any()
+ }
+
+ override predicate sanitizes(boolean outcome, Expr e) {
+ outcome = true and
+ e = getArgument(0).asExpr()
+ }
+}
+
+// The vulnerability was fixed on 2018-03-23 by adding a call to isValidJSONPName:
+//
+// https://lgtm.com/projects/g/ether/etherpad-lite/rev/dd7894d3c9389a000d11d3a89962d9fcc9c6c44b
+//
+// This version of the query adds a sanitizer to exclude those results.
+from Configuration xss, DataFlow::PathNode source, DataFlow::PathNode sink
+where xss.hasFlowPath(source, sink)
+select sink, source, sink, "Reflected File Download (RFD) vulnerability"
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/README.md b/ql_demos/javascript/Etherpad_CVE-2018-6835/README.md
new file mode 100644
index 0000000..95a12a7
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/README.md
@@ -0,0 +1,5 @@
+[Blog post](https://lgtm.com/blog/etherpad_CVE-2018-6835)
+
+[This snapshot](https://downloads.lgtm.com/snapshots/javascript/ether/etherpad-lite/Etherpad_1.6.2.zip) has the vulnerability.
+
+For the final query, which shows how to detect the sanitization function after the bug was fixed, use [this snapshot](https://downloads.lgtm.com/snapshots/javascript/ether/etherpad-lite/Etherpad_42e0646327527ff0db7bcbd93fb9d16ff738905b.zip).
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/01_ReflectedXss.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/01_ReflectedXss.ql
new file mode 100644
index 0000000..1a6a0f1
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/01_ReflectedXss.ql
@@ -0,0 +1,15 @@
+/**
+ * @name Reflected cross-site scripting vulnerability
+ * @kind path-problem
+ * @problem.severity warning
+ * @id js/reflected-xss
+ */
+
+import javascript
+import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss
+import DataFlow::PathGraph
+
+from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
+ source.getNode(), "user-provided value"
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/02_SwaggerRouteHandler.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/02_SwaggerRouteHandler.ql
new file mode 100644
index 0000000..8ee8c58
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/02_SwaggerRouteHandler.ql
@@ -0,0 +1,34 @@
+import javascript
+
+/** Gets a data flow node that represents an instance of `swagger-node`. */
+DataFlow::Node swaggerInstance() {
+ result = DataFlow::moduleImport("swagger-node-express")
+ or
+ result.getAPredecessor() = swaggerInstance()
+ or
+ result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr()
+ or
+ result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew")
+}
+
+/** An Express route handler installed via `swagger-node`. */
+class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode {
+ SwaggerRouteHandler() {
+ exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource |
+ addGet.calls(swaggerInstance(), "addGet") and
+ resource = addGet.getArgument(0).getALocalSource() and
+ this = resource.getAPropertySource("action")
+ )
+ }
+
+ override SimpleParameter getRouteHandlerParameter(string kind) {
+ kind = "request" and result = getParameter(0).getParameter()
+ or
+ kind = "response" and result = getParameter(1).getParameter()
+ }
+
+ override HTTP::HeaderDefinition getAResponseHeader(string name) { none() }
+}
+
+from SwaggerRouteHandler srh
+select srh
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/03_ResponseSendArgument.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/03_ResponseSendArgument.ql
new file mode 100644
index 0000000..ed41454
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/03_ResponseSendArgument.ql
@@ -0,0 +1,34 @@
+import javascript
+
+/** Gets a data flow node that represents an instance of `swagger-node`. */
+DataFlow::Node swaggerInstance() {
+ result = DataFlow::moduleImport("swagger-node-express")
+ or
+ result.getAPredecessor() = swaggerInstance()
+ or
+ result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr()
+ or
+ result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew")
+}
+
+/** An Express route handler installed via `swagger-node`. */
+class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode {
+ SwaggerRouteHandler() {
+ exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource |
+ addGet.calls(swaggerInstance(), "addGet") and
+ resource = addGet.getArgument(0).getALocalSource() and
+ this = resource.getAPropertySource("action")
+ )
+ }
+
+ override SimpleParameter getRouteHandlerParameter(string kind) {
+ kind = "request" and result = getParameter(0).getParameter()
+ or
+ kind = "response" and result = getParameter(1).getParameter()
+ }
+
+ override HTTP::HeaderDefinition getAResponseHeader(string name) { none() }
+}
+
+from HTTP::ResponseSendArgument rsa
+select rsa
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/04_ResponseSendAccess.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/04_ResponseSendAccess.ql
new file mode 100644
index 0000000..5b4333f
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/04_ResponseSendAccess.ql
@@ -0,0 +1,35 @@
+import javascript
+
+/** Gets a data flow node that represents an instance of `swagger-node`. */
+DataFlow::Node swaggerInstance() {
+ result = DataFlow::moduleImport("swagger-node-express")
+ or
+ result.getAPredecessor() = swaggerInstance()
+ or
+ result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr()
+ or
+ result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew")
+}
+
+/** An Express route handler installed via `swagger-node`. */
+class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode {
+ SwaggerRouteHandler() {
+ exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource |
+ addGet.calls(swaggerInstance(), "addGet") and
+ resource = addGet.getArgument(0).getALocalSource() and
+ this = resource.getAPropertySource("action")
+ )
+ }
+
+ override SimpleParameter getRouteHandlerParameter(string kind) {
+ kind = "request" and result = getParameter(0).getParameter()
+ or
+ kind = "response" and result = getParameter(1).getParameter()
+ }
+
+ override HTTP::HeaderDefinition getAResponseHeader(string name) { none() }
+}
+
+from SwaggerRouteHandler rh, PropAccess send
+where send.accesses(rh.getAResponseExpr(), "send")
+select send
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/05_ResponseSendArgumentWithAliasing.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/05_ResponseSendArgumentWithAliasing.ql
new file mode 100644
index 0000000..f4dcfed
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/05_ResponseSendArgumentWithAliasing.ql
@@ -0,0 +1,68 @@
+import javascript
+
+/** Gets a data flow node that represents an instance of `swagger-node`. */
+DataFlow::Node swaggerInstance() {
+ result = DataFlow::moduleImport("swagger-node-express")
+ or
+ result.getAPredecessor() = swaggerInstance()
+ or
+ result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr()
+ or
+ result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew")
+}
+
+/** An Express route handler installed via `swagger-node`. */
+class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode {
+ SwaggerRouteHandler() {
+ exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource |
+ addGet.calls(swaggerInstance(), "addGet") and
+ resource = addGet.getArgument(0).getALocalSource() and
+ this = resource.getAPropertySource("action")
+ )
+ }
+
+ override SimpleParameter getRouteHandlerParameter(string kind) {
+ kind = "request" and result = getParameter(0).getParameter()
+ or
+ kind = "response" and result = getParameter(1).getParameter()
+ }
+
+ override HTTP::HeaderDefinition getAResponseHeader(string name) { none() }
+}
+
+/** Holds if `name` may be an alias for the `send` method on `res`. */
+predicate sendMethodName(HTTP::Servers::ResponseSource res, string name) {
+ name = "send"
+ or
+ exists (DataFlow::PropWrite pw |
+ res.flowsTo(pw.getBase()) and
+ pw.getPropertyName() = name and
+ sendMethodRef(pw.getRhs(), res)
+ )
+}
+
+/** Holds if `pr` may be an access to the `send` method on `res`. */
+predicate sendMethodRef(DataFlow::PropRead pr, HTTP::Servers::ResponseSource res) {
+ res.flowsTo(pr.getBase()) and
+ sendMethodName(res, pr.getPropertyName())
+}
+
+/** Recognize potentially aliased calls to `send`. */
+class PotentiallyAliasedResponseSendArgument extends HTTP::ResponseSendArgument {
+ HTTP::RouteHandler rh;
+
+ PotentiallyAliasedResponseSendArgument() {
+ exists (DataFlow::CallNode call, HTTP::Servers::ResponseSource res |
+ sendMethodRef(call.getCalleeNode(), res) and
+ rh = res.getRouteHandler() and
+ this = call.getArgument(0).asExpr()
+ )
+ }
+
+ override HTTP::RouteHandler getRouteHandler() {
+ result = rh
+ }
+}
+
+from PotentiallyAliasedResponseSendArgument arg
+select arg
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/06_ReflectedXss.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/06_ReflectedXss.ql
new file mode 100644
index 0000000..046c583
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/06_ReflectedXss.ql
@@ -0,0 +1,79 @@
+/**
+ * @name Reflected cross-site scripting vulnerability
+ * @kind path-problem
+ * @problem.severity warning
+ * @id js/reflected-xss
+ */
+
+import javascript
+import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss
+import DataFlow::PathGraph
+
+/** Gets a data flow node that represents an instance of `swagger-node`. */
+DataFlow::Node swaggerInstance() {
+ result = DataFlow::moduleImport("swagger-node-express")
+ or
+ result.getAPredecessor() = swaggerInstance()
+ or
+ result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr()
+ or
+ result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew")
+}
+
+/** An Express route handler installed via `swagger-node`. */
+class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode {
+ SwaggerRouteHandler() {
+ exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource |
+ addGet.calls(swaggerInstance(), "addGet") and
+ resource = addGet.getArgument(0).getALocalSource() and
+ this = resource.getAPropertySource("action")
+ )
+ }
+
+ override SimpleParameter getRouteHandlerParameter(string kind) {
+ kind = "request" and result = getParameter(0).getParameter()
+ or
+ kind = "response" and result = getParameter(1).getParameter()
+ }
+
+ override HTTP::HeaderDefinition getAResponseHeader(string name) { none() }
+}
+
+/** Holds if `name` may be an alias for the `send` method on `res`. */
+predicate sendMethodName(HTTP::Servers::ResponseSource res, string name) {
+ name = "send"
+ or
+ exists (DataFlow::PropWrite pw |
+ res.flowsTo(pw.getBase()) and
+ pw.getPropertyName() = name and
+ sendMethodRef(pw.getRhs(), res)
+ )
+}
+
+/** Holds if `pr` may be an access to the `send` method on `res`. */
+predicate sendMethodRef(DataFlow::PropRead pr, HTTP::Servers::ResponseSource res) {
+ res.flowsTo(pr.getBase()) and
+ sendMethodName(res, pr.getPropertyName())
+}
+
+/** Recognize potentially aliased calls to `send`. */
+class PotentiallyAliasedResponseSendArgument extends HTTP::ResponseSendArgument {
+ HTTP::RouteHandler rh;
+
+ PotentiallyAliasedResponseSendArgument() {
+ exists (DataFlow::CallNode call, HTTP::Servers::ResponseSource res |
+ sendMethodRef(call.getCalleeNode(), res) and
+ rh = res.getRouteHandler() and
+ this = call.getArgument(0).asExpr()
+ )
+ }
+
+ override HTTP::RouteHandler getRouteHandler() {
+ result = rh
+ }
+}
+
+from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
+ source.getNode(), "user-provided value"
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/07_ReflectedXssWithSanitizer.ql b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/07_ReflectedXssWithSanitizer.ql
new file mode 100644
index 0000000..02a2a55
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/07_ReflectedXssWithSanitizer.ql
@@ -0,0 +1,95 @@
+/**
+ * @name Reflected cross-site scripting vulnerability
+ * @kind path-problem
+ * @problem.severity warning
+ * @id js/reflected-xss
+ */
+
+import javascript
+import semmle.javascript.security.dataflow.ReflectedXss::ReflectedXss
+import DataFlow::PathGraph
+
+/** Gets a data flow node that represents an instance of `swagger-node`. */
+DataFlow::Node swaggerInstance() {
+ result = DataFlow::moduleImport("swagger-node-express")
+ or
+ result.getAPredecessor() = swaggerInstance()
+ or
+ result.(DataFlow::CallNode).getACallee().getAReturnedExpr() = swaggerInstance().asExpr()
+ or
+ result.(DataFlow::MethodCallNode).calls(swaggerInstance(), "createNew")
+}
+
+/** An Express route handler installed via `swagger-node`. */
+class SwaggerRouteHandler extends Express::RouteHandler, DataFlow::FunctionNode {
+ SwaggerRouteHandler() {
+ exists(DataFlow::MethodCallNode addGet, DataFlow::ObjectLiteralNode resource |
+ addGet.calls(swaggerInstance(), "addGet") and
+ resource = addGet.getArgument(0).getALocalSource() and
+ this = resource.getAPropertySource("action")
+ )
+ }
+
+ override SimpleParameter getRouteHandlerParameter(string kind) {
+ kind = "request" and result = getParameter(0).getParameter()
+ or
+ kind = "response" and result = getParameter(1).getParameter()
+ }
+
+ override HTTP::HeaderDefinition getAResponseHeader(string name) { none() }
+}
+
+/** Holds if `name` may be an alias for the `send` method on `res`. */
+predicate sendMethodName(HTTP::Servers::ResponseSource res, string name) {
+ name = "send"
+ or
+ exists (DataFlow::PropWrite pw |
+ res.flowsTo(pw.getBase()) and
+ pw.getPropertyName() = name and
+ sendMethodRef(pw.getRhs(), res)
+ )
+}
+
+/** Holds if `pr` may be an access to the `send` method on `res`. */
+predicate sendMethodRef(DataFlow::PropRead pr, HTTP::Servers::ResponseSource res) {
+ res.flowsTo(pr.getBase()) and
+ sendMethodName(res, pr.getPropertyName())
+}
+
+/** Recognize potentially aliased calls to `send`. */
+class PotentiallyAliasedResponseSendArgument extends HTTP::ResponseSendArgument {
+ HTTP::RouteHandler rh;
+
+ PotentiallyAliasedResponseSendArgument() {
+ exists (DataFlow::CallNode call, HTTP::Servers::ResponseSource res |
+ sendMethodRef(call.getCalleeNode(), res) and
+ rh = res.getRouteHandler() and
+ this = call.getArgument(0).asExpr()
+ )
+ }
+
+ override HTTP::RouteHandler getRouteHandler() {
+ result = rh
+ }
+}
+
+/** A call to `is-var-name`, considered as a sanitizer for untrusted user input. */
+class IsVarNameSanitizer extends TaintTracking::AdditionalSanitizerGuardNode, DataFlow::CallNode {
+ IsVarNameSanitizer() {
+ this = DataFlow::moduleImport("is-var-name").getACall()
+ }
+
+ override predicate appliesTo(TaintTracking::Configuration cfg) {
+ any()
+ }
+
+ override predicate sanitizes(boolean outcome, Expr e) {
+ outcome = true and
+ e = getArgument(0).asExpr()
+ }
+}
+
+from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
+ source.getNode(), "user-provided value"
diff --git a/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/README.md b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/README.md
new file mode 100644
index 0000000..96ee549
--- /dev/null
+++ b/ql_demos/javascript/Etherpad_CVE-2018-6835/alternative/README.md
@@ -0,0 +1,36 @@
+This is an alternative presentation of the query from the blog post about
+[Detecting Reflected File Download vulnerabilities using QL](https://lgtm.com/blog/etherpad_CVE-2018-6835),
+phrasing it as a customization of Semmle's standard Reflected XSS query.
+
+Use [this snapshot](https://downloads.lgtm.com/snapshots/javascript/ether/etherpad-lite/Etherpad_1.6.2.zip) (etherpad-lite v1.6.2)
+for the initial stages of the development. All snapshots were built using version 1.9.3 of the Semmle toolchain; if you are using
+1.20 or newer you will need to upgrade them.
+
+ - [01_ReflectedXss.ql](01_ReflectedXss.ql): The standard query; finds nothing. The reason is that etherpad-lite uses
+ [Swagger](https://www.npmjs.com/package/swagger-node-express) for implementing its REST API, which is not supported out of the box.
+ The next few queries build up a basic model of Swagger that is rich enough to find the vulnerability.
+ - [02_SwaggerRouteHandler.ql](02_SwaggerRouteHandler.ql): A query for finding places where Swagger is used to install an
+ Express HTTP route handler. Note that the newly introduced class extends `Express::RouteHandler` from the standard library,
+ so the standard HTTP server model can now be used to reason about Swagger-based APIs.
+ - [03_ResponseSendArgument.ql](03_ResponseSendArgument.ql): A query for finding HTTP responses using the standard HTTP
+ server model plus the extension developed in the previous query. Results are disappointing.
+ - [04_ResponseSendAccess.ql](04_ResponseSendAccess.ql): To figure out what's going on, we instead just look for all
+ references to the HTTP `send` method. The results show that etherpad-lite wraps that method and replaces it with a custom
+ send method that ultimately delegates to the standard method.
+ - [05_ResponseSendArgumentWithAliasing.ql](05_ResponseSendArgumentWithAliasing.ql): We can model this wrapping process,
+ yielding an improved version of `03_ResponseSendArgument.ql`.
+ - [06_ReflectedXss.ql](06_ReflectedXss.ql): Including all of the above extensions in the standard XSS query finds a
+ true-positive result. This is the one we reported.
+
+The developers [fixed](https://github.com/ether/etherpad-lite/commit/a2992b3) the vulnerability by introducing a sanitizer using the
+[is-var-name](https://www.npmjs.com/package/is-var-name) npm package.
+[This snapshot](https://downloads.lgtm.com/snapshots/javascript/ether/etherpad-lite/Etherpad_a2992b3.zip) corresponds to the fix commit.
+
+The standard library does not include a model for `is-var-name` (it is not a very widely used package), but
+[07_ReflectedXssWithSanitizer.ql](07_ReflectedXssWithSanitizer.ql) shows that it is very easy to add, making
+the result go away.
+
+Later on, this sanitizer was [replaced](https://github.com/ether/etherpad-lite/commit/dd7894d) with a custom sanitizer, which is,
+unfortunately, ineffective. ([This snapshot](https://downloads.lgtm.com/snapshots/javascript/ether/etherpad-lite/Etherpad_1.6.4.zip)
+of etherpad-lite v1.6.4 contains the new sanitizer.) However, all browsers mitigate against reflected file download
+vulnerabilities these days, so while the vulnerability still exists, it is no longer exploitable.
diff --git a/ql_demos/javascript/qlpack.yml b/ql_demos/javascript/qlpack.yml
new file mode 100644
index 0000000..0c9e262
--- /dev/null
+++ b/ql_demos/javascript/qlpack.yml
@@ -0,0 +1,3 @@
+name: codeql-demos-javascript
+version: 0.0.0
+libraryPathDependencies: codeql-javascript
diff --git a/ql_demos/javascript/queries.xml b/ql_demos/javascript/queries.xml
new file mode 100644
index 0000000..d434629
--- /dev/null
+++ b/ql_demos/javascript/queries.xml
@@ -0,0 +1 @@
+