Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ class ConfigFileCreationPass(cpg: Cpg, requirementsTxt: String = "requirement.tx
pathEndFilter(requirementsTxt),
// Pipfile
pathEndFilter("Pipfile"),
pathEndFilter("Pipfile.lock")
pathEndFilter("Pipfile.lock"),
// setup.cfg
pathEndFilter("setup.cfg")
)

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,173 @@ import org.slf4j.{Logger, LoggerFactory}

import scala.util.matching.Regex

// This pass takes information out of specific CONFIG_FILE nodes in order to add DEPENDENCY nodes to the graph.
/*
example of a `requirements.txt` file that is valid for the pass:
```
click==7.1.2
Flask==1.1.2
itsdangerous==1.1.0
Jinja2==2.11.3
MarkupSafe==1.1.1
Werkzeug==1.0.1
```
*/
/** This pass takes information out of specific CONFIG_FILE nodes in order to add DEPENDENCY nodes to the graph.
*
* Supports:
* - requirements.txt (PEP 440 comparison operators such as ==, >=, ~=; extras and environment markers are accepted but not recorded)
* - pyproject.toml (PEP 621 `[project.dependencies]` and Poetry `[tool.poetry.dependencies]`)
* - setup.cfg (`[options] install_requires`)
*/
class DependenciesFromRequirementsTxtPass(cpg: Cpg) extends CpgPass(cpg) {
private val logger: Logger = LoggerFactory.getLogger(classOf[DependenciesFromRequirementsTxtPass])

/** Regex for a single requirement line (a requirements.txt entry or a PEP 508-style requirement string).
  *
  * Capture groups: (1) package name, (2) first comparison operator, (3) first version. Groups 2 and 3 are `null`
  * when the line carries no version constraint. Optional extras (`pkg[extra]`), further comma-separated constraints
  * (`>=1.0,<2.0`), an environment marker (`; ...`) and a trailing comment (`# ...`) are matched but not captured,
  * so the number of capture groups stays at three.
  */
private val RequirementsLinePattern: Regex = {
  // `===` must come before `==`, otherwise an arbitrary-equality pin leaks a '=' into the version group.
  val operator = "===|~=|==|!=|>=|<=|>|<"
  val version  = """[^\s;,#]+"""
  val name     = """([A-Za-z0-9][\w.\-]*)"""
  val extras   = """(?:\s*\[[^\]]*\])?"""
  val first    = s"(?:($operator)\\s*($version))?"
  // Additional constraints after the first one are accepted so the line is not dropped, but not recorded.
  val further = s"(?:\\s*,\\s*(?:$operator)\\s*$version)*"
  val trailer = """\s*(?:;.*)?(?:#.*)?$"""
  ("""^\s*""" + name + extras + """\s*""" + first + further + trailer).r
}

/** Dispatches every CONFIG_FILE node to the parser matching its file name.
  *
  * Files whose name ends in `requirements.txt`, `pyproject.toml` or `setup.cfg` are parsed and every dependency
  * they declare is added to `dstGraph` as a DEPENDENCY node. All other config files are ignored.
  *
  * @param dstGraph
  *   diff graph that receives the new DEPENDENCY nodes
  */
override def run(dstGraph: DiffGraphBuilder): Unit = {
  cpg.configFile.foreach { configFile =>
    val fileName = configFile.name
    if (fileName.endsWith("requirements.txt")) {
      parseRequirementsTxt(configFile.content, dstGraph)
    } else if (fileName.endsWith("pyproject.toml")) {
      parsePyprojectToml(configFile.content, dstGraph)
    } else if (fileName.endsWith("setup.cfg")) {
      parseSetupCfg(configFile.content, dstGraph)
    }
  }
}

/** Adds one DEPENDENCY node per package line of a requirements.txt file.
  *
  * Blank lines, comments, `-r`/`-c` includes and `--` option flags are ignored, as is any line that
  * `RequirementsLinePattern` does not recognise (URLs, editable installs, ...). A package without a version
  * constraint is recorded with an empty version.
  */
private def parseRequirementsTxt(content: String, dstGraph: DiffGraphBuilder): Unit = {
  // Prefixes of lines that never describe a package.
  val ignoredPrefixes = Seq("#", "-r ", "-c ", "--")

  for {
    entry <- content.split("\n").iterator.map(_.trim)
    if entry.nonEmpty
    if !ignoredPrefixes.exists(prefix => entry.startsWith(prefix))
  } {
    entry match {
      case RequirementsLinePattern(rawName, _, rawVersion) =>
        val packageName = rawName.trim
        // The version group is null when the line has no specifier.
        val pinned = Option(rawVersion).getOrElse("")
        dstGraph.addNode(NewDependency().name(packageName).version(pinned).dependencyGroupId(packageName))
      case _ => () // not a plain package line
    }
  }
}

/** Extracts dependencies from a pyproject.toml file.
  *
  * Both supported layouts are attempted on the same content, in this order: PEP 621 (`[project]`) first, then
  * Poetry (`[tool.poetry.dependencies]`).
  */
private def parsePyprojectToml(content: String, dstGraph: DiffGraphBuilder): Unit = {
  val layoutParsers: Seq[(String, DiffGraphBuilder) => Unit] =
    Seq(parsePep621Dependencies, parsePoetryDependencies)
  layoutParsers.foreach(parseLayout => parseLayout(content, dstGraph))
}

/** Matches the `dependencies = [` key of a `[project]` table; group 1 is the text after the opening bracket. */
private val Pep621DependenciesKey: Regex = """^dependencies\s*=\s*\[(.*)$""".r

/** Tokenises one line of a TOML string array: quoted strings are consumed whole (so brackets or hashes inside
  * them are ignored); group 1 is set only for an unquoted `]` or `#`.
  */
private val TomlArrayDelimiter: Regex = """"[^"]*"|'[^']*'|([\]#])""".r

/** Splits a line of a TOML string array into the part that belongs to the array and a flag telling whether an
  * unquoted `]` closed it. Text after the closing bracket or after an unquoted `#` (a comment) is dropped.
  */
private def splitTomlArrayLine(text: String): (String, Boolean) =
  TomlArrayDelimiter
    .findAllMatchIn(text)
    .find(token => Option(token.group(1)).isDefined)
    .fold((text, false))(token => (text.substring(0, token.start), token.group(1) == "]"))

/** Parse PEP 621 style: [project] dependencies = ["flask>=2.0", "requests"]
  *
  * Handles single-line and multi-line arrays, items on the same line as the opening or closing bracket, extras
  * such as `"requests[security]"`, and `#` comments inside the array. Only the exact key `dependencies` inside
  * the `[project]` table is considered. Each array item is handed to `parseRequirementString`.
  */
private def parsePep621Dependencies(content: String, dstGraph: DiffGraphBuilder): Unit = {
  var inProject  = false
  var inDepArray = false

  // Emits the requirement strings found on `text` and returns whether the array is still open afterwards.
  def consumeArrayLine(text: String): Boolean = {
    val (arrayText, closed) = splitTomlArrayLine(text)
    extractQuotedStrings(arrayText).foreach(parseRequirementString(_, dstGraph))
    !closed
  }

  content.split("\n").foreach { rawLine =>
    val line = rawLine.trim
    if (line.startsWith("[") && line.endsWith("]")) {
      // A table header ends any unterminated array and decides whether we are inside [project].
      inDepArray = false
      inProject = line == "[project]"
    } else if (inDepArray) {
      inDepArray = consumeArrayLine(line)
    } else if (inProject) {
      line match {
        case Pep621DependenciesKey(afterBracket) => inDepArray = consumeArrayLine(afterBracket)
        case _                                   => ()
      }
    }
  }
}

/** Matches a `key = value` line of a TOML table; group 1 is the bare or quoted key, group 2 the raw value. */
private val PoetryEntry: Regex = """^("[^"]+"|'[^']+'|[A-Za-z0-9_.\-]+)\s*=\s*(.*)$""".r

/** Matches text that is exactly one TOML string, optionally followed by a `#` comment; the content is in group 1
  * (double-quoted) or group 2 (single-quoted).
  */
private val TomlStringLiteral: Regex = """^(?:"([^"]*)"|'([^']*)')\s*(?:#.*)?$""".r

/** Finds the `version = "..."` member of an inline table; group 1 is the constraint. The word boundary keeps
  * keys such as `python_version` from matching.
  */
private val PoetryTableVersion: Regex = """\bversion\s*=\s*["']([^"']+)["']""".r

/** Parse Poetry style: [tool.poetry.dependencies] flask = "^2.0" requests = {version = "^2.28", optional = true}
  *
  * Adds one DEPENDENCY node per entry of the `[tool.poetry.dependencies]` table, except for the `python`
  * interpreter constraint. The version is the string value, or the `version` member when the value is an inline
  * table (or an array of such tables); it is empty when no version is given. Trailing `#` comments and both TOML
  * quote styles are handled.
  */
private def parsePoetryDependencies(content: String, dstGraph: DiffGraphBuilder): Unit = {
  var inPoetryDeps = false

  // Returns the content of a quoted TOML string, or the text unchanged when it is not a quoted string.
  def unquote(text: String): Option[String] = text match {
    case TomlStringLiteral(doubleQuoted, singleQuoted) => Some(Option(doubleQuoted).getOrElse(singleQuoted))
    case _                                             => None
  }

  content.split("\n").foreach { rawLine =>
    val line = rawLine.trim
    if (line.startsWith("[") && line.endsWith("]")) {
      inPoetryDeps = line == "[tool.poetry.dependencies]"
    } else if (inPoetryDeps) {
      line match {
        // Comment lines and continuation lines of multi-line values do not start with a key and are skipped.
        case PoetryEntry(rawKey, rawValue) =>
          val pkgName = unquote(rawKey).getOrElse(rawKey)
          if (pkgName != "python") {
            val version = unquote(rawValue).getOrElse {
              if (rawValue.startsWith("{") || rawValue.startsWith("[")) {
                // Table syntax {version = "^1.0", ...}; dependencies without a version (git, path) yield "".
                PoetryTableVersion.findFirstMatchIn(rawValue).map(_.group(1)).getOrElse("")
              } else {
                rawValue.stripPrefix("\"").stripSuffix("\"")
              }
            }
            val dep = NewDependency().name(pkgName).version(version).dependencyGroupId(pkgName)
            dstGraph.addNode(dep)
          }
        case _ => ()
      }
    }
  }
}

/** Matches the `install_requires` key with either configparser delimiter (`=` or `:`); group 1 is the value that
  * follows on the same line (possibly empty).
  */
private val InstallRequiresKey: Regex = """^install_requires\s*[=:]\s*(.*)$""".r

/** Parse setup.cfg [options] install_requires = flask>=2.0 requests
  *
  * Reads the `install_requires` value of the `[options]` section: an optional requirement on the key line,
  * followed by indented continuation lines holding one requirement each. Blank lines and comment lines (`#` or
  * `;`) inside the value are skipped; the value ends at the first non-indented line or at the next section header.
  * Each requirement is handed to `parseRequirementString`.
  */
private def parseSetupCfg(content: String, dstGraph: DiffGraphBuilder): Unit = {
  var inOptions     = false
  var inInstallReqs = false

  content.split("\n").foreach { line =>
    val trimmed = line.trim
    if (trimmed.startsWith("[") && trimmed.endsWith("]")) {
      inOptions = trimmed == "[options]"
      inInstallReqs = false
    } else {
      trimmed match {
        case InstallRequiresKey(inlineValue) if inOptions =>
          inInstallReqs = true
          // The delimiter is matched explicitly so that the '=' of a specifier such as `>=` is never mistaken
          // for the key/value separator.
          if (inlineValue.nonEmpty) parseRequirementString(inlineValue, dstGraph)
        case _ if inInstallReqs =>
          val indented  = line.startsWith(" ") || line.startsWith("\t")
          val isComment = trimmed.startsWith("#") || trimmed.startsWith(";")
          if (trimmed.isEmpty) {
            () // a blank line may separate groups inside a multi-line value
          } else if (!indented) {
            inInstallReqs = false // continuation lines must be indented
          } else if (!isComment) {
            parseRequirementString(trimmed, dstGraph)
          }
        case _ => ()
      }
    }
  }
}

/** Parse a single requirement string like "flask>=2.0" or "requests" into a dependency node.
  *
  * The input is normalised from the outside in: surrounding whitespace, then a trailing list comma, then
  * surrounding quotes. Doing it in this order means `  "flask>=2.0",` is reduced to `flask>=2.0` without a
  * stray quote ending up in the version. Strings that `RequirementsLinePattern` does not recognise are skipped;
  * a requirement without a version constraint is recorded with an empty version.
  */
private def parseRequirementString(reqStr: String, dstGraph: DiffGraphBuilder): Unit = {
  val cleaned = reqStr.trim
    .replaceAll(",\\s*$", "") // strip trailing comma
    .trim
    .stripPrefix("\"")
    .stripSuffix("\"")
    .stripPrefix("'")
    .stripSuffix("'")
    .trim

  if (cleaned.nonEmpty) {
    cleaned match {
      case RequirementsLinePattern(pkgName, _, version) =>
        val depName = pkgName.trim
        // The version group is null when the requirement has no specifier.
        val depVersion = Option(version).getOrElse("")
        dstGraph.addNode(NewDependency().name(depName).version(depVersion).dependencyGroupId(depName))
      case _ => () // not a recognisable requirement
    }
  }
}

/** Returns the contents of every non-empty double- or single-quoted substring of `s`, in order of appearance and
  * without the surrounding quotes.
  */
private def extractQuotedStrings(s: String): Seq[String] = {
  val quotedLiteral = """"([^"]+)"|'([^']+)'""".r
  val contents = for (hit <- quotedLiteral.findAllMatchIn(s)) yield {
    // Group 1 is set for a double-quoted literal, group 2 for a single-quoted one.
    val doubleQuoted = hit.group(1)
    Option(doubleQuoted).getOrElse(hit.group(2))
  }
  contents.toSeq
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package io.joern.pysrc2cpg.passes

import io.joern.pysrc2cpg.testfixtures.PySrc2CpgFixture
import io.shiftleft.semanticcpg.language.*

/** End-to-end checks that dependency declarations in Python packaging files become DEPENDENCY nodes.
  *
  * Each scenario compares the complete name -> version mapping. A missing, misparsed or unexpected extra
  * dependency therefore fails with a readable matcher diff instead of a `NoSuchElementException` from a map
  * lookup, and entries that must be ignored (dev dependencies, extras, unrelated keys) are checked implicitly.
  */
class DependencyPassTests extends PySrc2CpgFixture(withOssDataflow = false) {

  "requirements.txt with exact pinning (==)" should {
    lazy val cpg = code(
      """Flask==1.1.2
        |requests==2.28.0
        |""".stripMargin,
      "requirements.txt"
    )

    "create dependency nodes with name and version" in {
      val deps = cpg.dependency.l.sortBy(_.name)
      deps.map(d => d.name -> d.version) shouldBe List("Flask" -> "1.1.2", "requests" -> "2.28.0")
    }
  }

  "requirements.txt with flexible specifiers" should {
    lazy val cpg = code(
      """flask>=2.0.0
        |requests~=2.28
        |numpy<=1.24.0
        |pandas!=1.5.0
        |scipy>1.9
        |matplotlib<3.8
        |bare-package
        |# this is a comment
        |-r other-requirements.txt
        |--index-url https://pypi.org/simple
        |package-with-extras[security]>=1.0
        |conditional-pkg>=1.0; python_version >= "3.8"
        |""".stripMargin,
      "requirements.txt"
    )

    "create dependency nodes for all specifier styles" in {
      val depMap = cpg.dependency.l.map(d => d.name -> d.version).toMap
      depMap shouldBe Map(
        "flask"               -> "2.0.0",
        "requests"            -> "2.28",
        "numpy"               -> "1.24.0",
        "pandas"              -> "1.5.0",
        "scipy"               -> "1.9",
        "matplotlib"          -> "3.8",
        "bare-package"        -> "",
        "package-with-extras" -> "1.0",
        "conditional-pkg"     -> "1.0"
      )
    }

    "skip comments, includes, and option flags" in {
      val depNames = cpg.dependency.name.l.toSet
      depNames should not contain "comment"
      depNames should not contain "other-requirements.txt"
      depNames should not contain "index-url"
      // Exactly the nine package lines of the fixture produce a node; nothing else does.
      depNames.size shouldBe 9
    }
  }

  "pyproject.toml with PEP 621 dependencies" should {
    lazy val cpg = code(
      """[project]
        |name = "my-project"
        |dependencies = [
        | "flask>=2.0",
        | "requests~=2.28",
        | "click",
        |]
        |
        |[tool.other]
        |something = "else"
        |""".stripMargin,
      "pyproject.toml"
    )

    "create dependency nodes from PEP 621 format" in {
      val depMap = cpg.dependency.l.map(d => d.name -> d.version).toMap
      // Also proves that neither the project name nor keys of other tables are picked up.
      depMap shouldBe Map("flask" -> "2.0", "requests" -> "2.28", "click" -> "")
    }
  }

  "pyproject.toml with Poetry dependencies" should {
    lazy val cpg = code(
      """[tool.poetry.dependencies]
        |python = "^3.8"
        |flask = "^2.0"
        |requests = {version = "^2.28", optional = true}
        |
        |[tool.poetry.dev-dependencies]
        |pytest = "^7.0"
        |""".stripMargin,
      "pyproject.toml"
    )

    "create dependency nodes from Poetry format" in {
      val depMap = cpg.dependency.l.map(d => d.name -> d.version).toMap
      // The interpreter constraint (`python`) and the dev dependency (`pytest`) must not appear.
      depMap shouldBe Map("flask" -> "^2.0", "requests" -> "^2.28")
    }
  }

  "setup.cfg with install_requires" should {
    lazy val cpg = code(
      """[metadata]
        |name = my-project
        |
        |[options]
        |install_requires =
        | flask>=2.0
        | requests~=2.28
        | click
        |
        |[options.extras_require]
        |dev = pytest
        |""".stripMargin,
      "setup.cfg"
    )

    "create dependency nodes from setup.cfg format" in {
      val depMap = cpg.dependency.l.map(d => d.name -> d.version).toMap
      // `pytest` from [options.extras_require] must not appear.
      depMap shouldBe Map("flask" -> "2.0", "requests" -> "2.28", "click" -> "")
    }
  }

}
Loading
Loading