Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions src/class/SimpleData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import addBins_ from "../methods/analyzing/addBins.js"
import addOutliers_ from "../methods/analyzing/addOutliers.js"
import excludeOutliers_ from "../methods/analyzing/excludeOutliers.js"
import correlation_ from "../methods/analyzing/correlation.js"
import linearRegression_ from "../methods/analyzing/linearRegression.js"
import addItems_ from "../methods/restructuring/addItems.js"
import getUniqueValues_ from "../methods/exporting/getUniqueValues.js"
import summarize_ from "../methods/analyzing/summarize.js"
Expand Down Expand Up @@ -1004,6 +1005,7 @@ export default class SimpleData {
correlation({
key1,
key2,
nbDigits = 4,
overwrite = true,
nbTestedValues = 10000,
}: {
Expand All @@ -1018,6 +1020,35 @@ export default class SimpleData {
cloneData(this._data),
key1,
key2,
nbDigits,
this.verbose,
this.noTests ? 0 : nbTestedValues
)
overwrite && this.#updateSimpleData(this._tempData)

return this
}

@logCall()
linearRegression({
key1,
key2,
nbDigits = 4,
overwrite = true,
nbTestedValues = 10000,
}: {
key1?: string
key2?: string | string[]
overwrite?: boolean
nbDigits?: number
nbTestedValues?: number
} = {}): this {
this._overwrite = overwrite
this._tempData = linearRegression_(
cloneData(this._data),
key1,
key2,
nbDigits,
this.verbose,
this.noTests ? 0 : nbTestedValues
)
Expand Down
3 changes: 2 additions & 1 deletion src/methods/analyzing/correlation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export default function correlation(
data: SimpleDataItem[],
key1?: string,
key2?: string | string[],
nbDigits = 4,
verbose = false,
nbTestedValues = 10000
): SimpleDataItem[] {
Expand Down Expand Up @@ -83,7 +84,7 @@ export default function correlation(

correlationData.push({
...corr,
correlation: Number.isNaN(result) ? NaN : round(result, 4),
correlation: Number.isNaN(result) ? NaN : round(result, nbDigits),
})
}

Expand Down
95 changes: 95 additions & 0 deletions src/methods/analyzing/linearRegression.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { SimpleDataItem } from "../../types/SimpleData.types.js"
import { linearRegression as linReg, combinations } from "simple-statistics"
import checkTypeOfKey from "../../helpers/checkTypeOfKey.js"
import hasKey from "../../helpers/hasKey.js"
import round from "../../helpers/round.js"

/**
 * Computes simple linear regressions between pairs of numeric keys.
 *
 * Three call shapes are accepted:
 * - `key1` undefined and `key2` undefined or an empty array: every 2-key
 *   combination of the keys found on the first item that pass the numeric
 *   check is used.
 * - `key1` a string and `key2` an array: `key1` is paired with each entry
 *   of `key2`, in order.
 * - `key1` and `key2` both strings: that single pair is used.
 *
 * For each pair, every item is turned into a `[item[key1], item[key2]]`
 * point and the points are handed to simple-statistics' `linearRegression`.
 * Its `m` is reported as `slope` and its `b` as `intersect`, both passed
 * through `round` with `nbDigits` (NaN is returned as is).
 *
 * @param data - Items to analyse. The first item is used to discover and
 *   look up keys, so the array must not be empty.
 * @param key1 - First key of each pair (first coordinate of each point).
 * @param key2 - Second key, or a list of second keys.
 * @param nbDigits - Number of digits handed to `round` for `slope` and
 *   `intersect`.
 * @param verbose - Forwarded to `checkTypeOfKey`.
 * @param nbTestedValues - Forwarded to `checkTypeOfKey`.
 * @returns One `{ key1, key2, slope, intersect }` item per pair.
 * @throws Error if `data` is empty, if a requested key is missing from the
 *   first item, if a requested key fails the numeric check, or if
 *   `key1`/`key2` do not match one of the accepted call shapes.
 */
export default function linearRegression(
    data: SimpleDataItem[],
    key1?: string,
    key2?: string | string[],
    nbDigits = 4,
    verbose = false,
    nbTestedValues = 10000
): SimpleDataItem[] {
    // Keys are discovered and validated against the first item. Reject an
    // empty dataset explicitly instead of failing on `data[0]` with an
    // opaque TypeError.
    const requireFirstItem = () => {
        if (data.length === 0) {
            throw new Error("Can't compute a linear regression on empty data.")
        }
        return data[0]
    }

    // NOTE(review): the `1` is presumably the share of tested values that
    // must be numbers — confirm against checkTypeOfKey.
    const isNumericKey = (key: string): boolean =>
        checkTypeOfKey(data, key, "number", 1, nbTestedValues, verbose)

    // Single place for the "key exists, then key is numeric" validation that
    // every explicitly requested key goes through.
    const assertNumericKey = (key: string): void => {
        if (!hasKey(requireFirstItem(), key)) {
            throw new Error(`No key ${key} in data`)
        }
        if (!isNumericKey(key)) {
            throw new Error(`At least one value in ${key} is not a number.`)
        }
    }

    const pairs: { key1: string; key2: string }[] = []

    if (
        key1 === undefined &&
        (key2 === undefined || (Array.isArray(key2) && key2.length === 0))
    ) {
        // No keys requested: regress every combination of numeric keys.
        const numericKeys = Object.keys(requireFirstItem()).filter((k) =>
            isNumericKey(k)
        )
        for (const c of combinations(numericKeys, 2)) {
            pairs.push({ key1: c[0], key2: c[1] })
        }
    } else if (typeof key1 === "string" && Array.isArray(key2)) {
        assertNumericKey(key1)
        for (const key of key2) {
            assertNumericKey(key)
            pairs.push({ key1: key1, key2: key })
        }
    } else if (typeof key1 === "string" && typeof key2 === "string") {
        assertNumericKey(key1)
        assertNumericKey(key2)
        pairs.push({ key1: key1, key2: key2 })
    } else {
        throw new Error(
            "key1 should be a string and key2 should be a string or array of strings"
        )
    }

    const linearRegressionData: SimpleDataItem[] = []

    for (const pair of pairs) {
        const coords = data.map((d) => [
            d[pair.key1] as number,
            d[pair.key2] as number,
        ])

        const result = linReg(coords)

        linearRegressionData.push({
            ...pair,
            slope: Number.isNaN(result.m) ? NaN : round(result.m, nbDigits),
            intersect: Number.isNaN(result.b) ? NaN : round(result.b, nbDigits),
        })
    }

    return linearRegressionData
}
1 change: 1 addition & 0 deletions test/integration/SimpleData.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ async function main() {
})
.correlation({ overwrite: false })
.correlation({ key1: "salary", key2: "bonus", overwrite: false })
.linearRegression({ key1: "salary", key2: "bonus", overwrite: false })
.summarize({ overwrite: false })
.summarize({
keyValue: simpleDataMerged.getKeys(),
Expand Down
14 changes: 14 additions & 0 deletions test/unit/methods/analyzing/correlation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,18 @@ describe("correlation", function () {
{ key1: "key2", key2: "key3", correlation: 0.9042 },
])
})

it("should compute all correlations if key1 is undefined and key2 is an empty array, with only two decimals.", function () {
    // The fourth argument (nbDigits) is set to 2, so coefficients are
    // expected with two decimals.
    const rows = [
        { key1: 1, key2: 2, key3: 3 },
        { key1: 11, key2: 22, key3: 4 },
        { key1: 111, key2: 222, key3: 5 },
    ]
    const expected = [
        { key1: "key1", key2: "key2", correlation: 1 },
        { key1: "key1", key2: "key3", correlation: 0.9 },
        { key1: "key2", key2: "key3", correlation: 0.9 },
    ]

    assert.deepEqual(correlation(rows, undefined, [], 2), expected)
})
})
73 changes: 73 additions & 0 deletions test/unit/methods/analyzing/linearRegression.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import assert from "assert"
import linearRegression from "../../../../src/methods/analyzing/linearRegression.js"

// Unit tests for the linearRegression helper. The suite label names the
// unit under test so failures are reported under the right heading.
describe("linearRegression", function () {
    it("should apply linear regression", function () {
        // Two points on the line y = x: slope 1, intersect 0.
        const data = [
            { key1: 0, key2: 0 },
            { key1: 1, key2: 1 },
        ]
        const linearRegressionData = linearRegression(data, "key1", "key2")
        assert.deepEqual(linearRegressionData, [
            { key1: "key1", key2: "key2", slope: 1, intersect: 0 },
        ])
    })

    it("should compute all linear regressions if key1 and key2 are undefined", function () {
        // With no keys given, every combination of numeric keys is regressed.
        const data = [
            { key1: 1, key2: 2, key3: 3 },
            { key1: 11, key2: 22, key3: 4 },
            { key1: 111, key2: 222, key3: 5 },
        ]
        const linearRegressionData = linearRegression(data)
        assert.deepEqual(linearRegressionData, [
            { key1: "key1", key2: "key2", slope: 2, intersect: 0 },
            { key1: "key1", key2: "key3", slope: 0.0149, intersect: 3.3905 },
            { key1: "key2", key2: "key3", slope: 0.0074, intersect: 3.3905 },
        ])
    })

    it("should compute multiple linear regressions if key2 is an array", function () {
        // key1 is paired with each entry of the key2 array, in order.
        const data = [
            { key1: 1, key2: 2, key3: 3 },
            { key1: 11, key2: 22, key3: 4 },
            { key1: 111, key2: 222, key3: 5 },
        ]
        const linearRegressionData = linearRegression(data, "key1", [
            "key2",
            "key3",
        ])
        assert.deepEqual(linearRegressionData, [
            { key1: "key1", key2: "key2", slope: 2, intersect: 0 },
            { key1: "key1", key2: "key3", slope: 0.0149, intersect: 3.3905 },
        ])
    })

    it("should compute all linear regressions if key1 is undefined and key2 is an empty array", function () {
        // An empty key2 array is treated like "no keys given".
        const data = [
            { key1: 1, key2: 2, key3: 3 },
            { key1: 11, key2: 22, key3: 4 },
            { key1: 111, key2: 222, key3: 5 },
        ]
        const linearRegressionData = linearRegression(data, undefined, [])
        assert.deepEqual(linearRegressionData, [
            { key1: "key1", key2: "key2", slope: 2, intersect: 0 },
            { key1: "key1", key2: "key3", slope: 0.0149, intersect: 3.3905 },
            { key1: "key2", key2: "key3", slope: 0.0074, intersect: 3.3905 },
        ])
    })

    it("should compute all linear regressions if key1 is undefined and key2 is an empty array, with only two decimals", function () {
        // nbDigits = 2: slope and intersect are rounded to two decimals.
        const data = [
            { key1: 1, key2: 2, key3: 3 },
            { key1: 11, key2: 22, key3: 4 },
            { key1: 111, key2: 222, key3: 5 },
        ]
        const linearRegressionData = linearRegression(data, undefined, [], 2)
        assert.deepEqual(linearRegressionData, [
            { key1: "key1", key2: "key2", slope: 2, intersect: 0 },
            { key1: "key1", key2: "key3", slope: 0.01, intersect: 3.39 },
            { key1: "key2", key2: "key3", slope: 0.01, intersect: 3.39 },
        ])
    })
})