| line | % | coverage | branch |
| 30 | 50 | T | F | sample__mix64(void) |
| 40 | 100 | T | F | { |
| 52 | 100 | T | F | /* Uniform integer in [0, upper) â rejection loop, no modulo bias */ |
| 73 | 100 | T | F | // Helpers for Random Number Generation |
| 79 | 100 | T | F | // This perfectly replicates R's pt(..., ncp) exactness without requiring complex Beta functions. |
| 81 | 100 | T | F | if (df <= 0.0) return 0.0; |
| 50 | T | F | if (df <= 0.0) return 0.0; |
| 83 | 100 | T | F | double step = 1.0 / n_steps; |
| 87 | 100 | T | F | double root_half = 0.70710678118654752440; // 1 / sqrt(2) |
| 93 | 100 | T | F | double log_M = log_coef + (df - 1.0) * log(w) - half_df * w * w; |
| 94 | 100 | T | F | double M = exp(log_M); |
| 97 | 100 | T | F | double pnorm_val = 0.5 * erfc(-z * root_half); |
| 98 | 100 | T | F | double weight = (i % 2 != 0) ? 4.0 : 2.0; |
| 111 | 100 | T | F | |
| 113 | 100 | T | F | return ((RankInfo*)a)->idx - ((RankInfo*)b)->idx; |
| 117 | 100 | T | F | RankInfo *restrict items = safemalloc(n * sizeof(RankInfo)); |
| 124 | 100 | T | F | for (size_t i = 0; i < n; ) { |
| 126 | 100 | T | F | while (j < n && items[j].val == items[i].val) j++; |
| 127 | 100 | T | F | double avg_rank = (i + 1 + j) / 2.0; |
| 135 | 100 | T | F | // Generates a single binomial random variate. |
| 143 | 100 | T | F | if (Drand01() <= prob) successes++; |
| 148 | 50 | T | F | static double log_choose(size_t n, size_t k) { |
| 0 | T | F | static double log_choose(size_t n, size_t k) { |
| 149 | 50 | T | F | return lgamma((double)n + 1.0) - lgamma((double)k + 1.0) - lgamma((double)(n - k) + 1.0); |
| 150 | 100 | T | F | } |
| 153 | 50 | T | F | static void calc_tails_logspace(size_t a, size_t min_x, size_t max_x, double omega, const double *logdc, double *restrict lower_tail, double *restrict upper_tail) { |
| 156 | 100 | T | F | for(size_t k = 0; k <= max_x - min_x; ++k) { |
| 158 | 100 | T | F | if (d_val > max_d) max_d = d_val; |
| 161 | 100 | T | F | double sum_d = 0.0; |
| 168 | 100 | T | F | |
| 170 | 100 | T | F | double p_prob = exp(logdc[k] + log_omega * (min_x + k) - max_d) / sum_d; |
| 179 | 100 | T | F | size_t r1 = a + b, r2 = c + d, c1 = a + c; |
| 180 | 100 | T | F | size_t min_x = (r2 > c1) ? 0 : c1 - r2; |
| 181 | 50 | T | F | size_t max_x = (r1 < c1) ? r1 : c1; |
| 183 | 50 | T | F | bool is_less = (strcmp(alt, "less") == 0); |
| 188 | 100 | T | F | for(size_t x = min_x; x <= max_x; ++x) { |
| 189 | 100 | T | F | logdc[x - min_x] = log_choose(r1, x) + log_choose(r2, c1 - x) - denom; |
| 191 | 100 | T | F | |
| 198 | 100 | T | F | for (unsigned short int i = 0; i < 3000; i++) { |
| 199 | 100 | T | F | double log_mid = 0.5 * (log_low + log_high); |
| 200 | 100 | T | F | double max_d = -1e300; |
| 202 | 50 | T | F | double d_val = logdc[k] + log_mid * (min_x + k); |
| 207 | 100 | T | F | double p_prob = exp(logdc[k] + log_mid * (min_x + k) - max_d); |
| 208 | 100 | T | F | sum_d += p_prob; |
| 210 | 100 | T | F | } |
| 221 | 100 | T | F | *ci_high = INFINITY; |
| 226 | 100 | T | F | if (a != min_x) { |
| 232 | 100 | T | F | double err = fabs(ut - target_alpha); |
| 233 | 100 | T | F | if (err < best_err) { best_err = err; best = mid; } |
| 234 | 100 | T | F | if (ut > target_alpha) log_high = log_mid; |
| 235 | 100 | T | F | else log_low = log_mid; |
| 239 | 100 | T | F | } |
| 241 | 100 | T | F | |
| 246 | 50 | T | F | double log_low = -100.0, log_high = 100.0, best = 1.0, best_err = 1e9, lt, ut; |
| 261 | 100 | T | F | } |
| 266 | 100 | T | F | size_t min_x = (r2 > c1) ? 0 : c1 - r2; |
| 269 | 100 | T | F | double *logdc = (double*)safemalloc((max_x - min_x + 1) * sizeof(double)); |
| 50 | T | F | double *logdc = (double*)safemalloc((max_x - min_x + 1) * sizeof(double)); |
| 272 | 100 | T | F | logdc[x - min_x] = log_choose(r1, x) + log_choose(r2, c1 - x) - denom; |
| 281 | 100 | T | F | } else { |
| 282 | 100 | T | F | double p_obs = exp(logdc[a - min_x]); |
| 283 | 100 | T | F | double relErr = 1.0 + 1e-7; |
| 100 | T | F | double relErr = 1.0 + 1e-7; |
| 286 | 100 | T | F | if (p_cur <= p_obs * relErr) p_val += p_cur; |
| 299 | 100 | T | F | * Utilizes a relative tolerance check to prevent dropping micro-variance features. |
| 301 | 50 | T | F | static int sweep_matrix_ols(double *restrict A, size_t n, bool *restrict aliased) { |
| 50 | T | F | static int sweep_matrix_ols(double *restrict A, size_t n, bool *restrict aliased) { |
| 50 | T | F | static int sweep_matrix_ols(double *restrict A, size_t n, bool *restrict aliased) { |
| 305 | 50 | T | F | // Save the original diagonal values to use as a baseline for relative variance |
| 307 | 50 | T | F | aliased[k] = FALSE; |
| 50 | T | F | aliased[k] = FALSE; |
| 50 | T | F | aliased[k] = FALSE; |
| 312 | 50 | T | F | // Check pivot for collinearity using a RELATIVE tolerance |
| 100 | T | F | // Check pivot for collinearity using a RELATIVE tolerance |
| 313 | 100 | T | F | // (Fallback to a tiny absolute tolerance of 1e-24 to catch literal zero vectors) |
| 322 | 50 | T | F | } |
| 325 | 100 | T | F | A[k * n + k] = 1.0; |
| 328 | 0 | T | F | if (i != k && A[i * n + k] != 0.0) { |
| 0 | T | F | if (i != k && A[i * n + k] != 0.0) { |
| 0 | T | F | if (i != k && A[i * n + k] != 0.0) { |
| 331 | 0 | T | F | for (size_t j = 0; j < n; j++) { |
| 340 | 50 | T | F | |
| 50 | T | F | |
| 344 | 100 | T | F | if (row_hashes) { |
| 350 | 50 | T | F | } else if (data_hoa) { |
| 50 | T | F | } else if (data_hoa) { |
| 353 | 50 | T | F | AV*restrict av = (AV*)SvRV(*col); |
| 355 | 0 | T | F | } |
| 359 | 0 | T | F | return NAN; // Catch strings like "blue" |
| 366 | 0 | T | F | AV *cols = newAV(); |
| 367 | 0 | T | F | if (data_hoa) { |
| 376 | 100 | T | F | while ((entry = hv_iternext(row_hashes[0]))) { |
| 378 | 100 | T | F | } |
| 380 | 100 | T | F | return cols; |
| 50 | T | F | return cols; |
| 50 | T | F | return cols; |
| 384 | 50 | T | F | static double evaluate_term(HV *restrict data_hoa, HV **restrict row_hashes, unsigned int i, const char *restrict term) { |
| 386 | 50 | T | F | |
| 50 | T | F | |
| 50 | T | F | |
| 391 | 100 | T | F | double left = evaluate_term(data_hoa, row_hashes, i, term_cpy); |
| 50 | T | F | double left = evaluate_term(data_hoa, row_hashes, i, term_cpy); |
| 392 | 100 | T | F | double right = evaluate_term(data_hoa, row_hashes, i, colon + 1); |
| 402 | 50 | T | F | char *restrict caret = strchr(inner, '^'); |
| 404 | 0 | T | F | if (caret) { |
| 0 | T | F | if (caret) { |
| 0 | T | F | if (caret) { |
| 408 | 50 | T | F | double v = get_data_value(data_hoa, row_hashes, i, inner); |
| 410 | 50 | T | F | |
| 50 | T | F | |
| 50 | T | F | |
| 415 | 50 | T | F | Safefree(term_cpy); |
| 50 | T | F | Safefree(term_cpy); |
| 430 | 100 | T | F | SV **restrict col = hv_fetch(data_hoa, var, strlen(var), 0); |
| 431 | 50 | T | F | if (col && SvROK(*col) && SvTYPE(SvRV(*col)) == SVt_PVAV) { |
| 447 | 100 | T | F | if (row_hashes) { |
| 448 | 100 | T | F | val = hv_fetch(row_hashes[i], var, strlen(var), 0); |
| 456 | 50 | T | F | AV*restrict av = (AV*)SvRV(*col); |
| 457 | 100 | T | F | val = av_fetch(av, i, 0); |
| 461 | 100 | T | F | return savepv(SvPV_nolen(*val)); /* Allocates and returns string */ |
| 464 | 100 | T | F | } |
| 100 | T | F | } |
| 467 | 100 | T | F | typedef struct { |
| 477 | 100 | T | F | /* Stabilize sort by falling back to original index */ |
| 483 | 50 | T | F | /* Item used to sort values while remembering their original index, |
| 497 | 100 | T | F | /* Compute 1-based average ranks with tie-breaking into out[]. |
| 498 | 100 | T | F | * in[] is not modified. */ |
| 501 | 100 | T | F | Newx(ri, n, RankItem); |
| 50 | T | F | Newx(ri, n, RankItem); |
| 502 | 100 | T | F | for (size_t i = 0; i < n; i++) { ri[i].val = in[i]; ri[i].idx = i; } |
| 503 | 50 | T | F | qsort(ri, n, sizeof(RankItem), cmp_rank_item); |
| 504 | 50 | T | F | |
| 509 | 50 | T | F | while (j + 1 < n && ri[j + 1].val == ri[j].val) j++; |
| 517 | 100 | T | F | |
| 519 | 50 | T | F | * Returns NAN when either variable has zero variance (matches R). */ |
| 50 | T | F | * Returns NAN when either variable has zero variance (matches R). */ |
| 526 | 100 | T | F | double num = (double)n * sxy - sx * sy; |
| 542 | 50 | T | F | for (size_t i = 0; i < n - 1; i++) { |
| 544 | 50 | T | F | int sx = (x[i] > x[j]) - (x[i] < x[j]); /* sign of x[i]-x[j] */ |
| 548 | 50 | T | F | else if (sy == 0) tie_y++; |
| 550 | 50 | T | F | else D++; |
| 554 | 50 | T | F | if (denom == 0.0) return NAN; |
| 556 | 50 | T | F | } |
| 558 | 100 | T | F | /* Single dispatch: compute correlation according to method string. |
| 564 | 50 | T | F | Newx(rx, n, double); Newx(ry, n, double); |
| 565 | 100 | T | F | rank_data(x, rx, n); |
| 567 | 100 | T | F | double r = pearson_corr(rx, ry, n); |
| 574 | 100 | T | F | return pearson_corr(x, y, n); |
| 100 | T | F | return pearson_corr(x, y, n); |
| 575 | 100 | T | F | } |
| 50 | T | F | } |
| 583 | 100 | T | F | int m; |
| 586 | 50 | T | F | c = 1.0; d = 1.0 - qab * x / qap; |
| 589 | 50 | T | F | for (m = 1; m <= MAX_ITER; m++) { |
| 592 | 100 | T | F | d = 1.0 + aa * d; |
| 597 | 100 | T | F | aa = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2)); |
| 609 | 0 | T | F | if (x <= 0.0) return 0.0; |
| 618 | 50 | T | F | double prob_2tail = incbeta(df / 2.0, 0.5, x); |
| 620 | 100 | T | F | if (strcmp(alt, "greater") == 0) return (t > 0) ? 0.5 * prob_2tail : 1.0 - 0.5 * prob_2tail; |
| 625 | 100 | T | F | static double qt_tail(double df, double p_tail) { |
| 626 | 100 | T | F | double low = 0.0, high = 1.0; |
| 629 | 50 | T | F | low = high; |
| 50 | T | F | low = high; |
| 50 | T | F | low = high; |
| 633 | 100 | T | F | // Bisect to find the root |
| 639 | 100 | T | F | } else { |
| 100 | T | F | } else { |
| 643 | 100 | T | F | } |
| 50 | T | F | } |
| 648 | 50 | T | F | double da = *(const double*restrict)a; |
| 653 | 100 | T | F | static size_t calculate_sturges_bins(size_t n) { |
| 655 | 100 | T | F | return (size_t)(log((double)n) / log(2.0) + 1.0); |
| 658 | 50 | T | F | // Logic for distributing data into bins (Optimized to O(N)) |
| 681 | 100 | T | F | /* Adjust for exact boundaries (R's right-inclusive default: (a, b]) */ |
| 687 | 100 | T | F | // Conversely, if floating-point truncation placed it too low, push it up |
| 691 | 100 | T | F | counts[idx]++; |
| 709 | 100 | T | F | double approx_pnorm(double x) { |
| 724 | 100 | T | F | double x, r, y; |
| 50 | T | F | double x, r, y; |
| 50 | T | F | double x, r, y; |
| 727 | 100 | T | F | r = y * y; |
| 728 | 100 | T | F | x = y * (((a[3]*r + a[2])*r + a[1])*r + a[0]) / |
| 730 | 100 | T | F | } else { |
| 735 | 100 | T | F | r * (c[5] + r * (c[6] + r * (c[7] + r * c[8]))))))); |
| 100 | T | F | r * (c[5] + r * (c[6] + r * (c[7] + r * c[8]))))))); |
| 100 | T | F | r * (c[5] + r * (c[6] + r * (c[7] + r * c[8]))))))); |
| 751 | 50 | T | F | static double spearman_exact_pvalue(double s_obs, size_t n, const char *restrict alt) { |
| 752 | 50 | T | F | int *restrict perm = (int*)safemalloc(n * sizeof(int)); |
| 754 | 50 | T | F | for (size_t i = 0; i < n; i++) { perm[i] = i + 1; c[i] = 0; } |
| 755 | 50 | T | F | |
| 764 | 100 | T | F | if (s_ <= s_obs + 1e-9) count_le++; \ |
| 767 | 100 | T | F | } while (0) |
| 769 | 100 | T | F | TALLY_PERM(); /* initial permutation [1, 2, ..., n] */ |
| 771 | 100 | T | F | unsigned int k = 1; |
| 773 | 100 | T | F | if (c[k] < k) { |
| 100 | T | F | if (c[k] < k) { |
| 784 | 50 | T | F | c[k] = 0; |
| 785 | 50 | T | F | k++; |
| 787 | 100 | T | F | } |
| 789 | 100 | T | F | |
| 791 | 50 | T | F | /* p_le = P(S ⤠s_obs) â¡ P(rho ⥠rho_obs) â upper rho tail |
| 792 | 100 | T | F | * p_ge = P(S ⥠s_obs) â¡ P(rho ⤠rho_obs) â lower rho tail */ |
| 794 | 50 | T | F | double p_ge = (double)count_ge / (double)total; |
| 795 | 50 | T | F | |
| 799 | 50 | T | F | double p = 2.0 * (p_le < p_ge ? p_le : p_ge); |
| 808 | 100 | T | F | double *restrict dp = (double*)safemalloc((max_inv + 1) * sizeof(double)); |
| 810 | 50 | T | F | dp[0] = 1.0; |
| 816 | 100 | T | F | for (int k = 0; k <= current_max_inv; k++) { |
| 817 | 100 | T | F | double sum = 0; |
| 819 | 100 | T | F | sum += dp[k - j]; |
| 825 | 100 | T | F | dp = next_dp; |
| 830 | 100 | T | F | if (i_obs > max_inv) i_obs = max_inv; |
| 834 | 100 | T | F | for (long k = 0; k <= i_obs; k++) p_ge += dp[k]; |
| 836 | 100 | T | F | if (strcmp(alt, "greater") == 0) return p_ge; |
| 839 | 100 | T | F | double p = 2.0 * (p_ge < p_le ? p_ge : p_le); |
| 844 | 100 | T | F | if (f <= 0.0) return 0.0; |
| 847 | 100 | T | F | } |
| 863 | 50 | T | F | } |
| 50 | T | F | } |
| 866 | 100 | T | F | continue; |
| 867 | 50 | T | F | } // Collinear or zero column |
| 874 | 50 | T | F | norm = sqrt(norm); |
| 876 | 100 | T | F | double u1 = X[r][k] - s; |
| 100 | T | F | double u1 = X[r][k] - s; |
| 50 | T | F | double u1 = X[r][k] - s; |
| 50 | T | F | double u1 = X[r][k] - s; |
| 880 | 100 | T | F | double dot = u1 * X[r][j]; |
| 882 | 100 | T | F | double tau = dot / (s * u1); |
| 883 | 100 | T | F | X[r][j] += tau * u1; |
| 899 | 100 | T | F | // --- write_table Helpers --- |
| 900 | 100 | T | F | |
| 901 | 50 | T | F | // Sorts string arrays alphabetically |
| 912 | 50 | T | F | if (!isdigit(s[i])) return 1; |
| 50 | T | F | if (!isdigit(s[i])) return 1; |
| 913 | 50 | T | F | } |
| 916 | 100 | T | F | |
| 920 | 100 | T | F | bool needs_quotes = 0; |
| 933 | 50 | T | F | } |
| 937 | 50 | T | F | PerlIO_puts(fh, str); |
| 939 | 50 | T | F | } |
| 943 | 100 | T | F | size_t sep_len = strlen(sep); |
| 951 | 50 | T | F | } |
| 952 | 50 | T | F | PerlIO_putc(fh, '\n'); |
| 963 | 50 | T | F | double term = 1.0 / a; |
| 50 | T | F | double term = 1.0 / a; |
| 964 | 50 | T | F | double n = 1.0; |
| 966 | 100 | T | F | term *= x / (a + n); |
| 977 | 100 | T | F | double h = d, i = 1.0; |
| 978 | 50 | T | F | while (i < 10000) { // Safety bound |
| 983 | 100 | T | F | c = b + an / c; |
| 984 | 100 | T | F | if (fabs(c) < 1e-30) c = 1e-30; |
| 985 | 100 | T | F | d = 1.0 / d; |
| 989 | 100 | T | F | i += 1.0; |
| 1003 | 50 | T | F | #define M_SQRT1_2 0.70710678118654752440 |
| 1004 | 100 | T | F | #endif |
| 1009 | 100 | T | F | if (k > n / 2) k = n - k; |
| 1010 | 100 | T | F | long double res = 1.0L; |
| 1014 | 100 | T | F | return res; |
| 1030 | 50 | T | F | for (int i = max_u; i >= j + m; i--) w[i] -= w[i - j - m]; |
| 1035 | 100 | T | F | |
| 1037 | 100 | T | F | double result = (double)(cum_p / total); |
| 100 | T | F | double result = (double)(cum_p / total); |
| 1039 | 100 | T | F | Safefree(w); |
| 1041 | 100 | T | F | } |
| 1059 | 50 | T | F | for (int i = 0; i <= k; i++) cum_p += w[i]; |
| 1060 | 50 | T | F | |
| 1062 | 100 | T | F | double result = (double)(cum_p / total); |
| 1070 | 0 | T | F | double db = ((const RankInfo*)b)->val; |
| 1071 | 0 | T | F | return (da > db) - (da < db); |
| 1073 | 0 | T | F | |
| 1078 | 0 | T | F | double tie_adj = 0.0; |
| 1082 | 0 | T | F | while (j < n && ri[j].val == ri[i].val) j++; |
| 1087 | 0 | T | F | i = j; |
| 1092 | 0 | T | F | #ifndef M_PI_2 |
| 1105 | 100 | T | F | if (n < 0) return 1.0 / r_pow_di(x, -n); |
| 1106 | 100 | T | F | double val = 1.0; |
| 1108 | 100 | T | F | return val; |
| 1115 | 100 | T | F | if(x <= 0.) { |
| 1116 | 100 | T | F | if(lower) p = 0.; |
| 1124 | 100 | T | F | s += exp(k * k * z - w); |
| 1125 | 100 | T | F | } |
| 1131 | 50 | T | F | s = -1.0; |
| 1132 | 0 | T | F | if(lower) { |
| 1146 | 100 | T | F | } |
| 1147 | 100 | T | F | |
| 1148 | 100 | T | F | // Auxiliary routines used by K2x() for matrix operations |
| 1152 | 100 | T | F | double s = 0.; |
| 1156 | 50 | T | F | } |
| 1158 | 100 | T | F | |
| 1159 | 100 | T | F | static void m_power(double *A, int eA, double *V, int *eV, int m, int n) { |
| 1160 | 100 | T | F | if(n == 1) { |
| 1161 | 100 | T | F | for(int i = 0; i < m * m; i++) V[i] = A[i]; |
| 1170 | 100 | T | F | for(int i = 0; i < m * m; i++) V[i] = B[i]; |
| 1172 | 50 | T | F | } else { |
| 1191 | 100 | T | F | for(int i = 0; i < m; i++) { |
| 50 | T | F | for(int i = 0; i < m; i++) { |
| 1193 | 50 | T | F | if(i - j + 1 < 0) H[i * m + j] = 0; |
| 100 | T | F | if(i - j + 1 < 0) H[i * m + j] = 0; |
| 100 | T | F | if(i - j + 1 < 0) H[i * m + j] = 0; |
| 1194 | 50 | T | F | else H[i * m + j] = 1; |
| 1197 | 100 | T | F | for(int i = 0; i < m; i++) { |
| 100 | T | F | for(int i = 0; i < m; i++) { |
| 1198 | 100 | T | F | H[i * m] -= r_pow_di(h, i + 1); |
| 100 | T | F | H[i * m] -= r_pow_di(h, i + 1); |
| 1204 | 100 | T | F | for(int j = 0; j < m; j++) { |
| 1205 | 100 | T | F | if(i - j + 1 > 0) { |
| 1206 | 100 | T | F | for(int g = 1; g <= i - j + 1; g++) H[i * m + j] /= g; |
| 1215 | 100 | T | F | for(int i = 1; i <= n; i++) { |
| 1225 | 100 | T | F | return s; |
| 1226 | 100 | T | F | } |
| 1229 | 100 | T | F | static void calc_2sample_stats(double *x, size_t nx, double *y, size_t ny, |
| 1230 | 100 | T | F | double *d, double *d_plus, double *d_minus) { |
| 1231 | 100 | T | F | qsort(x, nx, sizeof(double), compare_doubles); |
| 1232 | 100 | T | F | qsort(y, ny, sizeof(double), compare_doubles); |
| 1247 | 50 | T | F | double diff = cdf1 - cdf2; |
| 1255 | 50 | T | F | *d_minus = max_d_minus; |
| 0 | T | F | *d_minus = max_d_minus; |
| 1268 | 50 | T | F | u[0] = 0.; |
| 1269 | 50 | T | F | |
| 1272 | 100 | T | F | else u[j] = u[j - 1]; |
| 1275 | 50 | T | F | if(psmirnov_exact_test(q, i / md, 0., two_sided)) u[0] = 1.; |
| 1279 | 50 | T | F | double v = (double)(i) / (double)(i + j); |
| 1283 | 100 | T | F | } |
| 1288 | 100 | T | F | } |
| 1304 | 50 | T | F | // Default: 1 - P(T < qu) |
| 50 | T | F | // Default: 1 - P(T < qu) |
| 50 | T | F | // Default: 1 - P(T < qu) |
| 1309 | 50 | T | F | |
| 1310 | 100 | T | F | // Bisection algorithm to find the inverse F-distribution (Quantile function) |
| 50 | T | F | // Bisection algorithm to find the inverse F-distribution (Quantile function) |
| 1313 | 50 | T | F | if (p <= 0.0) return 0.0; |
| 1320 | 100 | T | F | if (high > 1e100) break; /* Fallback limit */ |
| 1323 | 50 | T | F | // Bisect to find the root |
| 1324 | 50 | T | F | for (unsigned short int i = 0; i < 150; i++) { |
| 1325 | 50 | T | F | double mid = low + (high - low) / 2.0; |
| 1326 | 0 | T | F | double p_mid = pf(mid, df1, df2); |
| 1329 | 50 | T | F | low = mid; |
| 1333 | 50 | T | F | if (high - low < 1e-12) break; |
| 50 | T | F | if (high - low < 1e-12) break; |
| 50 | T | F | if (high - low < 1e-12) break; |
| 1341 | 100 | T | F | CODE: |
| 100 | T | F | CODE: |
| 50 | T | F | CODE: |
| 1347 | 50 | T | F | |
| 1352 | 100 | T | F | } |
| 1354 | 50 | T | F | if (arg_idx < items) { |
| 50 | T | F | if (arg_idx < items) { |
| 50 | T | F | if (arg_idx < items) { |
| 1363 | 50 | T | F | |
| 100 | T | F | |
| 50 | T | F | |
| 1369 | 100 | T | F | else if (strEQ(key, "y")) y_sv = val; |
| 1371 | 50 | T | F | if (!SvOK(val)) exact = -1; |
| 50 | T | F | if (!SvOK(val)) exact = -1; |
| 50 | T | F | if (!SvOK(val)) exact = -1; |
| 1376 | 50 | T | F | } |
| 50 | T | F | } |
| 1385 | 100 | T | F | |
| 1386 | 100 | T | F | if (!is_two_sided && !is_greater && !is_less) { |
| 1391 | 50 | T | F | size_t nx = av_len(x_av) + 1; |
| 1392 | 50 | T | F | if (nx == 0) croak("Not enough 'x' observations"); |
| 1398 | 100 | T | F | SV**restrict el = av_fetch(x_av, i, 0); |
| 1399 | 100 | T | F | if (el && SvOK(*el) && looks_like_number(*el)) { |
| 1403 | 100 | T | F | |
| 1404 | 50 | T | F | double statistic = 0.0, p_value = 0.0; |
| 1407 | 50 | T | F | // --- TWO SAMPLE --- |
| 50 | T | F | // --- TWO SAMPLE --- |
| 1411 | 50 | T | F | |
| 1418 | 0 | T | F | } |
| 1427 | 50 | T | F | calc_2sample_stats(x_data, valid_nx, y_data, valid_ny, &d, &d_plus, &d_minus); |
| 50 | T | F | calc_2sample_stats(x_data, valid_nx, y_data, valid_ny, &d, &d_plus, &d_minus); |
| 1429 | 50 | T | F | // Map alternative to the correct statistic |
| 1432 | 100 | T | F | else statistic = d; |
| 1440 | 50 | T | F | // Check for ties in combined set |
| 1441 | 100 | T | F | size_t total_n = valid_nx + valid_ny; |
| 1442 | 100 | T | F | double *restrict comb = (double *)safemalloc(total_n * sizeof(double)); |
| 1443 | 50 | T | F | for(size_t i=0; i<valid_nx; i++) comb[i] = x_data[i]; |
| 1445 | 100 | T | F | qsort(comb, total_n, sizeof(double), compare_doubles); |
| 1446 | 50 | T | F | |
| 1448 | 50 | T | F | for(size_t i = 1; i < total_n; i++) { |
| 1449 | 50 | T | F | if(comb[i] == comb[i-1]) { has_ties = TRUE; break; } |
| 1451 | 50 | T | F | Safefree(comb); |
| 1452 | 50 | T | F | if (use_exact && has_ties) { |
| 1454 | 50 | T | F | use_exact = FALSE; |
| 1464 | 0 | T | F | p_value = K2l(z, 0, 1e-9); |
| 1476 | 50 | T | F | double cdf_obs_low = (double)i / valid_nx; |
| 1477 | 50 | T | F | double cdf_obs_high = (double)(i + 1) / valid_nx; |
| 1498 | 50 | T | F | p_value = 1.0 - K2x(valid_nx, statistic); |
| 100 | T | F | p_value = 1.0 - K2x(valid_nx, statistic); |
| 50 | T | F | p_value = 1.0 - K2x(valid_nx, statistic); |
| 1503 | 50 | T | F | } |
| 100 | T | F | } |
| 50 | T | F | } |
| 1508 | 50 | T | F | else p_value = exp(-2.0 * z * z); |
| 1512 | 100 | T | F | croak("ks_test: Unsupported 1-sample distribution '%s'. Use arrays for 2-sample.", dist); |
| 1515 | 100 | T | F | Safefree(x_data); |
| 1516 | 100 | T | F | croak("ks_test: Invalid arguments for 'y'."); |
| 1517 | 100 | T | F | } |
| 1518 | 50 | T | F | Safefree(x_data); |
| 1519 | 100 | T | F | if (p_value > 1.0) p_value = 1.0; |
| 1520 | 50 | T | F | if (p_value < 0.0) p_value = 0.0; |
| 1521 | 0 | T | F | HV *restrict res = newHV(); |
| 1524 | 50 | T | F | hv_stores(res, "method", newSVpv(method_desc, 0)); |
| 1528 | 100 | T | F | OUTPUT: |
| 50 | T | F | OUTPUT: |
| 50 | T | F | OUTPUT: |
| 1532 | 50 | T | F | CODE: |
| 1536 | 100 | T | F | double mu = 0.0; |
| 50 | T | F | double mu = 0.0; |
| 50 | T | F | double mu = 0.0; |
| 1544 | 100 | T | F | } |
| 100 | T | F | } |
| 1547 | 100 | T | F | y_sv = ST(arg_idx); |
| 1549 | 50 | T | F | } |
| 50 | T | F | } |
| 50 | T | F | } |
| 1555 | 100 | T | F | for (; arg_idx < items; arg_idx += 2) { |
| 1557 | 50 | T | F | SV *restrict val = ST(arg_idx + 1); |
| 50 | T | F | SV *restrict val = ST(arg_idx + 1); |
| 50 | T | F | SV *restrict val = ST(arg_idx + 1); |
| 1563 | 50 | T | F | else if (strEQ(key, "exact")) { |
| 1564 | 50 | T | F | if (!SvOK(val)) exact = -1; |
| 1569 | 100 | T | F | } |
| 100 | T | F | } |
| 1572 | 50 | T | F | croak("wilcox_test: 'x' is a required argument and must be an ARRAY reference"); |
| 1573 | 50 | T | F | AV *restrict x_av = (AV*)SvRV(x_sv); |
| 1574 | 50 | T | F | size_t nx = av_len(x_av) + 1; |
| 50 | T | F | size_t nx = av_len(x_av) + 1; |
| 100 | T | F | size_t nx = av_len(x_av) + 1; |
| 1576 | 100 | T | F | |
| 50 | T | F | |
| 1580 | 100 | T | F | y_av = (AV*)SvRV(y_sv); |
| 1585 | 100 | T | F | bool use_exact = FALSE; |
| 1586 | 50 | T | F | // --- TWO SAMPLE (Mann-Whitney) --- |
| 1588 | 0 | T | F | RankInfo *restrict ri = (RankInfo *)safemalloc((nx + ny) * sizeof(RankInfo)); |
| 1592 | 50 | T | F | if (el && SvOK(*el) && looks_like_number(*el)) { |
| 1598 | 50 | T | F | for (size_t i = 0; i < ny; i++) { |
| 1599 | 50 | T | F | SV**restrict el = av_fetch(y_av, i, 0); |
| 100 | T | F | SV**restrict el = av_fetch(y_av, i, 0); |
| 1600 | 0 | T | F | if (el && SvOK(*el) && looks_like_number(*el)) { |
| 1601 | 0 | T | F | ri[valid_nx + valid_ny].val = SvNV(*el); |
| 1605 | 50 | T | F | } |
| 1606 | 50 | T | F | if (valid_nx == 0) { Safefree(ri); croak("not enough (non-missing) 'x' observations"); } |
| 1611 | 100 | T | F | double w_rank_sum = 0.0; |
| 50 | T | F | double w_rank_sum = 0.0; |
| 100 | T | F | double w_rank_sum = 0.0; |
| 1615 | 100 | T | F | if (exact == 1) use_exact = TRUE; |
| 1617 | 50 | T | F | else use_exact = (valid_nx < 50 && valid_ny < 50 && !has_ties); |
| 50 | T | F | else use_exact = (valid_nx < 50 && valid_ny < 50 && !has_ties); |
| 50 | T | F | else use_exact = (valid_nx < 50 && valid_ny < 50 && !has_ties); |
| 1620 | 100 | T | F | warn("cannot compute exact p-value with ties; falling back to approximation"); |
| 1622 | 50 | T | F | } |
| 50 | T | F | } |
| 50 | T | F | } |
| 1625 | 50 | T | F | double p_less = exact_pwilcox(statistic, valid_nx, valid_ny); |
| 1629 | 50 | T | F | else if (strcmp(alternative, "greater") == 0) p_value = p_greater; |
| 1633 | 50 | T | F | } |
| 1638 | 100 | T | F | double z = statistic - exp; |
| 1645 | 100 | T | F | } |
| 1646 | 100 | T | F | z = (z - CORRECTION) / sqrt(var); |
| 1648 | 50 | T | F | if (strcmp(alternative, "less") == 0) p_value = approx_pnorm(z); |
| 1649 | 50 | T | F | else if (strcmp(alternative, "greater") == 0) p_value = 1.0 - approx_pnorm(z); |
| 1650 | 50 | T | F | else p_value = 2.0 * approx_pnorm(-fabs(z)); |
| 50 | T | F | else p_value = 2.0 * approx_pnorm(-fabs(z)); |
| 1651 | 50 | T | F | } |
| 50 | T | F | } |
| 1655 | 50 | T | F | double *restrict diffs = (double *)safemalloc(nx * sizeof(double)); |
| 50 | T | F | double *restrict diffs = (double *)safemalloc(nx * sizeof(double)); |
| 1659 | 50 | T | F | SV**restrict x_el = av_fetch(x_av, i, 0); |
| 1664 | 50 | T | F | SV**restrict y_el = av_fetch(y_av, i, 0); |
| 1665 | 50 | T | F | if (!y_el || !SvOK(*y_el) || !looks_like_number(*y_el)) continue; |
| 1667 | 50 | T | F | double d = dx - dy - mu; |
| 1671 | 0 | T | F | double d = dx - mu; |
| 1676 | 0 | T | F | if (n_nz == 0) { |
| 1677 | 0 | T | F | Safefree(diffs); |
| 0 | T | F | Safefree(diffs); |
| 1678 | 0 | T | F | croak("not enough (non-missing) observations"); |
| 1679 | 0 | T | F | } |
| 1683 | 0 | T | F | ri[i].idx = (diffs[i] > 0); |
| 1684 | 0 | T | F | } |
| 1689 | 50 | T | F | if (ri[i].idx) statistic += ri[i].rank; |
| 1705 | 50 | T | F | double p_greater = 1.0 - exact_psignrank(statistic - 1.0, n_nz); |
| 1708 | 50 | T | F | else if (strcmp(alternative, "greater") == 0) p_value = p_greater; |
| 100 | T | F | else if (strcmp(alternative, "greater") == 0) p_value = p_greater; |
| 50 | T | F | else if (strcmp(alternative, "greater") == 0) p_value = p_greater; |
| 1711 | 50 | T | F | p_value = 2.0 * p; |
| 1719 | 100 | T | F | if (correct) { |
| 50 | T | F | if (correct) { |
| 100 | T | F | if (correct) { |
| 1722 | 100 | T | F | else if (strcmp(alternative, "less") == 0) CORRECTION = -0.5; |
| 1725 | 100 | T | F | |
| 1726 | 100 | T | F | if (strcmp(alternative, "less") == 0) p_value = approx_pnorm(z); |
| 1727 | 100 | T | F | else if (strcmp(alternative, "greater") == 0) p_value = 1.0 - approx_pnorm(z); |
| 1730 | 100 | T | F | Safefree(ri); Safefree(diffs); |
| 1738 | 100 | T | F | RETVAL = newRV_noinc((SV*)res); |
| 1742 | 100 | T | F | |
| 1747 | 100 | T | F | AV*restrict obs_av = (AV*)SvRV(data_ref); |
| 1750 | 50 | T | F | SV**restrict first_elem = av_fetch(obs_av, 0, 0); |
| 1762 | 100 | T | F | int yates = (is_2d && r == 2 && c == 2) ? 1 : 0; |
| 1767 | 100 | T | F | double *restrict col_sum = (double*)safemalloc(c * sizeof(double)); |
| 1781 | 100 | T | F | for (unsigned int i = 0; i < r; i++) { |
| 1782 | 100 | T | F | AV*restrict exp_row = newAV(); |
| 1805 | 50 | T | F | for (unsigned int j = 0; j < c; j++) { |
| 50 | T | F | for (unsigned int j = 0; j < c; j++) { |
| 1807 | 100 | T | F | grand_total += SvNV(*val_sv); |
| 50 | T | F | grand_total += SvNV(*val_sv); |
| 1812 | 50 | T | F | double O = SvNV(*val_sv); |
| 1822 | 100 | T | F | hv_store(results, "p_value", 7, newSVnv(p_val), 0); |
| 1823 | 50 | T | F | hv_store(results, "expected", 8, newRV_noinc((SV*)expected_av), 0); |
| 1826 | 50 | T | F | hv_store(results, "method", 6, newSVpv("Pearson's Chi-squared test with Yates' continuity correction", 0), 0); |
| 1827 | 100 | T | F | } else { |
| 1828 | 50 | T | F | hv_store(results, "method", 6, newSVpv("Pearson's Chi-squared test", 0), 0); |
| 1829 | 100 | T | F | } |
| 1830 | 50 | T | F | } else { |
| 1834 | 50 | T | F | } |
| 50 | T | F | } |
| 1838 | 100 | T | F | PROTOTYPES: ENABLE |
| 50 | T | F | PROTOTYPES: ENABLE |
| 1842 | 50 | T | F | { |
| 50 | T | F | { |
| 1845 | 100 | T | F | unsigned int arg_idx = 0; |
| 50 | T | F | unsigned int arg_idx = 0; |
| 1846 | 100 | T | F | |
| 50 | T | F | |
| 1855 | 100 | T | F | if (arg_idx < items) { |
| 1857 | 50 | T | F | arg_idx++; |
| 1861 | 50 | T | F | const char *restrict undef_val = "NA"; |
| 50 | T | F | const char *restrict undef_val = "NA"; |
| 1865 | 100 | T | F | // Read the remaining Hash-style arguments |
| 50 | T | F | // Read the remaining Hash-style arguments |
| 1871 | 100 | T | F | else if (strEQ(key, "col.names")) col_names_sv = val; |
| 1873 | 50 | T | F | else if (strEQ(key, "row.names")) row_names_sv = val; |
| 50 | T | F | else if (strEQ(key, "row.names")) row_names_sv = val; |
| 50 | T | F | else if (strEQ(key, "row.names")) row_names_sv = val; |
| 1874 | 0 | T | F | else if (strEQ(key, "sep")) sep = SvPV_nolen(val); |
| 1877 | 100 | T | F | } |
| 1880 | 100 | T | F | croak("write_table: 'data' must be a HASH or ARRAY reference\n"); |
| 1886 | 50 | T | F | |
| 1888 | 50 | T | F | const char *restrict file = SvPV_nolen(file_sv); |
| 50 | T | F | const char *restrict file = SvPV_nolen(file_sv); |
| 50 | T | F | const char *restrict file = SvPV_nolen(file_sv); |
| 50 | T | F | const char *restrict file = SvPV_nolen(file_sv); |
| 1892 | 100 | T | F | croak("write_table: 'col.names' must be an ARRAY reference\n"); |
| 1894 | 50 | T | F | } |
| 50 | T | F | } |
| 50 | T | F | } |
| 50 | T | F | } |
| 1902 | 50 | T | F | if (hv_iterinit(hv) == 0) XSRETURN_EMPTY; |
| 1905 | 50 | T | F | SV *restrict first_val = hv_iterval(hv, entry); |
| 100 | T | F | SV *restrict first_val = hv_iterval(hv, entry); |
| 1909 | 100 | T | F | int first_type = SvTYPE(SvRV(first_val)); |
| 1910 | 100 | T | F | if (first_type != SVt_PVHV && first_type != SVt_PVAV) { |
| 50 | T | F | if (first_type != SVt_PVHV && first_type != SVt_PVAV) { |
| 1912 | 100 | T | F | } |
| 1914 | 50 | T | F | is_hoa = (first_type == SVt_PVAV); |
| 50 | T | F | is_hoa = (first_type == SVt_PVAV); |
| 1920 | 100 | T | F | } |
| 1924 | 100 | T | F | hv_iterinit(hv); |
| 1930 | 100 | T | F | AV *restrict av = (AV*)data_ref; |
| 1935 | 100 | T | F | } |
| 1943 | 50 | T | F | is_aoh = 1; |
| 1944 | 100 | T | F | } |
| 1946 | 50 | T | F | PerlIO *restrict fh = PerlIO_open(file, "w"); |
| 50 | T | F | PerlIO *restrict fh = PerlIO_open(file, "w"); |
| 1953 | 100 | T | F | // ----- Hash of Hashes ----- |
| 1961 | 100 | T | F | } else { |
| 1963 | 50 | T | F | hv_iterinit((HV*)data_ref); |
| 1966 | 50 | T | F | HV *restrict inner = (HV*)SvRV(hv_iterval((HV*)data_ref, entry)); |
| 1968 | 100 | T | F | HE *restrict inner_entry; |
| 1970 | 50 | T | F | hv_store_ent(col_map, hv_iterkeysv(inner_entry), newSViv(1), 0); |
| 50 | T | F | hv_store_ent(col_map, hv_iterkeysv(inner_entry), newSViv(1), 0); |
| 1971 | 50 | T | F | } |
| 1972 | 100 | T | F | } |
| 100 | T | F | } |
| 1973 | 100 | T | F | unsigned num_cols = hv_iterinit(col_map); |
| 1977 | 50 | T | F | col_array[i] = SvPV_nolen(hv_iterkeysv(ce)); |
| 1978 | 50 | T | F | } |
| 1990 | 100 | T | F | SV**restrict h_ptr = av_fetch(headers_av, i, 0); |
| 1995 | 100 | T | F | |
| 1998 | 100 | T | F | for(size_t i=0; i<num_rows; i++) { |
| 2001 | 100 | T | F | qsort(row_array, num_rows, sizeof(char*), cmp_string_wt); |
| 50 | T | F | qsort(row_array, num_rows, sizeof(char*), cmp_string_wt); |
| 2003 | 100 | T | F | HV *restrict data_hv = (HV*)data_ref; |
| 2005 | 50 | T | F | |
| 50 | T | F | |
| 2010 | 100 | T | F | SV **restrict inner_hv_ptr = hv_fetch(data_hv, row_array[i], strlen(row_array[i]), 0); |
| 2015 | 100 | T | F | const char *restrict col_name = (h_ptr && SvOK(*h_ptr)) ? SvPV_nolen(*h_ptr) : ""; |
| 2018 | 50 | T | F | if (SvROK(*cell_ptr)) { |
| 2019 | 100 | T | F | PerlIO_close(fh); |
| 50 | T | F | PerlIO_close(fh); |
| 2023 | 0 | T | F | if (rows_av) SvREFCNT_dec(rows_av); |
| 2025 | 0 | T | F | } |
| 0 | T | F | } |
| 2027 | 0 | T | F | } else { |
| 2037 | 100 | T | F | size_t max_rows = 0; |
| 2038 | 100 | T | F | hv_iterinit(data_hv); |
| 2040 | 50 | T | F | while((entry = hv_iternext(data_hv))) { |
| 50 | T | F | while((entry = hv_iternext(data_hv))) { |
| 2045 | 100 | T | F | |
| 2047 | 100 | T | F | AV *restrict c_av = (AV*)SvRV(col_names_sv); |
| 2048 | 50 | T | F | for(size_t i=0; i<=av_len(c_av); i++) { |
| 2050 | 0 | T | F | if(c && SvOK(*c)) av_push(headers_av, newSVsv(*c)); |
| 0 | T | F | if(c && SvOK(*c)) av_push(headers_av, newSVsv(*c)); |
| 2053 | 0 | T | F | unsigned int num_cols = hv_iterinit(data_hv); |
| 0 | T | F | unsigned int num_cols = hv_iterinit(data_hv); |
| 2054 | 0 | T | F | const char **restrict col_array = safemalloc(num_cols * sizeof(char*)); |
| 2057 | 0 | T | F | col_array[i] = SvPV_nolen(hv_iterkeysv(ce)); |
| 2073 | 100 | T | F | av_push(filtered_headers, newSVsv(h_sv)); |
| 2075 | 50 | T | F | } |
| 50 | T | F | } |
| 2077 | 50 | T | F | headers_av = filtered_headers; |
| 50 | T | F | headers_av = filtered_headers; |
| 2080 | 100 | T | F | const char **restrict header_row = safemalloc((num_headers + 1) * sizeof(char*)); |
| 100 | T | F | const char **restrict header_row = safemalloc((num_headers + 1) * sizeof(char*)); |
| 2081 | 50 | T | F | size_t h_idx = 0; |
| 2084 | 0 | T | F | SV**restrict h_ptr = av_fetch(headers_av, i, 0); |
| 2096 | 100 | T | F | AV *restrict rn_arr = (AV*)SvRV(*rn_arr_ptr); |
| 50 | T | F | AV *restrict rn_arr = (AV*)SvRV(*rn_arr_ptr); |
| 2099 | 50 | T | F | if (SvROK(*rn_val_ptr)) { |
| 2102 | 100 | T | F | if (headers_av) SvREFCNT_dec(headers_av); |
| 50 | T | F | if (headers_av) SvREFCNT_dec(headers_av); |
| 2104 | 100 | T | F | } |
| 2106 | 50 | T | F | } else { |
| 50 | T | F | } else { |
| 2110 | 100 | T | F | row_data[d_idx++] = undef_val; |
| 2112 | 50 | T | F | } else { |
| 50 | T | F | } else { |
| 2116 | 100 | T | F | } |
| 2123 | 100 | T | F | AV *restrict arr = (AV*)SvRV(*arr_ptr); |
| 2128 | 100 | T | F | safefree(row_data); |
| 2132 | 100 | T | F | row_data[d_idx++] = SvPV_nolen(*cell_ptr); |
| 50 | T | F | row_data[d_idx++] = SvPV_nolen(*cell_ptr); |
| 2135 | 0 | T | F | } |
| 2137 | 0 | T | F | row_data[d_idx++] = undef_val; |
| 0 | T | F | row_data[d_idx++] = undef_val; |
| 2139 | 0 | T | F | } |
| 2149 | 100 | T | F | for(size_t i=0; i<=av_len(c_av); i++) { |
| 2150 | 100 | T | F | SV **restrict c = av_fetch(c_av, i, 0); |
| 2152 | 50 | T | F | } |
| 50 | T | F | } |
| 2157 | 100 | T | F | if (row_ptr && SvROK(*row_ptr)) { |
| 2160 | 50 | T | F | HE *restrict entry; |
| 50 | T | F | HE *restrict entry; |
| 2161 | 100 | T | F | while((entry = hv_iternext(row_hv))) { |
| 2162 | 50 | T | F | hv_store_ent(col_map, hv_iterkeysv(entry), newSViv(1), 0); |
| 2163 | 0 | T | F | } |
| 2164 | 0 | T | F | } |
| 0 | T | F | } |
| 2165 | 0 | T | F | } |
| 2168 | 0 | T | F | for(unsigned int i=0; i<num_cols; i++) { |
| 2182 | 100 | T | F | if (!h_ptr || !*h_ptr) continue; |
| 2184 | 50 | T | F | if (strcmp(SvPV_nolen(h_sv), rownames_col) != 0) { |
| 50 | T | F | if (strcmp(SvPV_nolen(h_sv), rownames_col) != 0) { |
| 2185 | 50 | T | F | av_push(filtered_headers, newSVsv(h_sv)); |
| 2186 | 100 | T | F | } |
| 50 | T | F | } |
| 2187 | 50 | T | F | } |
| 2190 | 0 | T | F | } |
| 2199 | 100 | T | F | print_string_row(fh, header_row, h_idx, sep); |
| 50 | T | F | print_string_row(fh, header_row, h_idx, sep); |
| 2203 | 50 | T | F | size_t d_idx = 0; |
| 2204 | 100 | T | F | SV **restrict row_ptr = av_fetch(data_av, i, 0); |
| 2221 | 50 | T | F | char buf[32]; |
| 50 | T | F | char buf[32]; |
| 50 | T | F | char buf[32]; |
| 2226 | 50 | T | F | |
| 2227 | 50 | T | F | for(size_t j=0; j<num_headers; j++) { |
| 2230 | 50 | T | F | SV **restrict cell_ptr = row_hv ? hv_fetch(row_hv, col_name, strlen(col_name), 0) : NULL; |
| 2235 | 100 | T | F | if (headers_av) SvREFCNT_dec(headers_av); |
| 2239 | 50 | T | F | } else { |
| 100 | T | F | } else { |
| 2241 | 50 | T | F | } |
| 100 | T | F | } |
| 2245 | 100 | T | F | } |
| 2248 | 50 | T | F | if (headers_av) SvREFCNT_dec(headers_av); |
| 2249 | 50 | T | F | if (rows_av) SvREFCNT_dec(rows_av); |
| 100 | T | F | if (rows_av) SvREFCNT_dec(rows_av); |
| 2251 | 50 | T | F | XSRETURN_EMPTY; |
| 2254 | 50 | T | F | SV* |
| 50 | T | F | SV* |
| 50 | T | F | SV* |
| 2259 | 100 | T | F | AV *restrict current_row = newAV(); |
| 2261 | 100 | T | F | bool in_quotes = 0, post_quote = 0; |
| 2262 | 100 | T | F | size_t sep_len, comment_len; |
| 2263 | 100 | T | F | SV *restrict line_sv; |
| 100 | T | F | SV *restrict line_sv; |
| 100 | T | F | SV *restrict line_sv; |
| 2266 | 100 | T | F | if (SvOK(callback) && SvROK(callback) && SvTYPE(SvRV(callback)) == SVt_PVCV) { |
| 2269 | 100 | T | F | data = newAV(); |
| 2272 | 100 | T | F | comment_len = comment_str ? strlen(comment_str) : 0; |
| 50 | T | F | comment_len = comment_str ? strlen(comment_str) : 0; |
| 50 | T | F | comment_len = comment_str ? strlen(comment_str) : 0; |
| 100 | T | F | comment_len = comment_str ? strlen(comment_str) : 0; |
| 2281 | 100 | T | F | char *restrict line = SvPV_nolen(line_sv); |
| 2290 | 50 | T | F | if (!in_quotes) { |
| 2294 | 50 | T | F | if (line[i] != ' ' && line[i] != '\t') { is_empty = 0; break; } |
| 2295 | 50 | T | F | } |
| 2298 | 50 | T | F | // Skip comments |
| 2310 | 100 | T | F | i++; // Skip the escaped second quote |
| 2312 | 50 | T | F | in_quotes = 0; // Close quotes |
| 2316 | 50 | T | F | } |
| 2317 | 50 | T | F | } else if (!in_quotes && sep_len > 0 && (len - i) >= sep_len && strncmp(line + i, sep_str, sep_len) == 0) { |
| 2320 | 50 | T | F | i += sep_len - 1; // Advance past multi-char separators |
| 2330 | 50 | T | F | post_quote = 0; // Reset post-quote state at row boundary |
| 2342 | 50 | T | F | call_sv(callback, G_DISCARD); |
| 50 | T | F | call_sv(callback, G_DISCARD); |
| 2345 | 50 | T | F | SvREFCNT_dec(current_row); // Frees the row from C memory if Perl didn't keep it |
| 50 | T | F | SvREFCNT_dec(current_row); // Frees the row from C memory if Perl didn't keep it |
| 2350 | 100 | T | F | } |
| 2351 | 100 | T | F | } |
| 2352 | 50 | T | F | PerlIO_close(fp); |
| 2361 | 50 | T | F | PUSHMARK(SP); |
| 2372 | 100 | T | F | } |
| 2377 | 50 | T | F | } else { |
| 50 | T | F | } else { |
| 50 | T | F | } else { |
| 2378 | 50 | T | F | RETVAL = newRV_noinc((SV*)data); |
| 50 | T | F | RETVAL = newRV_noinc((SV*)data); |
| 50 | T | F | RETVAL = newRV_noinc((SV*)data); |
| 2381 | 50 | T | F | RETVAL |
| 50 | T | F | RETVAL |
| 2389 | 50 | T | F | } |
| 2395 | 100 | T | F | if (strcmp(method, "pearson") != 0 && |
| 2397 | 100 | T | F | strcmp(method, "kendall") != 0) { |
| 2398 | 100 | T | F | croak("cov: unknown method '%s' (use 'pearson', 'spearman', or 'kendall')", method); |
| 2406 | 100 | T | F | if (nx != ny) { |
| 2415 | 100 | T | F | size_t n = 0; |
| 2427 | 100 | T | F | x_val[n] = xv; |
| 2483 | 50 | T | F | } |
| 2485 | 100 | T | F | RETVAL |
| 2488 | 100 | T | F | CODE: |
| 2489 | 100 | T | F | { |
| 2490 | 50 | T | F | const char *restrict formula = NULL; |
| 2493 | 50 | T | F | char f_cpy[512]; |
| 2494 | 50 | T | F | char *restrict src, *restrict dst, *restrict tilde, *restrict lhs, *restrict rhs, *restrict chunk; |
| 50 | T | F | char *restrict src, *restrict dst, *restrict tilde, *restrict lhs, *restrict rhs, *restrict chunk; |
| 2498 | 100 | T | F | bool *restrict is_dummy = NULL; |
| 50 | T | F | bool *restrict is_dummy = NULL; |
| 2506 | 100 | T | F | |
| 100 | T | F | |
| 50 | T | F | |
| 2510 | 50 | T | F | HV *restrict data_hoa = NULL; |
| 2514 | 50 | T | F | double *restrict W = NULL, *restrict Z = NULL, *restrict beta = NULL, *restrict beta_old = NULL; |
| 2515 | 50 | T | F | bool *restrict aliased = NULL; |
| 2518 | 100 | T | F | HV *restrict res_hv, *restrict coef_hv, *restrict fitted_hv, *restrict resid_hv, *restrict summary_hv; |
| 2519 | 50 | T | F | AV *restrict terms_av; |
| 2523 | 50 | T | F | |
| 50 | T | F | |
| 2528 | 50 | T | F | else if (strEQ(key, "data")) data_sv = val; |
| 2531 | 0 | T | F | } |
| 0 | T | F | } |
| 2532 | 0 | T | F | if (!formula) croak("glm: formula is required"); |
| 0 | T | F | if (!formula) croak("glm: formula is required"); |
| 2541 | 50 | T | F | Newx(exp_terms, exp_cap, char*); Newx(is_dummy, exp_cap, bool); |
| 0 | T | F | Newx(exp_terms, exp_cap, char*); Newx(is_dummy, exp_cap, bool); |
| 2547 | 100 | T | F | |
| 2549 | 100 | T | F | if (!tilde) croak("glm: invalid formula, missing '~'"); |
| 2550 | 50 | T | F | *tilde = '\0'; |
| 2552 | 50 | T | F | |
| 2558 | 50 | T | F | if (num_terms >= term_cap - 3) { |
| 2560 | 50 | T | F | Renew(terms, term_cap, char*); Renew(uniq_terms, term_cap, char*); |
| 2562 | 50 | T | F | if (strcmp(chunk, "1") == 0 || strcmp(chunk, "-1") == 0) { |
| 2564 | 50 | T | F | continue; |
| 100 | T | F | continue; |
| 2567 | 50 | T | F | if (star) { |
| 2568 | 100 | T | F | *star = '\0'; |
| 2572 | 50 | T | F | |
| 50 | T | F | |
| 2574 | 50 | T | F | terms[num_terms++] = savepv(right); |
| 50 | T | F | terms[num_terms++] = savepv(right); |
| 2576 | 100 | T | F | terms[num_terms] = (char*)safemalloc(inter_len); |
| 2584 | 0 | T | F | } |
| 2587 | 0 | T | F | bool found = FALSE; |
| 0 | T | F | bool found = FALSE; |
| 2588 | 0 | T | F | for (size_t j = 0; j < num_uniq; j++) { |
| 2590 | 0 | T | F | } |
| 0 | T | F | } |
| 0 | T | F | } |
| 2595 | 0 | T | F | // --- Data Extraction --- |
| 2603 | 100 | T | F | if (SvROK(val) && SvTYPE(SvRV(val)) == SVt_PVAV) { |
| 2604 | 50 | T | F | data_hoa = hv; |
| 2609 | 100 | T | F | row_names[i] = savepv(buf); |
| 2612 | 100 | T | F | n = hv_iterinit(hv); |
| 2614 | 50 | T | F | i = 0; |
| 2615 | 100 | T | F | while ((entry = hv_iternext(hv))) { |
| 2617 | 50 | T | F | row_names[i] = savepv(hv_iterkey(entry, &len)); |
| 2619 | 100 | T | F | i++; |
| 2620 | 100 | T | F | } |
| 2622 | 100 | T | F | } |
| 2623 | 50 | T | F | } else if (SvTYPE(ref) == SVt_PVAV) { |
| 0 | T | F | } else if (SvTYPE(ref) == SVt_PVAV) { |
| 2629 | 50 | T | F | if (val && SvROK(*val) && SvTYPE(SvRV(*val)) == SVt_PVHV) { |
| 2630 | 100 | T | F | row_hashes[i] = (HV*)SvRV(*val); |
| 2631 | 100 | T | F | char buf[32]; snprintf(buf, sizeof(buf), "%lu", i + 1); |
| 2632 | 50 | T | F | row_names[i] = savepv(buf); |
| 2637 | 100 | T | F | } |
| 2638 | 50 | T | F | } |
| 2649 | 100 | T | F | exp_terms[p_exp] = savepv("Intercept"); is_dummy[p_exp] = FALSE; p_exp++; continue; |
| 2660 | 50 | T | F | } |
| 50 | T | F | } |
| 2661 | 50 | T | F | if (!found) { |
| 2664 | 100 | T | F | } |
| 2666 | 50 | T | F | } |
| 2670 | 100 | T | F | for (size_t l2 = l1 + 1; l2 < num_levels; l2++) { |
| 2671 | 100 | T | F | if (strcmp(levels[l1], levels[l2]) > 0) { |
| 2673 | 100 | T | F | } |
| 2675 | 50 | T | F | } |
| 2676 | 100 | T | F | for (size_t l = 1; l < num_levels; l++) { |
| 2681 | 50 | T | F | } |
| 2684 | 50 | T | F | snprintf(exp_terms[p_exp], t_len, "%s%s", uniq_terms[j], levels[l]); |
| 2686 | 100 | T | F | p_exp++; |
| 2692 | 50 | T | F | } |
| 2693 | 0 | T | F | } else { |
| 2702 | 100 | T | F | // --- Listwise Deletion --- |
| 2705 | 100 | T | F | if (isnan(y_val)) { Safefree(row_names[i]); continue; } |
| 2707 | 100 | T | F | bool row_ok = TRUE; |
| 2708 | 100 | T | F | double *restrict row_x = (double*)safemalloc(p * sizeof(double)); |
| 2709 | 50 | T | F | for (size_t j = 0; j < p; j++) { |
| 50 | T | F | for (size_t j = 0; j < p; j++) { |
| 2713 | 100 | T | F | char* str_val = get_data_string_alloc(data_hoa, row_hashes, i, dummy_base[j]); |
| 2714 | 50 | T | F | if (str_val) { |
| 2723 | 50 | T | F | if (!row_ok) { Safefree(row_names[i]); Safefree(row_x); continue; } |
| 2724 | 100 | T | F | Y[valid_n] = y_val; |
| 2725 | 100 | T | F | for (size_t j = 0; j < p; j++) X[valid_n * p + j] = row_x[j]; |
| 2728 | 50 | T | F | Safefree(row_x); |
| 2737 | 100 | T | F | W = (double*)safemalloc(valid_n * sizeof(double)); Z = (double*)safemalloc(valid_n * sizeof(double)); |
| 100 | T | F | W = (double*)safemalloc(valid_n * sizeof(double)); Z = (double*)safemalloc(valid_n * sizeof(double)); |
| 2738 | 100 | T | F | beta = (double*)safemalloc(p * sizeof(double)); beta_old = (double*)safemalloc(p * sizeof(double)); |
| 2740 | 100 | T | F | XtWX = (double*)safemalloc(p * p * sizeof(double)); XtWZ = (double*)safemalloc(p * sizeof(double)); |
| 2743 | 100 | T | F | double sum_y = 0.0; |
| 2747 | 100 | T | F | if (is_binomial) { |
| 2748 | 50 | T | F | if (Y[i] < 0.0 || Y[i] > 1.0) croak("glm: binomial family requires response between 0 and 1"); |
| 2750 | 50 | T | F | eta[i] = log(mu[i] / (1.0 - mu[i])); |
| 100 | T | F | eta[i] = log(mu[i] / (1.0 - mu[i])); |
| 2756 | 100 | T | F | } else { |
| 2758 | 100 | T | F | eta[i] = mu[i]; |
| 2760 | 50 | T | F | } |
| 100 | T | F | } |
| 2762 | 100 | T | F | for (iter = 1; iter <= max_iter; iter++) { |
| 2765 | 50 | T | F | double varmu = mu[i] * (1.0 - mu[i]); |
| 2766 | 50 | T | F | double mu_eta = varmu; // Link derivative for logit |
| 2769 | 100 | T | F | W[i] = (mu_eta * mu_eta) / varmu; |
| 2770 | 50 | T | F | } else { |
| 2780 | 100 | T | F | XtWZ[i] += X[k * p + i] * w * z; |
| 100 | T | F | XtWZ[i] += X[k * p + i] * w * z; |
| 50 | T | F | XtWZ[i] += X[k * p + i] * w * z; |
| 2785 | 100 | T | F | final_rank = sweep_matrix_ols(XtWX, p, aliased); |
| 2788 | 100 | T | F | double sum = 0.0; |
| 2792 | 100 | T | F | } |
| 2795 | 100 | T | F | for (unsigned short int half = 0; half < 10; half++) { |
| 100 | T | F | for (unsigned short int half = 0; half < 10; half++) { |
| 2796 | 100 | T | F | deviance_new = 0.0; |
| 2797 | 100 | T | F | for (i = 0; i < valid_n; i++) { |
| 2798 | 50 | T | F | double linear_pred = 0.0; |
| 2799 | 100 | T | F | for (size_t j = 0; j < p; j++) if (!aliased[j]) linear_pred += X[i * p + j] * beta[j]; |
| 2801 | 100 | T | F | if (is_binomial) { |
| 2807 | 100 | T | F | double dev = 0.0; |
| 2808 | 100 | T | F | if (Y[i] == 0.0) dev = -2.0 * log(1.0 - mu[i]); |
| 2809 | 100 | T | F | else if (Y[i] == 1.0) dev = -2.0 * log(mu[i]); |
| 2810 | 50 | T | F | else dev = 2.0 * (Y[i] * log(Y[i] / mu[i]) + (1.0 - Y[i]) * log((1.0 - Y[i]) / (1.0 - mu[i]))); |
| 2818 | 100 | T | F | // Step halving divergence check |
| 2821 | 50 | T | F | } |
| 2827 | 100 | T | F | if (fabs(deviance_new - deviance_old) / (0.1 + fabs(deviance_new)) < epsilon) { |
| 50 | T | F | if (fabs(deviance_new - deviance_old) / (0.1 + fabs(deviance_new)) < epsilon) { |
| 2828 | 100 | T | F | converged = TRUE; break; |
| 2830 | 100 | T | F | deviance_old = deviance_new; |
| 2833 | 100 | T | F | // Final accurate calculation of W for standard errors |
| 2834 | 50 | T | F | for (i = 0; i < p; i++) { for (size_t j = 0; j < p; j++) XtWX[i * p + j] = 0.0; } |
| 2836 | 100 | T | F | double w = is_binomial ? (mu[k] * (1.0 - mu[k])) : 1.0; |
| 2845 | 100 | T | F | double wtdmu = mean_y; // Since weights are 1.0 initially |
| 2850 | 50 | T | F | else null_dev += 2.0 * (Y[i] * log(Y[i] / wtdmu) + (1.0 - Y[i]) * log((1.0 - Y[i]) / (1.0 - wtdmu))); |
| 2853 | 0 | T | F | null_dev += diff * diff; |
| 2854 | 0 | T | F | } |
| 2858 | 100 | T | F | double n_f = (double)valid_n; |
| 2862 | 100 | T | F | } |
| 2863 | 100 | T | F | // --- Return Structures --- |
| 2885 | 100 | T | F | hv_store(coef_hv, exp_terms[j], strlen(exp_terms[j]), newSVnv(beta[j]), 0); |
| 2887 | 100 | T | F | |
| 2889 | 100 | T | F | if (aliased[j]) { |
| 2891 | 100 | T | F | hv_store(row_hv, "Std. Error", 10, newSVpv("NaN", 0), 0); |
| 2898 | 100 | T | F | |
| 2908 | 50 | T | F | hv_store(res_hv, "coefficients", 12, newRV_noinc((SV*)coef_hv), 0); |
| 50 | T | F | hv_store(res_hv, "coefficients", 12, newRV_noinc((SV*)coef_hv), 0); |
| 2920 | 100 | T | F | hv_store(res_hv, "summary", 7, newRV_noinc((SV*)summary_hv), 0); |
| 2924 | 100 | T | F | for (i = 0; i < num_terms; i++) Safefree(terms[i]); |
| 2925 | 100 | T | F | Safefree(terms); |
| 2926 | 50 | T | F | for (i = 0; i < num_uniq; i++) Safefree(uniq_terms[i]); |
| 2927 | 100 | T | F | Safefree(uniq_terms); |
| 50 | T | F | Safefree(uniq_terms); |
| 2928 | 50 | T | F | for (size_t j = 0; j < p_exp; j++) { |
| 2941 | 50 | T | F | OUTPUT: |
| 50 | T | F | OUTPUT: |
| 50 | T | F | OUTPUT: |
| 2942 | 50 | T | F | RETVAL |
| 50 | T | F | RETVAL |
| 50 | T | F | RETVAL |
| 2950 | 50 | T | F | SV *restrict x_ref = ST(0), *restrict y_ref = ST(1); |
| 2956 | 100 | T | F | bool continuity = 0; |
| 2960 | 50 | T | F | const char *restrict key = SvPV_nolen(ST(i)); |
| 100 | T | F | const char *restrict key = SvPV_nolen(ST(i)); |
| 50 | T | F | const char *restrict key = SvPV_nolen(ST(i)); |
| 2961 | 50 | T | F | SV *restrict val = ST(i + 1); |
| 100 | T | F | SV *restrict val = ST(i + 1); |
| 50 | T | F | SV *restrict val = ST(i + 1); |
| 2964 | 100 | T | F | else if (strEQ(key, "method")) method = SvPV_nolen(val); |
| 100 | T | F | else if (strEQ(key, "method")) method = SvPV_nolen(val); |
| 2971 | 50 | T | F | AV *restrict x_av, *restrict y_av; |
| 2977 | 100 | T | F | bool is_spearman = (strcmp(method, "spearman") == 0); |
| 2980 | 100 | T | F | if (!SvOK(x_ref) || !SvROK(x_ref) || SvTYPE(SvRV(x_ref)) != SVt_PVAV || |
| 2989 | 50 | T | F | if (n_raw != av_len(y_av) + 1) croak("incompatible dimensions"); |
| 50 | T | F | if (n_raw != av_len(y_av) + 1) croak("incompatible dimensions"); |
| 3003 | 100 | T | F | if (!isnan(xv) && !isnan(yv)) { |
| 3005 | 100 | T | F | y[n] = yv; |
| 3006 | 100 | T | F | n++; |
| 3010 | 50 | T | F | if (n < 3) { |
| 0 | T | F | if (n < 3) { |
| 3011 | 50 | T | F | Safefree(x); |
| 3012 | 50 | T | F | Safefree(y); |
| 3013 | 100 | T | F | croak("not enough finite observations"); |
| 3018 | 50 | T | F | double mean_x = 0.0, mean_y = 0.0, M2_x = 0.0, M2_y = 0.0, cov = 0.0; |
| 3020 | 50 | T | F | double dx = x[i] - mean_x; |
| 50 | T | F | double dx = x[i] - mean_x; |
| 3024 | 50 | T | F | M2_x += dx * (x[i] - mean_x); |
| 0 | T | F | M2_x += dx * (x[i] - mean_x); |
| 3025 | 50 | T | F | M2_y += dy * (y[i] - mean_y); |
| 50 | T | F | M2_y += dy * (y[i] - mean_y); |
| 3030 | 50 | T | F | statistic = estimate * sqrt(df / (1.0 - estimate * estimate)); |
| 50 | T | F | statistic = estimate * sqrt(df / (1.0 - estimate * estimate)); |
| 3032 | 50 | T | F | // Confidence interval using Fisher's Z transform |
| 3040 | 0 | T | F | // HIGH-PRECISION P-VALUE USING INCOMPLETE BETA |
| 0 | T | F | // HIGH-PRECISION P-VALUE USING INCOMPLETE BETA |
| 3043 | 0 | T | F | int c = 0, d = 0, tie_x = 0, tie_y = 0; |
| 3045 | 0 | T | F | for (size_t j = i + 1; j < n; j++) { |
| 3051 | 50 | T | F | else if (sign_y == 0) tie_y++; |
| 3059 | 100 | T | F | bool has_ties = (tie_x > 0 || tie_y > 0); |
| 3068 | 50 | T | F | // If forced exact but ties exist, R overrides and falls back to approximation anyway |
| 50 | T | F | // If forced exact but ties exist, R overrides and falls back to approximation anyway |
| 3072 | 100 | T | F | double S_stat = c - d; |
| 3079 | 100 | T | F | if (continuity) S -= (S > 0 ? 1 : -1); |
| 3080 | 50 | T | F | statistic = S / sqrt(var_S); |
| 50 | T | F | statistic = S / sqrt(var_S); |
| 3085 | 50 | T | F | p_value = approx_pnorm(statistic); |
| 0 | T | F | p_value = approx_pnorm(statistic); |
| 3086 | 50 | T | F | } else { |
| 50 | T | F | } else { |
| 3091 | 50 | T | F | double *restrict rank_x = safemalloc(n * sizeof(double)); |
| 3096 | 0 | T | F | // Spearman rho = Pearson r of the ranks (Welford's Method) |
| 3113 | 100 | T | F | S_stat += diff * diff; |
| 3134 | 50 | T | F | double r = estimate; |
| 50 | T | F | double r = estimate; |
| 3141 | 50 | T | F | } else { |
| 3144 | 100 | T | F | } |
| 3146 | 50 | T | F | rhv = newHV(); |
| 50 | T | F | rhv = newHV(); |
| 3148 | 50 | T | F | hv_stores(rhv, "p.value", newSVnv(p_value)); |
| 3156 | 50 | T | F | av_push(ci_av, newSVnv(ci_upper)); |
| 50 | T | F | av_push(ci_av, newSVnv(ci_upper)); |
| 3163 | 100 | T | F | RETVAL |
| 3166 | 50 | T | F | SV *data |
| 3173 | 50 | T | F | if (!SvROK(data) || SvTYPE(SvRV(data)) != SVt_PVAV) { |
| 3177 | 0 | T | F | av = (AV *)SvRV(data); |
| 3183 | 50 | T | F | for (size_t i = 0; i < n_raw; i++) { |
| 3184 | 50 | T | F | SV **restrict elem = av_fetch(av, i, 0); |
| 3185 | 100 | T | F | if (elem && SvOK(*elem)) { |
| 3193 | 50 | T | F | } |
| 100 | T | F | } |
| 3195 | 100 | T | F | if (n < 3 || n > 5000) { |
| 3203 | 100 | T | F | ssq += (x[i] - mean) * (x[i] - mean); |
| 3207 | 100 | T | F | croak("Data is perfectly constant; cannot compute Shapiro-Wilk test"); |
| 3217 | 100 | T | F | // Exact P-value for n=3 |
| 3223 | 50 | T | F | Newx(a, n, double); |
| 3246 | 50 | T | F | for (size_t i = 0; i < n; i++) { |
| 3247 | 50 | T | F | b_val += a[i] * x[i]; |
| 3257 | 50 | T | F | // Royston's branch for 4 <= n <= 11 (AS R94, small-sample path). |
| 3267 | 100 | T | F | double sig_val= 1.3822 + nn * (-0.77857 + nn * ( 0.062767 - nn * 0.0020322)); |
| 3269 | 100 | T | F | z = (-log(gamma - y) - mu) / sigma; |
| 50 | T | F | z = (-log(gamma - y) - mu) / sigma; |
| 3272 | 100 | T | F | p_val = 0.5 * erfc(z * M_SQRT1_2); |
| 3274 | 50 | T | F | } else { |
| 100 | T | F | } else { |
| 3276 | 100 | T | F | double ln_n = log((double)n); |
| 50 | T | F | double ln_n = log((double)n); |
| 3285 | 100 | T | F | if (p_val > 1.0) p_val = 1.0; |
| 3287 | 100 | T | F | |
| 100 | T | F | |
| 3296 | 100 | T | F | EXTEND(SP, 1); |
| 3297 | 50 | T | F | PUSHs(sv_2mortal(newRV_noinc((SV *)ret_hash))); |
| 3308 | 100 | T | F | if (SvROK(arg) && SvTYPE(SvRV(arg)) == SVt_PVAV) { |
| 3310 | 100 | T | F | size_t len = av_len(av) + 1; |
| 50 | T | F | size_t len = av_len(av) + 1; |
| 3313 | 100 | T | F | if (tv && SvOK(*tv)) { |
| 3315 | 50 | T | F | if (first || val < min_val) { |
| 100 | T | F | if (first || val < min_val) { |
| 3317 | 100 | T | F | first = FALSE; |
| 100 | T | F | first = FALSE; |
| 3326 | 100 | T | F | if (first || val < min_val) { |
| 3328 | 100 | T | F | first = FALSE; |
| 100 | T | F | first = FALSE; |
| 3337 | 100 | T | F | OUTPUT: |
| 3338 | 50 | T | F | RETVAL |
| 3353 | 50 | T | F | SV** restrict tv = av_fetch(av, j, 0); |
| 3357 | 100 | T | F | max_val = val; |
| 3359 | 100 | T | F | } |
| 100 | T | F | } |
| 3361 | 100 | T | F | } else { |
| 3366 | 100 | T | F | double val = SvNV(arg); |
| 3371 | 50 | T | F | count++; |
| 3380 | 100 | T | F | |
| 3383 | 100 | T | F | { |
| 3386 | 50 | T | F | |
| 3394 | 50 | T | F | } |
| 3398 | 50 | T | F | if (i + 1 < items && SvPOK(ST(i))) { |
| 3400 | 50 | T | F | if (strEQ(key, "n")) { |
| 3404 | 100 | T | F | continue; |
| 3406 | 50 | T | F | min = SvNV(ST(i+1)); |
| 3422 | 50 | T | F | } else if (!min_set) { |
| 3423 | 100 | T | F | min = SvNV(ST(i)); |
| 3431 | 100 | T | F | i++; |
| 3435 | 100 | T | F | } |
| 3436 | 100 | T | F | // Ensure PRNG is seeded |
| 3437 | 50 | T | F | AUTO_SEED_PRNG(); |
| 3442 | 100 | T | F | const double range = max - min; |
| 100 | T | F | const double range = max - min; |
| 3443 | 100 | T | F | for (size_t j = 0; j < n; j++) { |
| 100 | T | F | for (size_t j = 0; j < n; j++) { |
| 3446 | 50 | T | F | r = NAN; // R behavior for inverted ranges |
| 3448 | 100 | T | F | r = min + range * Drand01(); |
| 3463 | 100 | T | F | croak("Usage: rbinom(n => 10, size => 100, prob => 0.5)"); |
| 100 | T | F | croak("Usage: rbinom(n => 10, size => 100, prob => 0.5)"); |
| 3468 | 100 | T | F | bool size_set = FALSE, prob_set = FALSE; |
| 3472 | 50 | T | F | SV* restrict val = ST(i + 1); |
| 3476 | 100 | T | F | else if (strEQ(key, "prob")) { prob = SvNV(val); prob_set = TRUE; } |
| 3478 | 50 | T | F | } |
| 50 | T | F | } |
| 3481 | 100 | T | F | if (!size_set || !prob_set) croak("rbinom: 'size' and 'prob' are required arguments"); |
| 3482 | 100 | T | F | if (prob < 0.0 || prob > 1.0) croak("rbinom: prob must be between 0 and 1"); |
| 3485 | 50 | T | F | if (n > 0) { |
| 3492 | 50 | T | F | RETVAL = newRV_noinc((SV*)result_av); |
| 3495 | 50 | T | F | RETVAL |
| 3497 | 50 | T | F | SV* |
| 3499 | 50 | T | F | CODE: |
| 50 | T | F | CODE: |
| 3505 | 50 | T | F | AV*restrict x_av = (AV*)SvRV(x_sv); |
| 0 | T | F | AV*restrict x_av = (AV*)SvRV(x_sv); |
| 3509 | 50 | T | F | // 2. Extract Data & Find Range |
| 3513 | 50 | T | F | double min_val = DBL_MAX, max_val = -DBL_MAX; |
| 3514 | 50 | T | F | |
| 3515 | 50 | T | F | for (size_t i = 0; i < n_raw; i++) { |
| 3516 | 50 | T | F | SV**restrict tv = av_fetch(x_av, i, 0); |
| 3520 | 100 | T | F | if (val < min_val) min_val = val; |
| 3533 | 100 | T | F | n_bins = (size_t)SvIV(ST(1)); |
| 3535 | 100 | T | F | /* Support named parameters even if mixed with positional arguments */ |
| 3563 | 50 | T | F | // 5. Compute Statistics |
| 100 | T | F | // 5. Compute Statistics |
| 50 | T | F | // 5. Compute Statistics |
| 3569 | 50 | T | F | AV*restrict av_counts = newAV(); |
| 3572 | 100 | T | F | for (size_t i = 0; i <= n_bins; i++) { |
| 3576 | 100 | T | F | av_push(av_mids, newSVnv(mids[i])); |
| 3577 | 50 | T | F | av_push(av_density, newSVnv(density[i])); |
| 3580 | 50 | T | F | hv_stores(res_hv, "breaks", newRV_noinc((SV*)av_breaks)); |
| 50 | T | F | hv_stores(res_hv, "breaks", newRV_noinc((SV*)av_breaks)); |
| 50 | T | F | hv_stores(res_hv, "breaks", newRV_noinc((SV*)av_breaks)); |
| 3584 | 50 | T | F | |
| 3588 | 50 | T | F | |
| 3590 | 100 | T | F | } |
| 3592 | 50 | T | F | RETVAL |
| 50 | T | F | RETVAL |
| 3596 | 50 | T | F | { |
| 3607 | 50 | T | F | /* --- 2. Remaining args must be key-value pairs --- */ |
| 50 | T | F | /* --- 2. Remaining args must be key-value pairs --- */ |
| 50 | T | F | /* --- 2. Remaining args must be key-value pairs --- */ |
| 3611 | 100 | T | F | for (; arg_idx < items; arg_idx += 2) { |
| 3613 | 50 | T | F | SV *restrict val = ST(arg_idx + 1); |
| 50 | T | F | SV *restrict val = ST(arg_idx + 1); |
| 3614 | 50 | T | F | |
| 50 | T | F | |
| 3621 | 0 | T | F | AV *restrict x_av = (AV*)SvRV(x_sv); |
| 3627 | 100 | T | F | Newx(x, n_raw, double); |
| 3630 | 100 | T | F | SV **restrict tv = av_fetch(x_av, i, 0); |
| 3632 | 100 | T | F | x[n++] = SvNV(*tv); |
| 3634 | 100 | T | F | } |
| 3648 | 50 | T | F | n_probs = av_len(p_av) + 1; |
| 3672 | 100 | T | F | q = x[n - 1]; /* Prevent out-of-bounds mapping */ |
| 3674 | 100 | T | F | q = x[0]; |
| 50 | T | F | q = x[0]; |
| 3677 | 100 | T | F | double h = (n - 1) * p; |
| 3679 | 50 | T | F | double gamma = h - j; |
| 100 | T | F | double gamma = h - j; |
| 3686 | 100 | T | F | |
| 3693 | 100 | T | F | hv_store(res_hv, key, strlen(key), newSVnv(q), 0); |
| 3694 | 100 | T | F | } |
| 3704 | 100 | T | F | |
| 3706 | 100 | T | F | PROTOTYPE: @ |
| 50 | T | F | PROTOTYPE: @ |
| 3709 | 100 | T | F | size_t count = 0; |
| 3711 | 50 | T | F | for (size_t i = 0; i < items; i++) { |
| 100 | T | F | for (size_t i = 0; i < items; i++) { |
| 3718 | 100 | T | F | if (tv && SvOK(*tv)) { |
| 3725 | 50 | T | F | } else if (SvOK(arg)) { |
| 3726 | 100 | T | F | total += SvNV(arg); |
| 3737 | 100 | T | F | double sum(...) |
| 3739 | 100 | T | F | INIT: |
| 50 | T | F | INIT: |
| 3742 | 100 | T | F | CODE: |
| 3744 | 50 | T | F | SV* restrict arg = ST(i); |
| 100 | T | F | SV* restrict arg = ST(i); |
| 3754 | 100 | T | F | croak("sum: undefined value at array ref index %zu (argument %zu)", j, i); |
| 3764 | 100 | T | F | if (count == 0) croak("sum needs >= 1 element"); |
| 3765 | 50 | T | F | RETVAL = total; |
| 3777 | 100 | T | F | SV* restrict arg = ST(i); |
| 3779 | 100 | T | F | AV* restrict av = (AV*)SvRV(arg); |
| 50 | T | F | AV* restrict av = (AV*)SvRV(arg); |
| 3782 | 100 | T | F | SV** restrict tv = av_fetch(av, j, 0); |
| 3784 | 50 | T | F | count++; |
| 100 | T | F | count++; |
| 3794 | 100 | T | F | count++; |
| 3804 | 100 | T | F | RETVAL = sqrt(M2 / (count - 1)); |
| 3805 | 100 | T | F | OUTPUT: |
| 3821 | 50 | T | F | for (size_t j = 0; j < len; j++) { |
| 100 | T | F | for (size_t j = 0; j < len; j++) { |
| 50 | T | F | for (size_t j = 0; j < len; j++) { |
| 3827 | 50 | T | F | mean += delta / count; |
| 100 | T | F | mean += delta / count; |
| 50 | T | F | mean += delta / count; |
| 3833 | 50 | T | F | } else if (SvOK(arg)) { |
| 3838 | 100 | T | F | M2 += delta * (val - mean); |
| 3842 | 100 | T | F | } |
| 3843 | 100 | T | F | if (count < 2) croak("var needs >= 2 elements"); |
| 3844 | 100 | T | F | RETVAL = M2 / (count - 1); |
| 3845 | 100 | T | F | OUTPUT: |
| 3846 | 100 | T | F | RETVAL |
| 3847 | 100 | T | F | |
| 3848 | 50 | T | F | SV* t_test(...) |
| 3853 | 100 | T | F | double mu = 0.0, conf_level = 0.95; |
| 50 | T | F | double mu = 0.0, conf_level = 0.95; |
| 50 | T | F | double mu = 0.0, conf_level = 0.95; |
| 3857 | 50 | T | F | int arg_idx = 0; |
| 3859 | 100 | T | F | // 1. Shift first positional argument as 'x' if it's an array reference |
| 50 | T | F | // 1. Shift first positional argument as 'x' if it's an array reference |
| 50 | T | F | // 1. Shift first positional argument as 'x' if it's an array reference |
| 3862 | 50 | T | F | arg_idx++; |
| 100 | T | F | arg_idx++; |
| 3867 | 100 | T | F | y_sv = ST(arg_idx); |
| 3869 | 50 | T | F | } |
| 50 | T | F | } |
| 3875 | 100 | T | F | |
| 50 | T | F | |
| 3877 | 100 | T | F | for (; arg_idx < items; arg_idx += 2) { |
| 100 | T | F | for (; arg_idx < items; arg_idx += 2) { |
| 3878 | 100 | T | F | const char*restrict key = SvPV_nolen(ST(arg_idx)); |
| 3880 | 100 | T | F | |
| 100 | T | F | |
| 3882 | 100 | T | F | else if (strEQ(key, "y")) y_sv = val; |
| 3884 | 50 | T | F | else if (strEQ(key, "paired")) paired = SvTRUE(val); |
| 50 | T | F | else if (strEQ(key, "paired")) paired = SvTRUE(val); |
| 3890 | 100 | T | F | |
| 3892 | 100 | T | F | if (!x_sv || !SvROK(x_sv) || SvTYPE(SvRV(x_sv)) != SVt_PVAV) |
| 3895 | 50 | T | F | size_t nx = av_len(x_av) + 1; |
| 50 | T | F | size_t nx = av_len(x_av) + 1; |
| 3896 | 50 | T | F | if (nx < 2) croak("t_test: 'x' needs at least 2 elements"); |
| 50 | T | F | if (nx < 2) croak("t_test: 'x' needs at least 2 elements"); |
| 3903 | 50 | T | F | // --- Computation via Welford's Algorithm --- */ |
| 3909 | 100 | T | F | double delta = val - mean_x; |
| 3910 | 50 | T | F | mean_x += delta / (i + 1); |
| 0 | T | F | mean_x += delta / (i + 1); |
| 3919 | 50 | T | F | if (paired && ny != nx) croak("t_test: Paired arrays must be same length"); |
| 0 | T | F | if (paired && ny != nx) croak("t_test: Paired arrays must be same length"); |
| 3939 | 100 | T | F | M2_d += delta * (val - mean_d); |
| 3943 | 100 | T | F | cint_est = mean_d; |
| 3966 | 100 | T | F | hv_store(results, "estimate_x", 10, newSVnv(mean_x), 0); |
| 50 | T | F | hv_store(results, "estimate_x", 10, newSVnv(mean_x), 0); |
| 3972 | 100 | T | F | t_stat = (cint_est - mu) / std_err; |
| 3978 | 100 | T | F | if (strcmp(alternative, "less") == 0) { |
| 3980 | 100 | T | F | ci_lower = -INFINITY; |
| 100 | T | F | ci_lower = -INFINITY; |
| 3981 | 100 | T | F | ci_upper = cint_est + t_crit * std_err; |
| 50 | T | F | ci_upper = cint_est + t_crit * std_err; |
| 3982 | 50 | T | F | } else if (strcmp(alternative, "greater") == 0) { |
| 3986 | 50 | T | F | } else { |
| 3987 | 50 | T | F | t_crit = qt_tail(df, alpha / 2.0); |
| 3989 | 100 | T | F | ci_upper = cint_est + t_crit * std_err; |
| 3991 | 50 | T | F | AV*restrict conf_int = newAV(); |
| 50 | T | F | AV*restrict conf_int = newAV(); |
| 3997 | 100 | T | F | hv_store(results, "conf_int", 8, newRV_noinc((SV*)conf_int), 0); |
| 3998 | 100 | T | F | RETVAL = newRV_noinc((SV*)results); |
| 4000 | 100 | T | F | OUTPUT: |
| 4002 | 100 | T | F | |
| 4004 | 100 | T | F | INIT: |
| 4006 | 100 | T | F | croak("p_adjust: first argument must be an ARRAY reference of p-values"); |
| 4007 | 100 | T | F | } |
| 4009 | 100 | T | F | size_t n = av_len(p_av) + 1; |
| 4011 | 100 | T | F | if (n == 0) { |
| 4013 | 100 | T | F | } |
| 4014 | 50 | T | F | // Normalize method string |
| 4016 | 100 | T | F | strncpy(meth, method, 63); meth[63] = '\0'; |
| 4018 | 100 | T | F | // Resolve aliases |
| 4020 | 100 | T | F | if (strstr(meth, "benjamini") && strstr(meth, "yekutieli")) strcpy(meth, "by"); |
| 4021 | 50 | T | F | if (strcmp(meth, "fdr") == 0) strcpy(meth, "bh"); |
| 4023 | 100 | T | F | PVal *restrict arr; |
| 4025 | 100 | T | F | Newx(arr, n, PVal); |
| 4027 | 100 | T | F | |
| 4029 | 100 | T | F | SV**restrict tv = av_fetch(p_av, i, 0); |
| 4030 | 100 | T | F | arr[i].p = (tv && SvOK(*tv)) ? SvNV(*tv) : 1.0; |
| 4032 | 100 | T | F | } |
| 4034 | 50 | T | F | qsort(arr, n, sizeof(PVal), cmp_pval); |
| 4035 | 50 | T | F | PPCODE: |
| 4038 | 100 | T | F | double v = arr[i].p * n; |
| 4040 | 50 | T | F | } |
| 4045 | 100 | T | F | if (v > cummax) cummax = v; |
| 4049 | 100 | T | F | double cummin = 1.0; |
| 4054 | 100 | T | F | } |
| 4056 | 100 | T | F | double cummin = 1.0; |
| 4061 | 100 | T | F | } |
| 4063 | 100 | T | F | double q = 0.0; |
| 4066 | 100 | T | F | for (ssize_t i = n - 1; i >= 0; i--) { |
| 4070 | 100 | T | F | } |
| 4071 | 100 | T | F | } else if (strcmp(meth, "hommel") == 0) { |
| 4077 | 100 | T | F | for (size_t i = 1; i < n; i++) { |
| 4078 | 100 | T | F | double temp = (n * arr[i].p) / (i + 1.0); |
| 4079 | 50 | T | F | if (temp < min_val) { |
| 4083 | 50 | T | F | // pa <- q <- rep(min, n) |
| 4084 | 0 | T | F | for (size_t i = 0; i < n; i++) { |
| 4092 | 50 | T | F | double q1 = (j * arr[n_mj + 1].p) / 2.0; |
| 4093 | 100 | T | F | for (size_t k = 1; k < i2_len; k++) { |
| 4107 | 100 | T | F | } |
| 4109 | 100 | T | F | for (size_t i = 0; i < n; i++) { |
| 50 | T | F | for (size_t i = 0; i < n; i++) { |
| 4112 | 100 | T | F | } |
| 4114 | 50 | T | F | } |
| 100 | T | F | } |
| 4120 | 100 | T | F | } |
| 4126 | 100 | T | F | } else { |
| 4129 | 50 | T | F | } |
| 4132 | 100 | T | F | for (size_t i = 0; i < n; i++) { |
| 4134 | 100 | T | F | } |
| 50 | T | F | } |
| 4137 | 100 | T | F | |
| 4139 | 50 | T | F | PROTOTYPE: @ |
| 50 | T | F | PROTOTYPE: @ |
| 4146 | 50 | T | F | for (size_t i = 0; i < items; i++) { |
| 4156 | 100 | T | F | croak("median: undefined value at array ref index %zu (argument %zu)", j, i); |
| 4163 | 100 | T | F | } |
| 4170 | 100 | T | F | /* Pass 2: Populate the C array â Safefree before any croak */ |
| 4171 | 100 | T | F | for (size_t i = 0; i < items; i++) { |
| 4172 | 100 | T | F | SV* restrict arg = ST(i); |
| 4177 | 50 | T | F | SV** restrict tv = av_fetch(av, j, 0); |
| 50 | T | F | SV** restrict tv = av_fetch(av, j, 0); |
| 4182 | 50 | T | F | croak("median: undefined value at array ref index %zu (argument %zu)", j, i); |
| 4188 | 50 | T | F | Safefree(nums); |
| 100 | T | F | Safefree(nums); |
| 50 | T | F | Safefree(nums); |
| 4193 | 50 | T | F | /* Sort and calculate median */ |
| 50 | T | F | /* Sort and calculate median */ |
| 4194 | 50 | T | F | qsort(nums, total_count, sizeof(double), compare_doubles); |
| 4196 | 50 | T | F | median_val = (nums[total_count / 2 - 1] + nums[total_count / 2]) / 2.0; |
| 4197 | 50 | T | F | } else { |
| 4200 | 50 | T | F | Safefree(nums); |
| 50 | T | F | Safefree(nums); |
| 4202 | 50 | T | F | RETVAL = median_val; |
| 100 | T | F | RETVAL = median_val; |
| 50 | T | F | RETVAL = median_val; |
| 4208 | 100 | T | F | // --- validate method ------------------------------------------- |
| 50 | T | F | // --- validate method ------------------------------------------- |
| 4209 | 50 | T | F | if (strcmp(method, "pearson") != 0 && |
| 4213 | 100 | T | F | method); |
| 4217 | 50 | T | F | croak("cor: x must be an ARRAY reference"); |
| 4221 | 50 | T | F | if (nx == 0) croak("cor: x is empty"); |
| 4222 | 50 | T | F | |
| 4224 | 100 | T | F | bool x_is_matrix = 0; |
| 4226 | 50 | T | F | SV**restrict fp = av_fetch(x_av, 0, 0); |
| 50 | T | F | SV**restrict fp = av_fetch(x_av, 0, 0); |
| 50 | T | F | SV**restrict fp = av_fetch(x_av, 0, 0); |
| 4228 | 100 | T | F | x_is_matrix = 1; |
| 4230 | 50 | T | F | |
| 50 | T | F | |
| 50 | T | F | |
| 4239 | 50 | T | F | if (has_y && ny > 0) { |
| 4244 | 50 | T | F | |
| 50 | T | F | |
| 50 | T | F | |
| 4248 | 50 | T | F | if (!has_y) { |
| 4253 | 100 | T | F | croak("cor: x and y must have the same length (%lu vs %lu)", |
| 4255 | 50 | T | F | |
| 50 | T | F | |
| 50 | T | F | |
| 4259 | 50 | T | F | double *restrict xd, *restrict yd; |
| 50 | T | F | double *restrict xd, *restrict yd; |
| 4260 | 50 | T | F | Newx(xd, nx, double); |
| 4261 | 100 | T | F | Newx(yd, ny, double); |
| 4263 | 50 | T | F | bool x_sd0 = 1, y_sd0 = 1; |
| 50 | T | F | bool x_sd0 = 1, y_sd0 = 1; |
| 50 | T | F | bool x_sd0 = 1, y_sd0 = 1; |
| 4270 | 50 | T | F | if (!isnan(val)) { |
| 4272 | 100 | T | F | else if (val != x_first) x_sd0 = 0; |
| 4273 | 50 | T | F | } |
| 4274 | 100 | T | F | } |
| 4278 | 50 | T | F | yd[i] = val; |
| 50 | T | F | yd[i] = val; |
| 50 | T | F | yd[i] = val; |
| 4288 | 50 | T | F | croak("cor: standard deviation of y is 0"); |
| 50 | T | F | croak("cor: standard deviation of y is 0"); |
| 4292 | 50 | T | F | Safefree(xd); Safefree(yd); |
| 4294 | 50 | T | F | } |
| 4295 | 100 | T | F | } else {//Branch 2: x is a matrix (or y is a matrix) â AoA result |
| 4296 | 50 | T | F | // -- resolve x matrix dimensions |
| 4297 | 100 | T | F | if (!x_is_matrix) |
| 4301 | 50 | T | F | SV**restrict xr0 = av_fetch(x_av, 0, 0); |
| 50 | T | F | SV**restrict xr0 = av_fetch(x_av, 0, 0); |
| 50 | T | F | SV**restrict xr0 = av_fetch(x_av, 0, 0); |
| 4309 | 50 | T | F | |
| 4316 | 50 | T | F | |
| 4317 | 100 | T | F | if (has_y && y_is_matrix) { |
| 4321 | 50 | T | F | if (!rv || !SvROK(*rv) || SvTYPE(SvRV(*rv)) != SVt_PVAV) |
| 4324 | 0 | T | F | } |
| 4325 | 0 | T | F | |
| 4326 | 0 | T | F | // -- extract x columns |
| 4328 | 0 | T | F | Newx(col_x, ncols_x, double*); |
| 4330 | 0 | T | F | for (size_t j = 0; j < ncols_x; j++) { |
| 4337 | 0 | T | F | SV**restrict cv = av_fetch(row, j, 0); |
| 4338 | 0 | T | F | double val = (cv && SvOK(*cv) && looks_like_number(*cv)) ? SvNV(*cv) : NAN; |
| 4341 | 0 | T | F | if (isnan(first)) first = val; |
| 4345 | 100 | T | F | if (sd0) { |
| 4346 | 100 | T | F | for (size_t k = 0; k <= j; k++) Safefree(col_x[k]); |
| 4350 | 100 | T | F | } |
| 4354 | 100 | T | F | double **restrict col_y = NULL; |
| 4356 | 50 | T | F | |
| 4357 | 100 | T | F | // 1 = cor(X) â result is symmetric |
| 4373 | 50 | T | F | double val = (cv && SvOK(*cv) && looks_like_number(*cv)) ? SvNV(*cv) : NAN; |
| 4375 | 100 | T | F | if (!isnan(val)) { |
| 100 | T | F | if (!isnan(val)) { |
| 4380 | 50 | T | F | if (sd0) { |
| 4382 | 50 | T | F | Safefree(col_x); |
| 4387 | 50 | T | F | } |
| 50 | T | F | } |
| 100 | T | F | } |
| 4389 | 50 | T | F | ncols_y = ncols_x; |
| 50 | T | F | ncols_y = ncols_x; |
| 50 | T | F | ncols_y = ncols_x; |
| 0 | T | F | ncols_y = ncols_x; |
| 4391 | 0 | T | F | symmetric = 1; |
| 4393 | 0 | T | F | if (nrows < 2) |
| 4402 | 100 | T | F | rows_out[i] = newAV(); |
| 4404 | 50 | T | F | } |
| 4408 | 50 | T | F | Newx(r_cache, ncols_x, double*); |
| 50 | T | F | Newx(r_cache, ncols_x, double*); |
| 50 | T | F | Newx(r_cache, ncols_x, double*); |
| 4410 | 50 | T | F | Newx(r_cache[i], ncols_x, double); |
| 50 | T | F | Newx(r_cache[i], ncols_x, double); |
| 50 | T | F | Newx(r_cache[i], ncols_x, double); |
| 0 | T | F | Newx(r_cache[i], ncols_x, double); |
| 4412 | 0 | T | F | for (size_t i = 0; i < ncols_x; i++) { |
| 4414 | 0 | T | F | for (size_t j = i + 1; j < ncols_x; j++) { |
| 4415 | 0 | T | F | double r = compute_cor(col_x[i], col_x[j], nrows, method); |
| 4426 | 100 | T | F | Safefree(r_cache); r_cache = NULL; |
| 4428 | 100 | T | F | // cross-correlation: every (i,j) pair is independent |
| 50 | T | F | // cross-correlation: every (i,j) pair is independent |
| 4430 | 50 | T | F | for (size_t j = 0; j < ncols_y; j++) |
| 4432 | 50 | T | F | } |
| 50 | T | F | } |
| 50 | T | F | } |
| 4438 | 100 | T | F | for (size_t j = 0; j < ncols_x; j++) Safefree(col_x[j]); |
| 4448 | 50 | T | F | |
| 50 | T | F | |
| 4454 | 100 | T | F | double center_val = 0.0, scale_val = 1.0; |
| 4460 | 100 | T | F | data_items = items - 1; // Exclude hash from data processing |
| 4463 | 50 | T | F | SV**restrict center_sv = hv_fetch(opt_hv, "center", 6, 0); |
| 4465 | 100 | T | F | SV*restrict val_sv = *center_sv; |
| 4467 | 50 | T | F | do_center_mean = FALSE; center_val = 0.0; |
| 50 | T | F | do_center_mean = FALSE; center_val = 0.0; |
| 4470 | 50 | T | F | /* Trap booleans and empty strings before numeric checks */ |
| 50 | T | F | /* Trap booleans and empty strings before numeric checks */ |
| 4477 | 50 | T | F | } else if (SvTRUE(val_sv)) { |
| 4480 | 50 | T | F | do_center_mean = FALSE; center_val = 0.0; |
| 4481 | 50 | T | F | } |
| 4487 | 100 | T | F | SV*restrict val_sv = *scale_sv; |
| 4494 | 100 | T | F | } else if (strcasecmp(str, "none") == 0 || strcasecmp(str, "false") == 0 || strcmp(str, "0") == 0 || strcmp(str, "") == 0) { |
| 4496 | 50 | T | F | } else if (looks_like_number(val_sv)) { |
| 4503 | 50 | T | F | } |
| 4512 | 100 | T | F | if (SvROK(first_arg) && SvTYPE(SvRV(first_arg)) == SVt_PVAV) { |
| 4514 | 50 | T | F | if (av_len(av) >= 0) { |
| 0 | T | F | if (av_len(av) >= 0) { |
| 4517 | 0 | T | F | is_matrix = TRUE; |
| 4519 | 0 | T | F | } |
| 0 | T | F | } |
| 4521 | 50 | T | F | } |
| 4525 | 50 | T | F | //========================================================= |
| 4526 | 50 | T | F | AV*restrict mat_av = (AV*)SvRV(ST(0)); |
| 4527 | 100 | T | F | size_t nrow = av_len(mat_av) + 1, ncol = 0; |
| 4529 | 50 | T | F | SV**restrict first_row = av_fetch(mat_av, 0, 0); |
| 0 | T | F | SV**restrict first_row = av_fetch(mat_av, 0, 0); |
| 4532 | 0 | T | F | if (nrow == 0 || ncol == 0) croak("scale requires non-empty matrix"); |
| 4534 | 0 | T | F | // Create a new matrix for the scaled output |
| 0 | T | F | // Create a new matrix for the scaled output |
| 4539 | 50 | T | F | row_ptrs[r] = newAV(); |
| 4544 | 100 | T | F | for (size_t c = 0; c < ncol; c++) { |
| 4545 | 100 | T | F | double col_sum = 0.0; |
| 4546 | 100 | T | F | double *restrict col_data; |
| 4551 | 100 | T | F | if (row_sv && SvROK(*row_sv)) { |
| 4557 | 50 | T | F | } |
| 4558 | 100 | T | F | col_sum += col_data[r]; |
| 4560 | 50 | T | F | |
| 4570 | 50 | T | F | double sum_sq = 0.0; |
| 4577 | 100 | T | F | // Store scaled values back into the new matrix rows |
| 4580 | 100 | T | F | double final_val = (col_scale == 0.0) ? (0.0 / 0.0) : (centered / col_scale); |
| 4582 | 100 | T | F | } |
| 4585 | 50 | T | F | safefree(row_ptrs); |
| 4588 | 0 | T | F | PUSHs(sv_2mortal(newRV_noinc((SV*)result_av))); |
| 4595 | 50 | T | F | double sum = 0.0; |
| 100 | T | F | double sum = 0.0; |
| 50 | T | F | double sum = 0.0; |
| 4599 | 50 | T | F | AV*restrict av = (AV*)SvRV(arg); |
| 4600 | 100 | T | F | size_t len = av_len(av) + 1; |
| 4604 | 50 | T | F | } |
| 0 | T | F | } |
| 4607 | 50 | T | F | } |
| 100 | T | F | } |
| 4609 | 50 | T | F | if (total_count == 0) croak("scale requires at least 1 numeric element"); |
| 0 | T | F | if (total_count == 0) croak("scale requires at least 1 numeric element"); |
| 4613 | 100 | T | F | if (SvROK(arg) && SvTYPE(SvRV(arg)) == SVt_PVAV) { |
| 50 | T | F | if (SvROK(arg) && SvTYPE(SvRV(arg)) == SVt_PVAV) { |
| 4621 | 100 | T | F | } |
| 4628 | 100 | T | F | if (do_center_mean) center_val = sum / total_count; |
| 4631 | 50 | T | F | Safefree(nums); |
| 4632 | 50 | T | F | croak("scale needs >= 2 elements to calculate SD"); |
| 4675 | 50 | T | F | croak("Unknown option: %s", key); |
| 4677 | 100 | T | F | } |
| 4680 | 100 | T | F | croak("The 'data' option must be an array reference (e.g. data => [1..6])"); |
| 4681 | 50 | T | F | } |
| 4684 | 100 | T | F | if (data_len == 0) { |
| 4685 | 100 | T | F | croak("Data array cannot be empty"); |
| 100 | T | F | croak("Data array cannot be empty"); |
| 4691 | 50 | T | F | } else if (nrow_set && !ncol_set) { |
| 4693 | 50 | T | F | } else if (!nrow_set && ncol_set) { |
| 4695 | 50 | T | F | } |
| 4697 | 50 | T | F | if (nrow == 0 || ncol == 0) { |
| 100 | T | F | if (nrow == 0 || ncol == 0) { |
| 4700 | 50 | T | F | // Create the matrix (Array of Arrays) |
| 4701 | 100 | T | F | AV*restrict result_av = newAV(); |
| 4706 | 50 | T | F | row_ptrs[r] = newAV(); |
| 50 | T | F | row_ptrs[r] = newAV(); |
| 4708 | 50 | T | F | av_push(result_av, newRV_noinc((SV*)row_ptrs[r])); |
| 50 | T | F | av_push(result_av, newRV_noinc((SV*)row_ptrs[r])); |
| 4710 | 100 | T | F | // Fill the matrix |
| 4718 | 0 | T | F | c = i % ncol; |
| 4720 | 0 | T | F | r = i % nrow; |
| 4721 | 0 | T | F | c = i / nrow; |
| 4722 | 0 | T | F | } |
| 4724 | 0 | T | F | } |
| 0 | T | F | } |
| 0 | T | F | } |
| 4729 | 0 | T | F | |
| 4740 | 100 | T | F | char **restrict dummy_base = NULL, **restrict dummy_level = NULL; |
| 100 | T | F | char **restrict dummy_base = NULL, **restrict dummy_level = NULL; |
| 50 | T | F | char **restrict dummy_base = NULL, **restrict dummy_level = NULL; |
| 4744 | 100 | T | F | |
| 4745 | 100 | T | F | char **restrict row_names = NULL, **restrict valid_row_names = NULL; |
| 4746 | 50 | T | F | HV **restrict row_hashes = NULL; |
| 4758 | 100 | T | F | |
| 4760 | 50 | T | F | |
| 0 | T | F | |
| 4762 | 0 | T | F | const char *restrict key = SvPV_nolen(ST(i_arg)); |
| 0 | T | F | const char *restrict key = SvPV_nolen(ST(i_arg)); |
| 0 | T | F | const char *restrict key = SvPV_nolen(ST(i_arg)); |
| 0 | T | F | const char *restrict key = SvPV_nolen(ST(i_arg)); |
| 4766 | 100 | T | F | else croak("lm: unknown argument '%s'", key); |
| 50 | T | F | else croak("lm: unknown argument '%s'", key); |
| 4767 | 50 | T | F | } |
| 0 | T | F | } |
| 0 | T | F | } |
| 4773 | 100 | T | F | // ======================================================================== |
| 50 | T | F | // ======================================================================== |
| 4774 | 0 | T | F | ref = SvRV(data_sv); |
| 0 | T | F | ref = SvRV(data_sv); |
| 0 | T | F | ref = SvRV(data_sv); |
| 4780 | 100 | T | F | SV *restrict val = hv_iterval(hv, entry); |
| 50 | T | F | SV *restrict val = hv_iterval(hv, entry); |
| 0 | T | F | SV *restrict val = hv_iterval(hv, entry); |
| 4786 | 100 | T | F | char buf[32]; |
| 50 | T | F | char buf[32]; |
| 0 | T | F | char buf[32]; |
| 4790 | 100 | T | F | } else if (SvROK(val) && SvTYPE(SvRV(val)) == SVt_PVHV) { |
| 50 | T | F | } else if (SvROK(val) && SvTYPE(SvRV(val)) == SVt_PVHV) { |
| 4791 | 0 | T | F | n = hv_iterinit(hv); |
| 0 | T | F | n = hv_iterinit(hv); |
| 0 | T | F | n = hv_iterinit(hv); |
| 4796 | 100 | T | F | row_names[i] = savepv(hv_iterkey(entry, &len)); |
| 4797 | 50 | T | F | row_hashes[i] = (HV*)SvRV(hv_iterval(hv, entry)); |
| 0 | T | F | row_hashes[i] = (HV*)SvRV(hv_iterval(hv, entry)); |
| 4798 | 50 | T | F | i++; |
| 0 | T | F | i++; |
| 4807 | 50 | T | F | SV **restrict val = av_fetch(av, i, 0); |
| 4809 | 50 | T | F | row_hashes[i] = (HV*)SvRV(*val); |
| 4811 | 50 | T | F | row_names[i] = savepv(buf); |
| 50 | T | F | row_names[i] = savepv(buf); |
| 4818 | 100 | T | F | } else croak("lm: Data must be an Array or Hash reference"); |
| 4819 | 100 | T | F | |
| 4821 | 100 | T | F | // PHASE 2: Formula Parsing & `.` Expansion |
| 4823 | 50 | T | F | src = (char*)formula; dst = f_cpy; |
| 50 | T | F | src = (char*)formula; dst = f_cpy; |
| 4825 | 100 | T | F | *dst = '\0'; |
| 4827 | 50 | T | F | tilde = strchr(f_cpy, '~'); |
| 4828 | 100 | T | F | if (!tilde) { |
| 4838 | 50 | T | F | // IMPORTANT: skip tokens that appear inside I(...) wrappers so that |
| 4839 | 100 | T | F | // expressions like I(x^-1) are never mistakenly treated as "-1". |
| 4851 | 100 | T | F | (p_idx[2] == '\0' || p_idx[2] == '+' || p_idx[2] == '-')) { |
| 4853 | 50 | T | F | memmove(p_idx, p_idx + 2, strlen(p_idx + 2) + 1); |
| 4855 | 100 | T | F | } |
| 4856 | 50 | T | F | // Match +0 |
| 4861 | 100 | T | F | continue; |
| 4866 | 50 | T | F | memmove(p_idx, p_idx + 2, strlen(p_idx + 2) + 1); |
| 0 | T | F | memmove(p_idx, p_idx + 2, strlen(p_idx + 2) + 1); |
| 4868 | 50 | T | F | } |
| 50 | T | F | } |
| 4876 | 50 | T | F | memmove(p_idx, p_idx + 2, strlen(p_idx + 2) + 1); |
| 0 | T | F | memmove(p_idx, p_idx + 2, strlen(p_idx + 2) + 1); |
| 4883 | 100 | T | F | } |
| 4885 | 50 | T | F | } |
| 100 | T | F | } |
| 4886 | 50 | T | F | } |
| 4893 | 100 | T | F | if (rhs[0] == '+') memmove(rhs, rhs + 1, strlen(rhs + 1) + 1); |
| 4894 | 50 | T | F | size_t len_rhs = strlen(rhs); |
| 4899 | 100 | T | F | char rhs_expanded[2048] = ""; |
| 4902 | 100 | T | F | while (chunk != NULL) { |
| 4906 | 100 | T | F | SV **col_sv = av_fetch(cols, c, 0); |
| 4908 | 50 | T | F | const char *col_name = SvPV_nolen(*col_sv); |
| 4910 | 100 | T | F | size_t slen = strlen(col_name); |
| 100 | T | F | size_t slen = strlen(col_name); |
| 4911 | 100 | T | F | if (rhs_len + slen + 2 < sizeof(rhs_expanded)) { |
| 4912 | 50 | T | F | if (rhs_len > 0) { strcat(rhs_expanded, "+"); rhs_len++; } |
| 4918 | 50 | T | F | } |
| 4919 | 100 | T | F | SvREFCNT_dec(cols); |
| 4920 | 100 | T | F | } else { |
| 4921 | 100 | T | F | size_t slen = strlen(chunk); |
| 4922 | 100 | T | F | if (rhs_len + slen + 2 < sizeof(rhs_expanded)) { |
| 4923 | 50 | T | F | if (rhs_len > 0) { strcat(rhs_expanded, "+"); rhs_len++; } |
| 4936 | 100 | T | F | |
| 4947 | 50 | T | F | char *restrict left = chunk; |
| 50 | T | F | char *restrict left = chunk; |
| 4948 | 50 | T | F | char *restrict right = star + 1; |
| 4953 | 100 | T | F | terms[num_terms++] = savepv(left); |
| 4955 | 100 | T | F | size_t inter_len = strlen(left) + strlen(right) + 2; |
| 4959 | 100 | T | F | char *restrict c_chunk = strchr(chunk, '^'); |
| 4960 | 100 | T | F | if (c_chunk && strncmp(chunk, "I(", 2) != 0) *c_chunk = '\0'; |
| 4962 | 100 | T | F | } |
| 4964 | 50 | T | F | } |
| 4965 | 100 | T | F | } |
| 4970 | 50 | T | F | if (!found) uniq_terms[num_uniq++] = savepv(terms[i]); |
| 4973 | 50 | T | F | |
| 4976 | 100 | T | F | // ======================================================================== |
| 4983 | 100 | T | F | if (strcmp(uniq_terms[j], "Intercept") == 0) { |
| 4984 | 100 | T | F | exp_terms[p_exp] = savepv("Intercept"); is_dummy[p_exp] = FALSE; p_exp++; continue; |
| 4985 | 100 | T | F | } |
| 4986 | 100 | T | F | if (is_column_categorical(data_hoa, row_hashes, n, uniq_terms[j])) { |
| 4988 | 50 | T | F | unsigned int num_levels = 0, levels_cap = 8; |
| 4992 | 50 | T | F | if (str_val) { |
| 5000 | 100 | T | F | } |
| 5001 | 100 | T | F | } |
| 5003 | 100 | T | F | for (l1 = 0; l1 < num_levels - 1; l1++) |
| 5007 | 100 | T | F | if (p_exp >= exp_cap) { |
| 5009 | 100 | T | F | Renew(exp_terms, exp_cap, char*); Renew(is_dummy, exp_cap, bool); |
| 5015 | 100 | T | F | is_dummy[p_exp] = TRUE; |
| 5016 | 100 | T | F | dummy_base[p_exp] = savepv(uniq_terms[j]); |
| 5019 | 100 | T | F | } |
| 100 | T | F | } |
| 5034 | 100 | T | F | // ======================================================================== |
| 5037 | 100 | T | F | for (i = 0; i < n; i++) { |
| 5039 | 100 | T | F | if (isnan(y_val)) { Safefree(row_names[i]); continue; } |
| 100 | T | F | if (isnan(y_val)) { Safefree(row_names[i]); continue; } |
| 5042 | 100 | T | F | double *restrict row_x = (double*)safemalloc(p * sizeof(double)); |
| 5051 | 50 | T | F | } else { row_ok = FALSE; break; } |
| 5057 | 50 | T | F | if (!row_ok) { Safefree(row_names[i]); Safefree(row_x); continue; } |
| 50 | T | F | if (!row_ok) { Safefree(row_names[i]); Safefree(row_x); continue; } |
| 5060 | 50 | T | F | for (j = 0; j < p; j++) X[valid_n * p + j] = row_x[j]; |
| 50 | T | F | for (j = 0; j < p; j++) X[valid_n * p + j] = row_x[j]; |
| 5063 | 0 | T | F | Safefree(row_x); |
| 5067 | 0 | T | F | if (valid_n <= p) { |
| 5071 | 100 | T | F | Safefree(exp_terms[j]); |
| 5075 | 100 | T | F | Safefree(X); Safefree(Y); Safefree(valid_row_names); |
| 5082 | 50 | T | F | // ======================================================================== |
| 0 | T | F | // ======================================================================== |
| 5102 | 50 | T | F | double sum = 0.0; |
| 5112 | 100 | T | F | summary_hv = newHV(); terms_av = newAV(); |
| 5113 | 100 | T | F | |
| 5114 | 100 | T | F | df_res = (int)valid_n - final_rank; |
| 5116 | 100 | T | F | // rss / mss accumulated here â rse_sq computed AFTER this loop (not before) |
| 5121 | 100 | T | F | for (i = 0; i < valid_n; i++) { |
| 5135 | 50 | T | F | rse_sq = (df_res > 0) ? (rss / (double)df_res) : NAN; |
| 5136 | 0 | T | F | |
| 5137 | 0 | T | F | int df_int = has_intercept ? 1 : 0; |
| 5145 | 100 | T | F | f_stat = (mss / (double)numdf) / rse_sq; |
| 50 | T | F | f_stat = (mss / (double)numdf) / rse_sq; |
| 100 | T | F | f_stat = (mss / (double)numdf) / rse_sq; |
| 50 | T | F | f_stat = (mss / (double)numdf) / rse_sq; |
| 5153 | 50 | T | F | } |
| 5156 | 50 | T | F | hv_store(coef_hv, exp_terms[j], strlen(exp_terms[j]), newSVnv(beta[j]), 0); |
| 5157 | 100 | T | F | av_push(terms_av, newSVpv(exp_terms[j], 0)); |
| 5167 | 100 | T | F | double p_val = get_t_pvalue(t_val, df_res, "two.sided"); |
| 5174 | 50 | T | F | } |
| 50 | T | F | } |
| 0 | T | F | } |
| 0 | T | F | } |
| 5180 | 50 | T | F | hv_store(res_hv, "rank", 4, newSVuv(final_rank), 0); |
| 5184 | 100 | T | F | hv_store(res_hv, "r.squared", 9, newSVnv(r_squared), 0); |
| 5188 | 100 | T | F | av_push(fstat_av, newSVnv(f_stat)); |
| 5189 | 100 | T | F | av_push(fstat_av, newSViv(numdf)); |
| 5190 | 50 | T | F | av_push(fstat_av, newSViv(df_res)); |
| 5194 | 100 | T | F | |
| 5197 | 50 | T | F | for (i = 0; i < num_uniq; i++) Safefree(uniq_terms[i]); Safefree(uniq_terms); |
| 5200 | 100 | T | F | if (is_dummy[j]) { Safefree(dummy_base[j]); Safefree(dummy_level[j]); } |
| 5207 | 100 | T | F | RETVAL = newRV_noinc((SV*)res_hv); |
| 50 | T | F | RETVAL = newRV_noinc((SV*)res_hv); |
| 5212 | 100 | T | F | void seq(from, to, by = 1.0) |
| 5248 | 50 | T | F | CODE: |
| 5253 | 50 | T | F | size_t n = 0; |
| 5255 | 50 | T | F | int arg_start = 0; |
| 5257 | 50 | T | F | // Check if the first argument is a simple integer (rnorm(33)) |
| 5259 | 50 | T | F | n = (unsigned int)SvUV(ST(0)); |
| 50 | T | F | n = (unsigned int)SvUV(ST(0)); |
| 5262 | 50 | T | F | |
| 5263 | 100 | T | F | // --- Parse remaining named arguments from the flat stack --- |
| 5267 | 0 | T | F | |
| 0 | T | F | |
| 5269 | 0 | T | F | const char* restrict key = SvPV_nolen(ST(i)); |
| 0 | T | F | const char* restrict key = SvPV_nolen(ST(i)); |
| 5271 | 0 | T | F | |
| 5279 | 0 | T | F | |
| 5282 | 0 | T | F | av_extend(result_av, n - 1); |
| 5283 | 0 | T | F | // Generate random normals using the Box-Muller transform |
| 5284 | 0 | T | F | for (size_t i = 0; i < n; ) { |
| 5286 | 0 | T | F | do { |
| 0 | T | F | do { |
| 0 | T | F | do { |
| 5292 | 0 | T | F | |
| 5303 | 100 | T | F | OUTPUT: |
| 100 | T | F | OUTPUT: |
| 50 | T | F | OUTPUT: |
| 5307 | 100 | T | F | SV* data_sv |
| 5308 | 100 | T | F | SV* formula_sv |
| 5309 | 50 | T | F | CODE: |
| 5317 | 50 | T | F | char **restrict dummy_base = NULL, **restrict dummy_level = NULL; |
| 5318 | 50 | T | F | int *restrict term_map = NULL, *restrict left_idx = NULL, *restrict right_idx = NULL; |
| 5319 | 50 | T | F | unsigned int term_cap = 64, exp_cap = 64, num_terms = 0, num_uniq = 0, p = 0, p_exp = 0; |
| 5320 | 50 | T | F | size_t n = 0, valid_n = 0, i, j; |
| 0 | T | F | size_t n = 0, valid_n = 0, i, j; |
| 5321 | 50 | T | F | bool has_intercept = TRUE; |
| 5322 | 50 | T | F | |
| 0 | T | F | |
| 5323 | 50 | T | F | char **restrict row_names = NULL; |
| 0 | T | F | char **restrict row_names = NULL; |
| 5325 | 50 | T | F | HV *restrict data_hoa = NULL; |
| 5326 | 50 | T | F | SV *restrict ref = NULL; |
| 5328 | 50 | T | F | double **restrict X_mat = NULL; |
| 50 | T | F | double **restrict X_mat = NULL; |
| 5333 | 100 | T | F | // ======================================================================== |
| 5334 | 100 | T | F | // PHASE 1: Data Extraction |
| 5336 | 100 | T | F | ref = SvRV(data_sv); |
| 5338 | 50 | T | F | HV*restrict hv = (HV*)ref; |
| 50 | T | F | HV*restrict hv = (HV*)ref; |
| 5340 | 100 | T | F | entry = hv_iternext(hv); |
| 5342 | 50 | T | F | SV*restrict val = hv_iterval(hv, entry); |
| 5343 | 100 | T | F | if (SvROK(val) && SvTYPE(SvRV(val)) == SVt_PVAV) { |
| 5353 | 50 | T | F | Newx(row_names, n, char*); Newx(row_hashes, n, HV*); |
| 5354 | 100 | T | F | i = 0; |
| 5370 | 50 | T | F | if (val && SvROK(*val) && SvTYPE(SvRV(*val)) == SVt_PVHV) { |
| 5372 | 50 | T | F | char buf[32]; |
| 5374 | 100 | T | F | row_names[i] = savepv(buf); |
| 5375 | 50 | T | F | } else { |
| 5380 | 100 | T | F | } |
| 5385 | 50 | T | F | // ======================================================================== |
| 0 | T | F | // ======================================================================== |
| 5386 | 50 | T | F | src = (char*)formula; dst = f_cpy; |
| 0 | T | F | src = (char*)formula; dst = f_cpy; |
| 5394 | 50 | T | F | croak("aov: invalid formula, missing '~'"); |
| 0 | T | F | croak("aov: invalid formula, missing '~'"); |
| 5401 | 100 | T | F | while ((p_idx = strstr(rhs, "-1")) != NULL) { has_intercept = FALSE; memmove(p_idx, p_idx + 2, strlen(p_idx + 2) + 1); } |
| 5403 | 100 | T | F | while ((p_idx = strstr(rhs, "0+")) != NULL) { has_intercept = FALSE; memmove(p_idx, p_idx + 2, strlen(p_idx + 2) + 1); } |
| 5404 | 50 | T | F | if (rhs[0] == '0' && rhs[1] == '\0') { has_intercept = FALSE; rhs[0] = '\0'; } |
| 5406 | 50 | T | F | if (rhs[0] == '1' && rhs[1] == '\0') { rhs[0] = '\0'; } |
| 5418 | 100 | T | F | if (strcmp(chunk, ".") == 0) { |
| 5419 | 100 | T | F | AV *restrict cols = get_all_columns(data_hoa, row_hashes, n); |
| 5427 | 100 | T | F | if (rhs_len > 0) { strcat(rhs_expanded, "+"); rhs_len++; } |
| 5437 | 100 | T | F | if (rhs_len + slen + 2 < sizeof(rhs_expanded)) { |
| 5445 | 100 | T | F | |
| 5446 | 100 | T | F | // Setup arrays safely |
| 5447 | 100 | T | F | Newx(terms, term_cap, char*); |
| 5450 | 100 | T | F | Newx(is_dummy, exp_cap, bool); Newx(is_interact, exp_cap, bool); |
| 50 | T | F | Newx(is_dummy, exp_cap, bool); Newx(is_interact, exp_cap, bool); |
| 5454 | 100 | T | F | if (has_intercept) { terms[num_terms++] = savepv("Intercept"); } |
| 5455 | 100 | T | F | |
| 5456 | 50 | T | F | if (strlen(rhs_expanded) > 0) { |
| 5477 | 100 | T | F | char *restrict c_chunk = strchr(chunk, '^'); |
| 5481 | 100 | T | F | chunk = strtok(NULL, "+"); |
| 5483 | 50 | T | F | } |
| 5485 | 100 | T | F | for (i = 0; i < num_terms; i++) { |
| 5486 | 100 | T | F | bool found = FALSE; |
| 5488 | 100 | T | F | if (strcmp(terms[i], uniq_terms[k]) == 0) { found = TRUE; break; } |
| 5489 | 50 | T | F | } |
| 5496 | 50 | T | F | Newxz(term_base_level, num_uniq, char*); |
| 5497 | 100 | T | F | /* ----------------------------------------------------------------------- */ |
| 5498 | 100 | T | F | |
| 5499 | 100 | T | F | // ======================================================================== |
| 5509 | 100 | T | F | } |
| 5510 | 50 | T | F | |
| 5527 | 100 | T | F | int *restrict l_indices = (int*)safemalloc(p_exp * sizeof(int)); int l_count = 0; |
| 5547 | 100 | T | F | size_t t_len = strlen(exp_terms[l_indices[li]]) + strlen(exp_terms[r_indices[ri]]) + 2; |
| 5548 | 50 | T | F | exp_terms[p_exp] = (char*)safemalloc(t_len); |
| 5552 | 100 | T | F | left_idx[p_exp] = l_indices[li]; |
| 5554 | 50 | T | F | term_map[p_exp] = j; |
| 5557 | 100 | T | F | } |
| 5558 | 100 | T | F | } |
| 5560 | 100 | T | F | } else { |
| 5562 | 100 | T | F | char **restrict levels = NULL; |
| 5564 | 50 | T | F | Newx(levels, levels_cap, char*); |
| 5565 | 100 | T | F | for (i = 0; i < n; i++) { |
| 5570 | 50 | T | F | if (strcmp(levels[l], str_val) == 0) { found = TRUE; break; } |
| 5573 | 50 | T | F | if (num_levels >= levels_cap) { levels_cap *= 2; Renew(levels, levels_cap, char*); } |
| 5576 | 100 | T | F | Safefree(str_val); |
| 5582 | 100 | T | F | for (size_t l2 = l1 + 1; l2 < num_levels; l2++) { |
| 5584 | 100 | T | F | char *tmp = levels[l1]; levels[l1] = levels[l2]; levels[l2] = tmp; |
| 5585 | 100 | T | F | } |
| 5586 | 100 | T | F | } |
| 5588 | 50 | T | F | |
| 5594 | 100 | T | F | if (p_exp >= exp_cap) { |
| 5596 | 50 | T | F | Renew(exp_terms, exp_cap, char*); Renew(parent_term, exp_cap, char*); |
| 5598 | 50 | T | F | Renew(dummy_base, exp_cap, char*); Renew(dummy_level, exp_cap, char*); |
| 100 | T | F | Renew(dummy_base, exp_cap, char*); Renew(dummy_level, exp_cap, char*); |
| 5614 | 100 | T | F | Safefree(levels); |
| 5615 | 100 | T | F | exp_terms[p_exp] = savepv(uniq_terms[j]); |
| 5616 | 100 | T | F | parent_term[p_exp] = savepv(uniq_terms[j]); |
| 5623 | 100 | T | F | parent_term[p_exp] = savepv(uniq_terms[j]); |
| 5624 | 100 | T | F | is_dummy[p_exp] = FALSE; is_interact[p_exp] = FALSE; |
| 5627 | 100 | T | F | } |
| 5631 | 50 | T | F | for(i = 0; i < n; i++) X_mat[i] = (double*)safemalloc(p_exp * sizeof(double)); |
| 5634 | 100 | T | F | // PHASE 4: Matrix Construction & Listwise Deletion |
| 5635 | 100 | T | F | // ======================================================================== |
| 5639 | 100 | T | F | bool row_ok = TRUE; |
| 5644 | 50 | T | F | } else if (is_interact[j]) { |
| 100 | T | F | } else if (is_interact[j]) { |
| 5666 | 100 | T | F | if (valid_n <= p_exp) { |
| 5668 | 50 | T | F | for (i = 0; i < num_terms; i++) Safefree(terms[i]); Safefree(terms); |
| 50 | T | F | for (i = 0; i < num_terms; i++) Safefree(terms[i]); Safefree(terms); |
| 5672 | 100 | T | F | if (is_dummy[j]) { Safefree(dummy_base[j]); Safefree(dummy_level[j]); } |
| 5674 | 100 | T | F | Safefree(exp_terms); Safefree(parent_term); |
| 5676 | 100 | T | F | Safefree(dummy_base); Safefree(dummy_level); |
| 5689 | 100 | T | F | // ======================================================================== |
| 5690 | 100 | T | F | bool *restrict aliased_qr = (bool*)safemalloc(p_exp * sizeof(bool)); |
| 5691 | 100 | T | F | size_t *restrict rank_map = (size_t*)safemalloc(p_exp * sizeof(size_t)); |
| 5693 | 100 | T | F | double *restrict term_ss; |
| 5700 | 100 | T | F | if (aliased_qr[i]) continue; |
| 5703 | 50 | T | F | term_ss[t_idx] += Y[r_k] * Y[r_k]; |
| 5714 | 100 | T | F | int res_df = valid_n - rank; |
| 5721 | 100 | T | F | double ss = term_ss[j]; |
| 5722 | 50 | T | F | int df = term_df[j]; |
| 5725 | 50 | T | F | hv_stores(term_stats, "Df", newSViv(df)); |
| 50 | T | F | hv_stores(term_stats, "Df", newSViv(df)); |
| 5727 | 50 | T | F | hv_stores(term_stats, "Mean Sq", newSVnv(ms)); |
| 5732 | 50 | T | F | } else { |
| 5736 | 100 | T | F | hv_store(ret_hash, uniq_terms[j], strlen(uniq_terms[j]), newRV_noinc((SV*)term_stats), 0); |
| 5738 | 50 | T | F | |
| 5741 | 50 | T | F | hv_stores(res_stats, "Sum Sq", newSVnv(rss_prev)); |
| 50 | T | F | hv_stores(res_stats, "Sum Sq", newSVnv(rss_prev)); |
| 50 | T | F | hv_stores(res_stats, "Sum Sq", newSVnv(rss_prev)); |
| 50 | T | F | hv_stores(res_stats, "Sum Sq", newSVnv(rss_prev)); |
| 5748 | 50 | T | F | HV *restrict mean_hv = newHV(); |
| 50 | T | F | HV *restrict mean_hv = newHV(); |
| 5749 | 50 | T | F | HV *restrict size_hv = newHV(); |
| 50 | T | F | HV *restrict size_hv = newHV(); |
| 5750 | 50 | T | F | for (size_t c = 0; c <= (size_t)av_len(all_cols); c++) { |
| 50 | T | F | for (size_t c = 0; c <= (size_t)av_len(all_cols); c++) { |
| 5751 | 50 | T | F | SV **restrict col_sv = av_fetch(all_cols, c, 0); |
| 50 | T | F | SV **restrict col_sv = av_fetch(all_cols, c, 0); |
| 5755 | 50 | T | F | IV col_count = 0; |
| 5758 | 50 | T | F | if (!isnan(val)) { col_sum += val; col_count++; } |
| 5761 | 50 | T | F | hv_store(mean_hv, col_name, strlen(col_name), newSVnv(col_mean), 0); |
| 50 | T | F | hv_store(mean_hv, col_name, strlen(col_name), newSVnv(col_mean), 0); |
| 5764 | 100 | T | F | SvREFCNT_dec(all_cols); |
| 5765 | 100 | T | F | HV *restrict gs_hv = newHV(); |
| 5768 | 50 | T | F | hv_stores(ret_hash, "group_stats", newRV_noinc((SV*)gs_hv)); |
| 50 | T | F | hv_stores(ret_hash, "group_stats", newRV_noinc((SV*)gs_hv)); |
| 5769 | 50 | T | F | } |
| 50 | T | F | } |
| 5774 | 50 | T | F | for (i = 0; i < num_uniq; i++) Safefree(uniq_terms[i]); Safefree(uniq_terms); |
| 50 | T | F | for (i = 0; i < num_uniq; i++) Safefree(uniq_terms[i]); Safefree(uniq_terms); |
| 5779 | 50 | T | F | Safefree(exp_terms); Safefree(parent_term); |
| 5780 | 50 | T | F | Safefree(is_dummy); Safefree(is_interact); |
| 5785 | 50 | T | F | Safefree(X_mat); Safefree(Y); |
| 5786 | 50 | T | F | Safefree(aliased_qr); Safefree(rank_map); |
| 5787 | 50 | T | F | if (row_hashes) Safefree(row_hashes); |
| 50 | T | F | if (row_hashes) Safefree(row_hashes); |
| 5788 | 50 | T | F | RETVAL = newRV_noinc((SV*)ret_hash); |
| 50 | T | F | RETVAL = newRV_noinc((SV*)ret_hash); |
| 5789 | 50 | T | F | } |
| 50 | T | F | } |
| 5790 | 50 | T | F | OUTPUT: |
| 50 | T | F | OUTPUT: |
| 5832 | 50 | T | F | a = (a_ptr && SvOK(*a_ptr)) ? SvIV(*a_ptr) : 0; |
| 5833 | 100 | T | F | b = (b_ptr && SvOK(*b_ptr)) ? SvIV(*b_ptr) : 0; |
| 5837 | 100 | T | F | croak("Invalid 2D Array structure"); |
| 5838 | 100 | T | F | } |
| 5839 | 100 | T | F | } else if (SvTYPE(deref) == SVt_PVHV) { |
| 5840 | 50 | T | F | // Fixed 2D Hash Logic: Sort keys lexically to enforce structured rows/columns |
| 100 | T | F | // Fixed 2D Hash Logic: Sort keys lexically to enforce structured rows/columns |
| 5841 | 100 | T | F | HV*restrict outer = (HV*)deref; |
| 5842 | 100 | T | F | if (hv_iterinit(outer) != 2) croak("Outer hash must have exactly 2 keys"); |
| 5843 | 50 | T | F | HE*restrict he1 = hv_iternext(outer); |
| 5844 | 0 | T | F | HE*restrict he2 = hv_iternext(outer); |
| 5845 | 0 | T | F | if (!he1 || !he2) croak("Invalid outer hash"); |
| 5849 | 100 | T | F | HE*restrict row2_he = (strcmp(k1, k2) < 0) ? he2 : he1; |
| 50 | T | F | HE*restrict row2_he = (strcmp(k1, k2) < 0) ? he2 : he1; |
| 5850 | 50 | T | F | SV*restrict row1_sv = hv_iterval(outer, row1_he); |
| 50 | T | F | SV*restrict row1_sv = hv_iterval(outer, row1_he); |
| 5851 | 100 | T | F | SV*restrict row2_sv = hv_iterval(outer, row2_he); |
| 50 | T | F | SV*restrict row2_sv = hv_iterval(outer, row2_he); |
| 5852 | 50 | T | F | if (!SvROK(row1_sv) || SvTYPE(SvRV(row1_sv)) != SVt_PVHV || |
| 50 | T | F | if (!SvROK(row1_sv) || SvTYPE(SvRV(row1_sv)) != SVt_PVHV || |
| 5853 | 100 | T | F | !SvROK(row2_sv) || SvTYPE(SvRV(row2_sv)) != SVt_PVHV) { |
| 50 | T | F | !SvROK(row2_sv) || SvTYPE(SvRV(row2_sv)) != SVt_PVHV) { |
| 5856 | 100 | T | F | HV*restrict in1 = (HV*)SvRV(row1_sv); |
| 5857 | 50 | T | F | HV*restrict in2 = (HV*)SvRV(row2_sv); |
| 5858 | 100 | T | F | if (hv_iterinit(in1) != 2 || hv_iterinit(in2) != 2) croak("Inner hashes must have exactly 2 keys"); |
| 5859 | 50 | T | F | HE*restrict in1_he1 = hv_iternext(in1); |
| 5860 | 50 | T | F | HE*restrict in1_he2 = hv_iternext(in1); |
| 5862 | 50 | T | F | const char*restrict in1_k2 = SvPV_nolen(hv_iterkeysv(in1_he2)); |
| 5866 | 100 | T | F | HE*restrict in2_he2 = hv_iternext(in2); |
| 5867 | 50 | T | F | const char*restrict in2_k1 = SvPV_nolen(hv_iterkeysv(in2_he1)); |
| 5868 | 50 | T | F | const char*restrict in2_k2 = SvPV_nolen(hv_iterkeysv(in2_he2)); |
| 50 | T | F | const char*restrict in2_k2 = SvPV_nolen(hv_iterkeysv(in2_he2)); |
| 5869 | 100 | T | F | HE*restrict in2_c1 = (strcmp(in2_k1, in2_k2) < 0) ? in2_he1 : in2_he2; |
| 50 | T | F | HE*restrict in2_c1 = (strcmp(in2_k1, in2_k2) < 0) ? in2_he1 : in2_he2; |
| 5870 | 100 | T | F | HE*restrict in2_c2 = (strcmp(in2_k1, in2_k2) < 0) ? in2_he2 : in2_he1; |
| 5871 | 100 | T | F | a = (hv_iterval(in1, in1_c1) && SvOK(hv_iterval(in1, in1_c1))) ? SvIV(hv_iterval(in1, in1_c1)) : 0; |
| 100 | T | F | a = (hv_iterval(in1, in1_c1) && SvOK(hv_iterval(in1, in1_c1))) ? SvIV(hv_iterval(in1, in1_c1)) : 0; |
| 5872 | 100 | T | F | b = (hv_iterval(in1, in1_c2) && SvOK(hv_iterval(in1, in1_c2))) ? SvIV(hv_iterval(in1, in1_c2)) : 0; |
| 50 | T | F | b = (hv_iterval(in1, in1_c2) && SvOK(hv_iterval(in1, in1_c2))) ? SvIV(hv_iterval(in1, in1_c2)) : 0; |
| 50 | T | F | b = (hv_iterval(in1, in1_c2) && SvOK(hv_iterval(in1, in1_c2))) ? SvIV(hv_iterval(in1, in1_c2)) : 0; |
| 5873 | 100 | T | F | c = (hv_iterval(in2, in2_c1) && SvOK(hv_iterval(in2, in2_c1))) ? SvIV(hv_iterval(in2, in2_c1)) : 0; |
| 50 | T | F | c = (hv_iterval(in2, in2_c1) && SvOK(hv_iterval(in2, in2_c1))) ? SvIV(hv_iterval(in2, in2_c1)) : 0; |
| 5874 | 100 | T | F | d = (hv_iterval(in2, in2_c2) && SvOK(hv_iterval(in2, in2_c2))) ? SvIV(hv_iterval(in2, in2_c2)) : 0; |
| 5876 | 50 | T | F | croak("Input must be a 2D Array or 2D Hash"); |
| 5878 | 50 | T | F | |
| 0 | T | F | |
| 5879 | 100 | T | F | // Perform Calculations via Helpers |
| 5881 | 100 | T | F | double mle_or, ci_low, ci_high; |
| 5885 | 0 | T | F | HV*restrict ret_hash = newHV(); |
| 5887 | 0 | T | F | hv_stores(ret_hash, "alternative", newSVpv(alternative, 0)); |
| 5889 | 0 | T | F | av_push(ci_array, newSVnv(ci_low)); |
| 5893 | 0 | T | F | hv_stores(ret_hash, "estimate", newRV_noinc((SV*)est_hash)); |
| 5895 | 0 | T | F | hv_stores(ret_hash, "p_value", newSVnv(p_val)); |
| 0 | T | F | hv_stores(ret_hash, "p_value", newSVnv(p_val)); |
| 5896 | 0 | T | F | // Return the HashRef |
| 5898 | 0 | T | F | } |
| 5902 | 0 | T | F | SV* power_t_test(...) |
| 5904 | 0 | T | F | { |
| 5906 | 0 | T | F | SV*restrict sv_delta = NULL; |
| 5918 | 100 | T | F | const char* restrict key = SvPV_nolen(ST(i)); |
| 100 | T | F | const char* restrict key = SvPV_nolen(ST(i)); |
| 5920 | 100 | T | F | |
| 100 | T | F | |
| 5921 | 100 | T | F | if (strEQ(key, "n")) sv_n = val; |
| 5935 | 50 | T | F | bool is_null_power = (!sv_power || !SvOK(sv_power)); |
| 100 | T | F | bool is_null_power = (!sv_power || !SvOK(sv_power)); |
| 5937 | 100 | T | F | bool is_null_sig_level = (sv_sig_level && !SvOK(sv_sig_level)); |
| 5939 | 50 | T | F | unsigned int missing_count = 0; |
| 5943 | 100 | T | F | if (is_null_sd) missing_count++; |
| 50 | T | F | if (is_null_sd) missing_count++; |
| 5944 | 100 | T | F | if (is_null_sig_level) missing_count++; |
| 5945 | 50 | T | F | |
| 5949 | 100 | T | F | |
| 5952 | 100 | T | F | double sd = (!sv_sd || is_null_sd) ? 1.0 : SvNV(sv_sd); |
| 5953 | 50 | T | F | double sig_level = (!sv_sig_level || is_null_sig_level) ? 0.05 : SvNV(sv_sig_level); |
| 5954 | 0 | T | F | double power = is_null_power ? 0.0 : SvNV(sv_power); |
| 5958 | 100 | T | F | if (is_null_power) { |
| 50 | T | F | if (is_null_power) { |
| 50 | T | F | if (is_null_power) { |
| 5973 | 100 | T | F | if (p_body(n, delta, mid, sig_level, tsample, tside, strict) > power) low = mid; |
| 5974 | 50 | T | F | else high = mid; |
| 50 | T | F | else high = mid; |
| 5982 | 100 | T | F | if (p_body(n, mid, sd, sig_level, tsample, tside, strict) < power) low = mid; |
| 5984 | 50 | T | F | } |
| 50 | T | F | } |
| 5988 | 50 | T | F | while (high - low > tol) { |
| 5992 | 50 | T | F | } |
| 5998 | 100 | T | F | hv_stores(ret, "sd", newSVnv(sd)); |
| 6000 | 50 | T | F | hv_stores(ret, "power", newSVnv(power)); |
| 6005 | 100 | T | F | if (n_str[0] != '\0') hv_stores(ret, "note", newSVpv(n_str, 0)); |
| 6007 | 50 | T | F | } |
| 50 | T | F | } |
| 50 | T | F | } |
| 6021 | 50 | T | F | if (t == SVt_PVAV) { |
| 50 | T | F | if (t == SVt_PVAV) { |
| 50 | T | F | if (t == SVt_PVAV) { |
| 6023 | 50 | T | F | } else if (t == SVt_PVHV) { |
| 50 | T | F | } else if (t == SVt_PVHV) { |
| 50 | T | F | } else if (t == SVt_PVHV) { |
| 6030 | 50 | T | F | g_sv = ST(arg_idx++); |
| 6031 | 50 | T | F | } |
| 6040 | 100 | T | F | } |
| 6043 | 50 | T | F | croak("kruskal_test: cannot mix 'h' (hash-of-arrays) with 'x'/'g' inputs"); |
| 50 | T | F | croak("kruskal_test: cannot mix 'h' (hash-of-arrays) with 'x'/'g' inputs"); |
| 50 | T | F | croak("kruskal_test: cannot mix 'h' (hash-of-arrays) with 'x'/'g' inputs"); |
| 6044 | 50 | T | F | |
| 50 | T | F | |
| 6049 | 100 | T | F | char **restrict group_names = NULL; /* Track names to build group_stats */ |
| 6068 | 50 | T | F | croak("kruskal_test: every value in 'h' must be an ARRAY reference"); |
| 50 | T | F | croak("kruskal_test: every value in 'h' must be an ARRAY reference"); |
| 6070 | 0 | T | F | } |
| 6071 | 0 | T | F | if (total < 2) croak("not enough observations"); |
| 0 | T | F | if (total < 2) croak("not enough observations"); |
| 6074 | 0 | T | F | size_t num_keys = HvKEYS(h_hv); |
| 6087 | 100 | T | F | SV **restrict el = av_fetch(av, i, 0); |
| 6096 | 100 | T | F | k = group_id; /* number of unique groups = number of hash keys */ |
| 6097 | 50 | T | F | |
| 6104 | 50 | T | F | if (!g_sv || !SvROK(g_sv) || SvTYPE(SvRV(g_sv)) != SVt_PVAV) |
| 6124 | 100 | T | F | if (x_el && SvOK(*x_el) && looks_like_number(*x_el) |
| 6125 | 50 | T | F | && g_el && SvOK(*g_el)) { |
| 50 | T | F | && g_el && SvOK(*g_el)) { |
| 6132 | 50 | T | F | } else { |
| 6162 | 50 | T | F | |
| 50 | T | F | |
| 50 | T | F | |
| 6168 | 50 | T | F | for (size_t i = 0; i < valid_n; i++) { |
| 50 | T | F | for (size_t i = 0; i < valid_n; i++) { |
| 50 | T | F | for (size_t i = 0; i < valid_n; i++) { |
| 6174 | 50 | T | F | |
| 6179 | 100 | T | F | stat_base += (group_rank_sums[i] * group_rank_sums[i]) |
| 6183 | 50 | T | F | double n_d = (double)valid_n; |
| 6184 | 50 | T | F | double stat = (12.0 * stat_base / (n_d * (n_d + 1.0))) - 3.0 * (n_d + 1.0); |
| 6185 | 100 | T | F | if (tie_adj > 0.0) { |
| 6186 | 50 | T | F | double tie_denom = 1.0 - (tie_adj / (n_d * n_d * n_d - n_d)); |
| 0 | T | F | double tie_denom = 1.0 - (tie_adj / (n_d * n_d * n_d - n_d)); |
| 6187 | 0 | T | F | stat /= tie_denom; |
| 6192 | 50 | T | F | // 9. Return structured data exactly like R's htest |
| 50 | T | F | // 9. Return structured data exactly like R's htest |
| 50 | T | F | // 9. Return structured data exactly like R's htest |
| 6194 | 50 | T | F | hv_stores(res, "statistic", newSVnv(stat)); |
| 50 | T | F | hv_stores(res, "statistic", newSVnv(stat)); |
| 50 | T | F | hv_stores(res, "statistic", newSVnv(stat)); |
| 6197 | 50 | T | F | hv_stores(res, "p.value", newSVnv(p_val)); |
| 50 | T | F | hv_stores(res, "p.value", newSVnv(p_val)); |
| 6199 | 50 | T | F | |
| 50 | T | F | |
| 50 | T | F | |
| 6210 | 100 | T | F | hv_store(stats_mean, group_names[i], nlen, newSVnv(mean), 0); |
| 6212 | 50 | T | F | } |
| 50 | T | F | } |
| 50 | T | F | } |
| 6214 | 50 | T | F | } |
| 50 | T | F | } |
| 6225 | 100 | T | F | |
| 6227 | 50 | T | F | } |
| 50 | T | F | } |
| 50 | T | F | } |
| 6229 | 50 | T | F | RETVAL |
| 50 | T | F | RETVAL |
| 6238 | 100 | T | F | unsigned int arg_idx = 0; |
| 6239 | 100 | T | F | |
| 6246 | 100 | T | F | // 2. Shift positional argument 'y' if it's an array reference |
| 6255 | 50 | T | F | } |
| 6257 | 50 | T | F | // --- Parse named arguments from the remaining flat stack --- |
| 6264 | 50 | T | F | else if (strEQ(key, "ratio")) ratio = SvNV(val); |