from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Candidate names for the confidence column, in order of preference.
_CONFIDENCE_COLUMNS = ("confidence", "resp_confidence")


def _confidence_column(condensed: pd.DataFrame) -> Optional[str]:
    """Return the first confidence column found in *condensed*, or None."""
    for name in _CONFIDENCE_COLUMNS:
        if name in condensed.columns:
            return name
    return None


def _numeric_price_confidence(condensed: pd.DataFrame, conf_col: str) -> pd.DataFrame:
    """Return a copy holding only *conf_col* and ``diff_price`` as numbers.

    Values that cannot be parsed as numbers become NaN, and every row with a
    NaN in either column is dropped.
    """
    columns = [conf_col, "diff_price"]
    frame = condensed[columns].copy()
    for column in columns:
        frame[column] = pd.to_numeric(frame[column], errors="coerce")
    return frame.dropna(subset=columns)


def plotVariables(x, y, title: Optional[str] = None) -> None:
    """Show a scatter plot of *y* against *x* with a dashed line at y = 0.

    Args:
        x: Values for the horizontal axis. If it has a ``name`` attribute
            (e.g. a pandas Series), that name labels the axis.
        y: Values for the vertical axis, labelled the same way.
        title: Figure title. When omitted, the title is built from the names
            of *y* and *x*; if either name is missing, no title is drawn.
    """
    # getattr keeps plain lists / arrays (which have no ``name``) working.
    x_name = getattr(x, "name", None)
    y_name = getattr(y, "name", None)
    if title is None and x_name is not None and y_name is not None:
        title = f"{y_name} vs. {x_name}"

    plt.figure()
    plt.scatter(x, y)
    plt.axhline(0, linestyle="--")  # reference line
    if x_name is not None:
        plt.xlabel(x_name)
    if y_name is not None:
        plt.ylabel(y_name)
    if title is not None:
        plt.title(title)
    plt.grid(True)
    plt.show()


def plotPriceConfidence(condensed: pd.DataFrame) -> None:
    """Show a scatter plot of ``diff_price`` against the confidence column.

    The confidence column is ``confidence`` if present, otherwise
    ``resp_confidence``. Both columns are coerced to numbers and rows that
    fail the conversion are dropped before plotting.

    Args:
        condensed: Frame containing ``diff_price`` and a confidence column.

    Raises:
        KeyError: If neither confidence column exists.
    """
    conf_col = _confidence_column(condensed)
    if conf_col is None:
        raise KeyError("No 'confidence' or 'resp_confidence' column found in condensed.")

    dfp = _numeric_price_confidence(condensed, conf_col)
    plotVariables(
        dfp[conf_col],
        dfp["diff_price"],
        title="diff_price vs. confidence",
    )


def histPriceDiff(condensed: pd.DataFrame) -> None:
    """Show one ``diff_price`` histogram per confidence band.

    The confidence column is ``confidence`` if present, otherwise
    ``resp_confidence``. Rows whose confidence or ``diff_price`` is not
    numeric are dropped. If the largest confidence is at most 1.01 the column
    is multiplied by 100 before banding. All histograms share the same 30 bin
    edges so they can be compared directly. Empty bands are skipped with a
    printed notice, and the row count of every band is printed at the end.

    Args:
        condensed: Frame containing ``diff_price`` and a confidence column.

    Raises:
        KeyError: If neither confidence column exists.
        ValueError: If no row has numeric values in both columns.
    """
    conf_col = _confidence_column(condensed)
    if conf_col is None:
        raise KeyError("No 'confidence' or 'resp_confidence' column in condensed.")

    # --- prepare data ---
    df = _numeric_price_confidence(condensed, conf_col)
    if df.empty:
        raise ValueError("No numeric diff_price values to plot.")

    # scale confidence to 0–100 if it looks like 0–1
    if df[conf_col].max() <= 1.01:
        df[conf_col] = df[conf_col] * 100

    # --- define bands ---
    # NOTE(review): confidences above 100 match none of these bands, so the
    # printed counts may not add up to len(df) — confirm this is intended.
    conf = df[conf_col]
    bands = [
        ("confidence == 100", df[conf == 100]),
        ("100 > confidence ≥ 90", df[(conf < 100) & (conf >= 90)]),
        ("90 > confidence ≥ 80", df[(conf < 90) & (conf >= 80)]),
        ("80 > confidence ≥ 50", df[(conf < 80) & (conf >= 50)]),
        ("50 > confidence", df[conf < 50]),
    ]

    # --- common bins across all groups for fair comparison ---
    all_vals = df["diff_price"].to_numpy()
    xmin, xmax = all_vals.min(), all_vals.max()
    if xmin == xmax:
        # degenerate case: make a tiny range around the single value
        xmin, xmax = xmin - 0.5, xmax + 0.5
    bins = np.linspace(xmin, xmax, 31)  # 31 edges -> 30 bins

    # --- plot each histogram in its own figure (no subplots, no explicit colors) ---
    for title, d in bands:
        if d.empty:
            print(f"[skip] {title}: no rows")
            continue

        plt.figure()
        plt.hist(d["diff_price"].values, bins=bins)
        plt.title(f"diff_price for {title}")
        plt.xlabel("diff_price")
        plt.ylabel("count")
        plt.grid(True)
        plt.show()

    # quick counts per band
    for title, d in bands:
        print(f"{title}: {len(d)} rows")