diff --git a/dataMining.ipynb b/dataMining.ipynb
new file mode 100644
index 0000000..23a29bb
--- /dev/null
+++ b/dataMining.ipynb
@@ -0,0 +1,255 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 156,
+   "id": "83c66bad-e9bb-4ea7-9f6b-be842df821f0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import json\n",
+    "import re\n",
+    "import unicodedata\n",
+    "import numpy as np\n",
+    "# Load the quotations export (semicolon-separated); malformed lines are skipped.\n",
+    "data = pd.read_csv(\"./quotations.csv\", sep=\";\", on_bad_lines='skip')\n",
+    "\n",
+    "# Keep only rows that have both a textual requirements description and a price.\n",
+    "# No product filter is applied here: every product_name is retained.\n",
+    "data = data.dropna(subset=['requirements_textual', 'price'])\n",
+    "\n",
+    "# Disabled extra filter from an earlier draft (written against a frame named `booking`):\n",
+    "#    product_name == \"Fensterreinigung\" and quotation_info_request is missing\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 170,
+   "id": "d78b8f4a-f140-463f-8369-163f520dca4b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_3060060/2985588999.py:27: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  shortened.loc[len(shortened)] = [othersFreq,othersRelFreq,\"Sonstige\",othersCumRelFreq]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>freq</th>\n",
+       "      <th>relFreq</th>\n",
+       "      <th>name</th>\n",
+       "      <th>cumRelFreq</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>5169</td>\n",
+       "      <td>0.41</td>\n",
+       "      <td>Umzugsreinigung</td>\n",
+       "      <td>0.41</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2448</td>\n",
+       "      <td>0.19</td>\n",
+       "      <td>Intensivreinigung</td>\n",
+       "      <td>0.60</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2124</td>\n",
+       "      <td>0.17</td>\n",
+       "      <td>Fensterreinigung</td>\n",
+       "      <td>0.77</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>503</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>Poolreinigung</td>\n",
+       "      <td>0.81</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>474</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>Wintergartenreinigung</td>\n",
+       "      <td>0.85</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>456</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>Baureinigung</td>\n",
+       "      <td>0.88</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>418</td>\n",
+       "      <td>0.03</td>\n",
+       "      <td>Polsterreinigung</td>\n",
+       "      <td>0.91</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>158</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>Messie-Reinigung</td>\n",
+       "      <td>0.93</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>133</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>Frühjahrsputz</td>\n",
+       "      <td>0.94</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>116</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>Treppenhaus-Reinigung</td>\n",
+       "      <td>0.95</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>682</td>\n",
+       "      <td>0.05</td>\n",
+       "      <td>Sonstige</td>\n",
+       "      <td>1.00</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    freq  relFreq                   name  cumRelFreq\n",
+       "0   5169     0.41        Umzugsreinigung        0.41\n",
+       "1   2448     0.19      Intensivreinigung        0.60\n",
+       "2   2124     0.17       Fensterreinigung        0.77\n",
+       "3    503     0.04          Poolreinigung        0.81\n",
+       "4    474     0.04  Wintergartenreinigung        0.85\n",
+       "5    456     0.04           Baureinigung        0.88\n",
+       "6    418     0.03       Polsterreinigung        0.91\n",
+       "7    158     0.01       Messie-Reinigung        0.93\n",
+       "8    133     0.01          Frühjahrsputz        0.94\n",
+       "9    116     0.01  Treppenhaus-Reinigung        0.95\n",
+       "10   682     0.05               Sonstige        1.00"
+      ]
+     },
+     "execution_count": 170,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Special-cleaning requests only.\n",
+    "# Absolute, relative and cumulative frequency of each cleaning type (product_name).\n",
+    "TOP_N = 10  # products listed individually; everything else is grouped as \"Sonstige\"\n",
+    "\n",
+    "products = (\n",
+    "    data[\"product_name\"]\n",
+    "    .value_counts()  # one pass over the column; rows without a product_name are not counted\n",
+    "    .rename_axis(\"name\")\n",
+    "    .reset_index(name=\"freq\")\n",
+    "    # most frequent first; equal counts are ordered by name, descending\n",
+    "    .sort_values([\"freq\", \"name\"], ascending=False, ignore_index=True)\n",
+    "    # relFreq is relative to all rows of `data`, including rows without a product_name\n",
+    "    .assign(relFreq=lambda counts: counts[\"freq\"] / len(data))\n",
+    "    # accumulate the unrounded shares, then round both columns once\n",
+    "    .assign(cumRelFreq=lambda counts: counts[\"relFreq\"].cumsum())\n",
+    "    .round({\"relFreq\": 2, \"cumRelFreq\": 2})\n",
+    "    .reindex(columns=[\"freq\", \"relFreq\", \"name\", \"cumRelFreq\"])\n",
+    ")\n",
+    "\n",
+    "# head() returns a slice of `products`; copy it so that appending the summary row\n",
+    "# writes to an independent frame instead of raising SettingWithCopyWarning.\n",
+    "shortened = products.head(TOP_N).copy()\n",
+    "\n",
+    "# Collapse every product outside the top TOP_N into a single \"Sonstige\" (other) row.\n",
+    "othersFreq = products[\"freq\"].sum() - shortened[\"freq\"].sum()\n",
+    "othersRelFreq = round(othersFreq / products[\"freq\"].sum(), 2)\n",
+    "othersCumRelFreq = 1  # the summary row closes the cumulative distribution\n",
+    "shortened.loc[len(shortened)] = [othersFreq, othersRelFreq, \"Sonstige\", othersCumRelFreq]\n",
+    "shortened"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4286aa50-1ef5-45a0-a90b-c65534e1ec3a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Draw a reproducible random sample of quotations.\n",
+    "#\n",
+    "# NOTE(review): this cell previously contained a dangling `booking = ` (a SyntaxError)\n",
+    "# and sampled from `filtered`, which no cell defined. The window-cleaning subset below\n",
+    "# is an assumption based on the export name \"windowQuotationsSample.csv\" - confirm it.\n",
+    "# An earlier draft of the filter also required quotation_info_request to be missing.\n",
+    "filtered = data.loc[data[\"product_name\"] == \"Fensterreinigung\"]\n",
+    "\n",
+    "# Cap the sample size so sampling cannot fail when fewer than 1000 rows match.\n",
+    "df = filtered.sample(n=min(1000, len(filtered)), random_state=43).reset_index(drop=True)\n",
+    "\n",
+    "# Optional export of the sample (disabled):\n",
+    "#df.to_csv(\"./windowQuotationsSample.csv\", index=False,sep=\";\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

	freq	relFreq	name	cumRelFreq
0	5169	0.41	Umzugsreinigung	0.41
1	2448	0.19	Intensivreinigung	0.60
2	2124	0.17	Fensterreinigung	0.77
3	503	0.04	Poolreinigung	0.81
4	474	0.04	Wintergartenreinigung	0.85
5	456	0.04	Baureinigung	0.88
6	418	0.03	Polsterreinigung	0.91
7	158	0.01	Messie-Reinigung	0.93
8	133	0.01	Frühjahrsputz	0.94
9	116	0.01	Treppenhaus-Reinigung	0.95
10	682	0.05	Sonstige	1.00