{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "83c66bad-e9bb-4ea7-9f6b-be842df821f0", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import json\n", "import re\n", "import unicodedata\n", "import numpy as np\n", "data = pd.read_csv(\"./quotations.csv\", on_bad_lines='skip',sep=\";\")\n", "\n", "# Keep only rows where both requirements_textual and price are present.\n", "# No product filter (e.g. Fensterreinigung) is applied here.\n", "data = data.loc[\n", " data['requirements_textual'].notna()\n", " & data['price'].notna()\n", "]\n", "\n", "\n", "# NOTE(review): the disabled conditions below reference `booking`, which this cell does not define.\n", "# (booking['product_name'] == \"Fensterreinigung\")\n", "# & booking['quotation_info_request'].isna()\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "d78b8f4a-f140-463f-8369-163f520dca4b", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3260376/2893397658.py:27: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " shortened.loc[len(shortened)] = [othersFreq,othersRelFreq,\"Sonstige\",othersCumRelFreq]\n" ] }, { "data": { "text/html": [ "
| \n", " | freq | \n", "relFreq | \n", "name | \n", "cumRelFreq | \n", "
|---|---|---|---|---|
| 0 | \n", "5169 | \n", "0.41 | \n", "Umzugsreinigung | \n", "0.41 | \n", "
| 1 | \n", "2448 | \n", "0.19 | \n", "Intensivreinigung | \n", "0.60 | \n", "
| 2 | \n", "2124 | \n", "0.17 | \n", "Fensterreinigung | \n", "0.77 | \n", "
| 3 | \n", "2940 | \n", "0.23 | \n", "Sonstige | \n", "1.00 | \n", "