november status

create frequency table
2025-11-10 11:37:20 +01:00 · 2025-08-21 17:01:44 +02:00
5 changed files with 1743 additions and 54 deletions
--- a/analysis.ipynb
+++ b/analysis.ipynb
@ -2,11 +2,11 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2025-08-13T09:04:47.365323Z",
-     "start_time": "2025-08-13T09:04:46.211866Z"
+     "end_time": "2025-09-29T18:17:56.534291268Z",
+     "start_time": "2025-09-29T18:17:56.490560986Z"
    }
   },
   "outputs": [],
@ -19,6 +19,13 @@
    "import numpy as np"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -27,10 +34,7 @@
     "end_time": "2025-08-13T09:05:06.879965Z",
     "start_time": "2025-08-13T09:05:06.803852Z"
    },
-    "collapsed": false,
-    "jupyter": {
-     "outputs_hidden": false
-    }
+    "collapsed": false
   },
   "outputs": [],
   "source": [
@ -39,44 +43,24 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-08-19T09:25:33.132235159Z",
+     "start_time": "2025-08-19T09:25:33.104903712Z"
+    },
    "scrolled": true
   },
   "outputs": [
    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "   totalPrice  totalPriceLow  totalPriceHigh  \\\n",
-      "0         145            145             145   \n",
-      "\n",
-      "                                          ZusatzInfo  complete  confidence  \\\n",
-      "0  Bitte bestätigen Sie, dass die Oberlichten als...     False         0.8   \n",
-      "\n",
-      "                           missverständliche Aspekte  \\\n",
-      "0  Der Ausdruck \"Oberlichten\" ist nicht eindeutig...   \n",
-      "\n",
-      "                          Zu erbringende Leistungen:  \\\n",
-      "0  4 Altbau‑Doppelfenster (je 25 €), 2 Neubaufens...   \n",
-      "\n",
-      "                                           Rechenweg  \\\n",
-      "0  4×25 € + 2×15 € + 1×15 € = 100 € + 30 € + 15 €...   \n",
-      "\n",
-      "                                          Kommentare  \n",
-      "0  Die Kosten basieren ausschließlich auf den in ...  \n",
-      "Index(['id', 'product_id', 'product_name', 'price', 'currency', 'duration',\n",
-      "       'requirements_textual', 'street', 'zipcode', 'city', 'country',\n",
-      "       'coordinate', 'comment_price', 'comment_key', 'comment_important',\n",
-      "       'comment_restrict', 'comment_other', 'inquired',\n",
-      "       'quotation_info_request', 'quotation_state', 'quotation_ts',\n",
-      "       'quotation_comment', 'completed', 'customer_id', 'name', 'response',\n",
-      "       'n_Images', 'resp_totalPrice', 'resp_totalPriceLow',\n",
-      "       'resp_totalPriceHigh', 'resp_ZusatzInfo', 'resp_complete',\n",
-      "       'resp_confidence', 'resp_missverständliche Aspekte',\n",
-      "       'resp_Zu erbringende Leistungen', 'resp_Rechenweg', 'resp_Kommentare',\n",
-      "       'resp_Zu erbringen Leistungen', 'diff_price'],\n",
-      "      dtype='object')\n"
+     "ename": "ImportError",
+     "evalue": "cannot import name 'data_expanded' from 'extractResponse' (/var/huggingface/extractResponse.py)",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[31m---------------------------------------------------------------------------\u001B[39m",
+      "\u001B[31mImportError\u001B[39m                               Traceback (most recent call last)",
+      "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[3]\u001B[39m\u001B[32m, line 2\u001B[39m\n\u001B[32m      1\u001B[39m \u001B[38;5;66;03m#Get Data\u001B[39;00m\n\u001B[32m----> \u001B[39m\u001B[32m2\u001B[39m \u001B[38;5;28;01mfrom\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34;01mextractResponse\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;28;01mimport\u001B[39;00m data_expanded\n\u001B[32m      3\u001B[39m \u001B[38;5;28mprint\u001B[39m(data_expanded.columns)\n\u001B[32m      4\u001B[39m columns=[\u001B[33m\"\u001B[39m\u001B[33mprice\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_totalPrice\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mdiff_price\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mduration\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mrequirements_textual\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33minquired\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_ZusatzInfo\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_complete\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_confidence\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mcomment_price\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mcomment_important\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mcomment_important\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_missverständliche Aspekte\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_Zu erbringende Leistungen\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_Rechenweg\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_Kommentare\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresponse\u001B[39m\u001B[33m\"\u001B[39m  ]\n",
+      "\u001B[31mImportError\u001B[39m: cannot import name 'data_expanded' from 'extractResponse' (/var/huggingface/extractResponse.py)"
     ]
    }
   ],
@ -159,11 +143,27 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 1,
   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-08-18T12:02:05.433859201Z",
+     "start_time": "2025-08-18T12:02:05.301566171Z"
+    },
    "scrolled": true
   },
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'data' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[31m---------------------------------------------------------------------------\u001B[39m",
+      "\u001B[31mNameError\u001B[39m                                 Traceback (most recent call last)",
+      "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[1]\u001B[39m\u001B[32m, line 1\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m1\u001B[39m outliers = \u001B[43mdata\u001B[49m.loc[( \u001B[38;5;28mabs\u001B[39m(data[\u001B[33m\"\u001B[39m\u001B[33mdiff_price\u001B[39m\u001B[33m\"\u001B[39m]) <\u001B[32m10\u001B[39m)]\n\u001B[32m      3\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m (\u001B[38;5;28;01mFalse\u001B[39;00m):\n\u001B[32m      4\u001B[39m     \u001B[38;5;28;01mfor\u001B[39;00m o \u001B[38;5;129;01min\u001B[39;00m outliers.index:\n\u001B[32m      5\u001B[39m         \u001B[38;5;66;03m#print(data_expanded.iloc[0])\u001B[39;00m\n\u001B[32m      6\u001B[39m         \u001B[38;5;66;03m#print(data_expanded.iloc[columns,o])\u001B[39;00m\n",
+      "\u001B[31mNameError\u001B[39m: name 'data' is not defined"
+     ]
+    }
+   ],
   "source": [
    "\n",
    "\n",
@ -349,7 +349,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.12.11"
+   "version": "3.12.3"
  }
 },
 "nbformat": 4,
--- a/client.py
+++ b/client.py
@ -4,7 +4,7 @@ client = Client(host='http://localhost:11434')



-def askGPT(systemprompt,userprompt):
+def askGPT(systemprompt,userprompt,format="json"):

    messages = [{
        'role': 'system',
@ -17,6 +17,7 @@ def askGPT(systemprompt,userprompt):
    # 1) First call: let the model decide to call the tool
    resp: ChatResponse = client.chat(
        model='gpt-oss',        # be explicit about the tag
+        format="json",
        messages=messages,
    )

--- a/dataMining.ipynb
+++ b/dataMining.ipynb
--- a/durationEstimate.ipynb
+++ b/durationEstimate.ipynb
--- a/main.py
+++ b/main.py
@ -124,7 +124,7 @@ for i in index:
    print(quotation["duration"])


-    data.to_csv("./intensivQuotationsSampleWithResponse.csv", index=False,sep=";")
+    #data.to_csv("./intensivQuotationsSampleWithResponse.csv", index=False,sep=";")


 data = pd.read_csv("./umzugQuotationsSample.csv", on_bad_lines='skip',sep=";")
@ -139,7 +139,7 @@ for i in index:
    print(quotation["duration"])


-    data.to_csv("./umzugQuotationsSampleWithResponse.csv", index=False,sep=";")
+    #data.to_csv("./umzugQuotationsSampleWithResponse.csv", index=False,sep=";")
 #print(data)

 #print(systemprompt)
Author	SHA1	Message	Date
Anton Wirsing	85662f24bd	november status	2025-11-10 11:37:20 +01:00
Anton Wirsing	c0a083ae92	create frequency table	2025-08-21 17:01:44 +02:00