Compare commits

...

2 Commits

Author SHA1 Message Date
Anton Wirsing
85662f24bd november status 2025-11-10 11:37:20 +01:00
Anton Wirsing
c0a083ae92 create frequency table 2025-08-21 17:01:44 +02:00
5 changed files with 1743 additions and 54 deletions

View File

@ -2,11 +2,11 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-13T09:04:47.365323Z",
"start_time": "2025-08-13T09:04:46.211866Z"
"end_time": "2025-09-29T18:17:56.534291268Z",
"start_time": "2025-09-29T18:17:56.490560986Z"
}
},
"outputs": [],
@ -19,6 +19,13 @@
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
@ -27,10 +34,7 @@
"end_time": "2025-08-13T09:05:06.879965Z",
"start_time": "2025-08-13T09:05:06.803852Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [],
"source": [
@ -39,44 +43,24 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-19T09:25:33.132235159Z",
"start_time": "2025-08-19T09:25:33.104903712Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" totalPrice totalPriceLow totalPriceHigh \\\n",
"0 145 145 145 \n",
"\n",
" ZusatzInfo complete confidence \\\n",
"0 Bitte bestätigen Sie, dass die Oberlichten als... False 0.8 \n",
"\n",
" missverständliche Aspekte \\\n",
"0 Der Ausdruck \"Oberlichten\" ist nicht eindeutig... \n",
"\n",
" Zu erbringende Leistungen: \\\n",
"0 4 AltbauDoppelfenster (je 25 €), 2 Neubaufens... \n",
"\n",
" Rechenweg \\\n",
"0 4×25€ + 2×15€ + 1×15€ = 100€ + 30€ + 15€... \n",
"\n",
" Kommentare \n",
"0 Die Kosten basieren ausschließlich auf den in ... \n",
"Index(['id', 'product_id', 'product_name', 'price', 'currency', 'duration',\n",
" 'requirements_textual', 'street', 'zipcode', 'city', 'country',\n",
" 'coordinate', 'comment_price', 'comment_key', 'comment_important',\n",
" 'comment_restrict', 'comment_other', 'inquired',\n",
" 'quotation_info_request', 'quotation_state', 'quotation_ts',\n",
" 'quotation_comment', 'completed', 'customer_id', 'name', 'response',\n",
" 'n_Images', 'resp_totalPrice', 'resp_totalPriceLow',\n",
" 'resp_totalPriceHigh', 'resp_ZusatzInfo', 'resp_complete',\n",
" 'resp_confidence', 'resp_missverständliche Aspekte',\n",
" 'resp_Zu erbringende Leistungen', 'resp_Rechenweg', 'resp_Kommentare',\n",
" 'resp_Zu erbringen Leistungen', 'diff_price'],\n",
" dtype='object')\n"
"ename": "ImportError",
"evalue": "cannot import name 'data_expanded' from 'extractResponse' (/var/huggingface/extractResponse.py)",
"output_type": "error",
"traceback": [
"\u001B[31m---------------------------------------------------------------------------\u001B[39m",
"\u001B[31mImportError\u001B[39m Traceback (most recent call last)",
"\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[3]\u001B[39m\u001B[32m, line 2\u001B[39m\n\u001B[32m 1\u001B[39m \u001B[38;5;66;03m#Get Data\u001B[39;00m\n\u001B[32m----> \u001B[39m\u001B[32m2\u001B[39m \u001B[38;5;28;01mfrom\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34;01mextractResponse\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;28;01mimport\u001B[39;00m data_expanded\n\u001B[32m 3\u001B[39m \u001B[38;5;28mprint\u001B[39m(data_expanded.columns)\n\u001B[32m 4\u001B[39m columns=[\u001B[33m\"\u001B[39m\u001B[33mprice\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_totalPrice\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mdiff_price\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mduration\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mrequirements_textual\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33minquired\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_ZusatzInfo\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_complete\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_confidence\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mcomment_price\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mcomment_important\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mcomment_important\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_missverständliche Aspekte\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_Zu erbringende Leistungen\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_Rechenweg\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresp_Kommentare\u001B[39m\u001B[33m\"\u001B[39m,\u001B[33m\"\u001B[39m\u001B[33mresponse\u001B[39m\u001B[33m\"\u001B[39m ]\n",
"\u001B[31mImportError\u001B[39m: cannot import name 'data_expanded' from 'extractResponse' (/var/huggingface/extractResponse.py)"
]
}
],
@ -159,11 +143,27 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-18T12:02:05.433859201Z",
"start_time": "2025-08-18T12:02:05.301566171Z"
},
"scrolled": true
},
"outputs": [],
"outputs": [
{
"ename": "NameError",
"evalue": "name 'data' is not defined",
"output_type": "error",
"traceback": [
"\u001B[31m---------------------------------------------------------------------------\u001B[39m",
"\u001B[31mNameError\u001B[39m Traceback (most recent call last)",
"\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[1]\u001B[39m\u001B[32m, line 1\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m1\u001B[39m outliers = \u001B[43mdata\u001B[49m.loc[( \u001B[38;5;28mabs\u001B[39m(data[\u001B[33m\"\u001B[39m\u001B[33mdiff_price\u001B[39m\u001B[33m\"\u001B[39m]) <\u001B[32m10\u001B[39m)]\n\u001B[32m 3\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m (\u001B[38;5;28;01mFalse\u001B[39;00m):\n\u001B[32m 4\u001B[39m \u001B[38;5;28;01mfor\u001B[39;00m o \u001B[38;5;129;01min\u001B[39;00m outliers.index:\n\u001B[32m 5\u001B[39m \u001B[38;5;66;03m#print(data_expanded.iloc[0])\u001B[39;00m\n\u001B[32m 6\u001B[39m \u001B[38;5;66;03m#print(data_expanded.iloc[columns,o])\u001B[39;00m\n",
"\u001B[31mNameError\u001B[39m: name 'data' is not defined"
]
}
],
"source": [
"\n",
"\n",
@ -349,7 +349,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
"version": "3.12.3"
}
},
"nbformat": 4,

View File

@ -4,7 +4,7 @@ client = Client(host='http://localhost:11434')
def askGPT(systemprompt,userprompt):
def askGPT(systemprompt,userprompt,format="json"):
messages = [{
'role': 'system',
@ -17,6 +17,7 @@ def askGPT(systemprompt,userprompt):
# 1) First call: let the model decide to call the tool
resp: ChatResponse = client.chat(
model='gpt-oss', # be explicit about the tag
format="json",
messages=messages,
)

704
dataMining.ipynb Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -124,7 +124,7 @@ for i in index:
print(quotation["duration"])
data.to_csv("./intensivQuotationsSampleWithResponse.csv", index=False,sep=";")
#data.to_csv("./intensivQuotationsSampleWithResponse.csv", index=False,sep=";")
data = pd.read_csv("./umzugQuotationsSample.csv", on_bad_lines='skip',sep=";")
@ -139,7 +139,7 @@ for i in index:
print(quotation["duration"])
data.to_csv("./umzugQuotationsSampleWithResponse.csv", index=False,sep=";")
#data.to_csv("./umzugQuotationsSampleWithResponse.csv", index=False,sep=";")
#print(data)
#print(systemprompt)