From 804d11d3cc9138ac92057cbdfa4a0fccc3a79098 Mon Sep 17 00:00:00 2001 From: AntonWirsing Date: Mon, 18 Aug 2025 13:20:56 +0200 Subject: [PATCH] first model --- analysis.ipynb | 64 +++++++++++++++++++++++++++++++++++ client.py | 84 ++++++++++++++++++++++++++++++++++++++++++++++ curl.py | 21 ++++++++++++ data.py | 81 ++++++++++++++++++++++++++++++++++++++++++++ extractResponse.py | 0 5 files changed, 250 insertions(+) create mode 100644 analysis.ipynb create mode 100644 client.py create mode 100644 curl.py create mode 100644 data.py create mode 100644 extractResponse.py diff --git a/analysis.ipynb b/analysis.ipynb new file mode 100644 index 0000000..7a3e9d9 --- /dev/null +++ b/analysis.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2025-08-17T13:06:41.263897Z", + "start_time": "2025-08-17T13:06:40.939099Z" + } + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "data = pd.read_csv(\"./quotationsSampleWithResponse.csv\", on_bad_lines='skip',sep=\";\")\n" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "start_time": "2025-08-13T09:05:06.803852Z", + "end_time": "2025-08-13T09:05:06.879965Z" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/client.py b/client.py new file mode 100644 index 0000000..6c59c88 --- /dev/null +++ 
# client.py — thin wrapper around a local Ollama server (chat with and
# without tool support). Both entry points keep their original names and
# signatures; shared message construction is factored into a helper.
from ollama import Client, ChatResponse

client = Client(host='http://localhost:11434')


def _build_messages(systemprompt, userprompt):
    """Return a chat transcript with one system and one user turn."""
    return [
        {'role': 'system', 'content': systemprompt},
        {'role': 'user', 'content': userprompt},
    ]


def askGPT(systemprompt, userprompt):
    """Send a plain (tool-free) chat request and return the assistant message.

    Parameters
    ----------
    systemprompt : str
        System instruction for the model.
    userprompt : str
        The user's question.

    Returns the assistant message object from the Ollama response.
    """
    resp: ChatResponse = client.chat(
        model='gpt-oss',  # be explicit about the tag
        messages=_build_messages(systemprompt, userprompt),
    )

    print("############### content ###############")
    print(resp.message.content)

    print("############### Message ###############")
    print(resp['message'])  # or print(resp.message)
    return resp['message']


def howToSayHello() -> str:
    """Return the customary greeting."""
    print("function called")
    return "Hallo Welt"


def askGPTwithTools(systemprompt, userprompt):
    """Chat with tool support: execute requested tools, then finish the answer.

    Same contract as askGPT, but the model may call howToSayHello; tool
    results are fed back in a second round trip and the model's final
    message is returned.
    """
    messages = _build_messages(systemprompt, userprompt)

    # 1) First call: let the model decide to call the tool
    resp: ChatResponse = client.chat(
        model='gpt-oss',  # be explicit about the tag
        messages=messages,
        tools=[howToSayHello],
    )

    tcalls = resp.message.tool_calls or []
    if not tcalls:
        # Model chose not to call the tool; behave exactly like askGPT.
        print("############### content ###############")
        print(resp.message.content)
        print("############### Message ###############")
        print(resp['message'])  # or print(resp.message)
        return resp['message']

    # 2) Execute the requested tool(s)
    tool_msgs = []
    for tc in tcalls:
        if tc.function.name == 'howToSayHello':
            result = howToSayHello()
        else:
            result = f"Unknown tool: {tc.function.name}"
        msg = {"role": "tool", "content": result}
        # include id if present; Ollama doesn't always require it
        tc_id = getattr(tc, "id", None)
        if tc_id:
            msg["tool_call_id"] = tc_id
        tool_msgs.append(msg)

    # 3) Send tool outputs back, then ask the model to finish
    messages = messages + [resp.message] + tool_msgs
    final = client.chat(model='gpt-oss', messages=messages)
    print("############### content ###############")
    print(final.message.content)
    # BUG FIX: the original fell through and returned resp['message'] — the
    # intermediate tool-call message — instead of the model's final answer.
    return final['message']
print("############### content ###############") + print(resp.message.content) + + print("############### Message ###############") + print(resp['message']) # or print(resp.message) + return(resp['message']) diff --git a/curl.py b/curl.py new file mode 100644 index 0000000..466092a --- /dev/null +++ b/curl.py @@ -0,0 +1,21 @@ +import requests +import json + + + + + +url = "http://localhost:11434/api/chat" +payload = { + "model": "gpt-oss", + "messages": [ + {"role": "user", "content": "why is the sky blue?"} + ] +} + +resp = requests.post(url, json=payload, timeout=120) +resp.raise_for_status() +data = resp.json() + +# Ollama returns the assistant reply under data["message"]["content"] +print(data["message"]["content"]) \ No newline at end of file diff --git a/data.py b/data.py new file mode 100644 index 0000000..7da462f --- /dev/null +++ b/data.py @@ -0,0 +1,81 @@ +import pandas as pd +import json +import os +# generate short sample +booking = pd.read_csv("./quotations.csv", on_bad_lines='skip',sep=";") +#images = os.listdir("/Users/antonwirsing/Nextcloud/share_anton/ExtraSauber-quotations-training-data-2025-05-09") +images = os.listdir("/var/huggingface/data") +images_by_id = {} +for img in images: + id = img[0:6] + images_by_id.setdefault(id, []).append(img) + + + +# Filter to just “Fensterreinigung” and drop rows missing requirements_textual +filtered = booking.loc[ + (booking['product_name'] == "Fensterreinigung") + & booking['requirements_textual'].notna() + & booking['price'].notna() + & booking['quotation_info_request'].isna() +] + +df = filtered.sample(n=1000, random_state=43).reset_index(drop=True) +#print(df.columns) + +# 4) Create the `images` column by JSON-dumping each list (or empty list) +df['images'] = ( + df['id'] + .astype(str) + .apply(lambda i: json.dumps(images_by_id.get(i, []))) +) +df['n_Images'] = df['images'].apply(lambda s: len(json.loads(s))) +print(df['n_Images']) +df.to_csv("./windowQuotationsSample.csv", index=False,sep=";") + + + +# 
# --- data.py (tail): Umzugsreinigung and Intensivreinigung samples ----------
# The per-product export sections of this script were near-identical copies;
# the repeated filter/sample/export logic is factored into helpers. Per-product
# behavior (filters, seed 43, sample size 1000, ;-separated CSV) is unchanged.
import json


def build_product_sample(booking, images_by_id, product_name, n=1000, seed=43):
    """Filter `booking` to one product, draw a reproducible sample, and
    attach per-row image metadata.

    Parameters
    ----------
    booking : pandas.DataFrame
        Must contain the columns product_name, requirements_textual, price,
        quotation_info_request, and id.
    images_by_id : dict
        Maps str(id) -> list of image file names.
    product_name : str
        Product to keep, e.g. "Umzugsreinigung".
    n : int
        Requested sample size; capped at the number of matching rows.
    seed : int
        random_state for DataFrame.sample, for reproducibility.

    Returns a new DataFrame with added `images` (JSON list as a string) and
    `n_Images` columns.
    """
    # Keep rows for this product that have text requirements and a price,
    # and that did not trigger an info request.
    filtered = booking.loc[
        (booking['product_name'] == product_name)
        & booking['requirements_textual'].notna()
        & booking['price'].notna()
        & booking['quotation_info_request'].isna()
    ]

    # Robustness fix: the original sample(n=1000) raises ValueError when
    # fewer than 1000 rows match; cap the sample size instead.
    sample_size = min(n, len(filtered))
    df = filtered.sample(n=sample_size, random_state=seed).reset_index(drop=True)

    # JSON-dump each image list (or an empty list), keyed by stringified id.
    df['images'] = (
        df['id']
        .astype(str)
        .apply(lambda i: json.dumps(images_by_id.get(i, [])))
    )
    df['n_Images'] = df['images'].apply(lambda s: len(json.loads(s)))
    return df


def export_product_sample(booking, images_by_id, product_name, out_path):
    """Build the sample for `product_name` and write it as a ;-separated CSV."""
    df = build_product_sample(booking, images_by_id, product_name)
    print(df['n_Images'])
    df.to_csv(out_path, index=False, sep=";")


if __name__ == "__main__":
    # `booking` and `images_by_id` are created earlier in this script.
    export_product_sample(booking, images_by_id, "Umzugsreinigung",
                          "./umzugQuotationsSample.csv")
    export_product_sample(booking, images_by_id, "Intensivreinigung",
                          "./intensivQuotationsSample.csv")