first model

2025-08-18 13:20:56 +02:00 · 2025-08-18 13:20:56 +02:00 · 804d11d3cc
commit 804d11d3cc
parent f393130598
5 changed files with 250 additions and 0 deletions
--- a/analysis.ipynb
+++ b/analysis.ipynb
@ -0,0 +1,64 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2025-08-17T13:06:41.263897Z",
+     "start_time": "2025-08-17T13:06:40.939099Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv(\"./quotationsSampleWithResponse.csv\", on_bad_lines='skip',sep=\";\")\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "start_time": "2025-08-13T09:05:06.803852Z",
+     "end_time": "2025-08-13T09:05:06.879965Z"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   }
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
--- a/client.py
+++ b/client.py
@ -0,0 +1,84 @@
+from ollama import Client, ChatResponse
+# Shared Ollama client used by the chat helpers below; it talks to the server
+# at http://localhost:11434.
+client = Client(host='http://localhost:11434')
+
+
+
+
+def askGPT(systemprompt, userprompt, model='gpt-oss'):
+    """Send one system/user prompt pair to the model and return its reply.
+
+    No tools are offered here, so this is a single plain chat round trip.
+
+    Args:
+        systemprompt: Text sent as the ``system`` message.
+        userprompt: Text sent as the ``user`` message.
+        model: Model tag to query. Defaults to ``'gpt-oss'``, the tag that
+            was previously hard-coded.
+
+    Returns:
+        The ``message`` object of the chat response.
+    """
+    messages = [
+        {'role': 'system', 'content': systemprompt},
+        {'role': 'user', 'content': userprompt},
+    ]
+
+    resp: ChatResponse = client.chat(model=model, messages=messages)
+    reply = resp.message
+
+    # Echo the reply text and the full message object for debugging.
+    print("############### content ###############")
+    print(reply.content)
+
+    print("############### Message ###############")
+    print(reply)
+    return reply
+
+
+
+def howToSayHello() -> str:
+    """Return the customary greeting."""
+    # NOTE(review): this function object is passed to client.chat(tools=[...])
+    # and its name is matched as a string when tool calls are dispatched, so
+    # the name, signature and docstring are presumably part of what the model
+    # sees -- keep them stable; confirm against the ollama-python docs.
+    greeting = "Hallo Welt"
+    print("function called")
+    return greeting
+def askGPTwithTools(systemprompt, userprompt, model='gpt-oss'):
+    """Ask the model a question while offering it the ``howToSayHello`` tool.
+
+    The model is queried once with the tool available. If it requests tool
+    calls, each one is executed locally, the results are appended to the
+    conversation, and the model is queried a second time for its final answer.
+
+    Args:
+        systemprompt: Text sent as the ``system`` message.
+        userprompt: Text sent as the ``user`` message.
+        model: Model tag to query. Defaults to ``'gpt-oss'``, the tag that
+            was previously hard-coded.
+
+    Returns:
+        The final assistant message: the follow-up reply when tools were
+        called, otherwise the reply to the first (and only) request.
+    """
+    # Tools the model may call, keyed by the name it will ask for.
+    available_tools = {'howToSayHello': howToSayHello}
+
+    messages = [
+        {'role': 'system', 'content': systemprompt},
+        {'role': 'user', 'content': userprompt},
+    ]
+
+    # 1) First call: let the model decide whether to call a tool.
+    resp: ChatResponse = client.chat(
+        model=model,
+        messages=messages,
+        tools=list(available_tools.values()),
+    )
+    answer = resp.message
+
+    tool_calls = resp.message.tool_calls or []
+    if tool_calls:
+        # 2) Execute the requested tool(s).
+        tool_msgs = []
+        for call in tool_calls:
+            tool = available_tools.get(call.function.name)
+            if tool is None:
+                result = f"Unknown tool: {call.function.name}"
+            else:
+                # The only registered tool takes no parameters, so the call
+                # is made without arguments.
+                result = tool()
+            tool_msg = {"role": "tool", "content": result}
+            # Include the call id if present; Ollama doesn't always require it.
+            call_id = getattr(call, "id", None)
+            if call_id:
+                tool_msg["tool_call_id"] = call_id
+            tool_msgs.append(tool_msg)
+
+        # 3) Send the tool outputs back, then ask the model to finish.
+        messages = messages + [resp.message] + tool_msgs
+        final = client.chat(model=model, messages=messages)
+        # The follow-up reply, not the tool-call request, is the real answer;
+        # previously the first response was returned even on this path.
+        answer = final.message
+
+    print("############### content ###############")
+    print(answer.content)
+
+    print("############### Message ###############")
+    print(answer)
+    return answer
--- a/curl.py
+++ b/curl.py
@ -0,0 +1,21 @@
+import requests
+import json
+
+
+
+
+
+# Ask a local Ollama server one question through its REST chat endpoint and
+# print the reply.
+url = "http://localhost:11434/api/chat"
+payload = {
+    "model": "gpt-oss",
+    "messages": [
+        {"role": "user", "content": "why is the sky blue?"},
+    ],
+    # The endpoint streams newline-delimited JSON chunks unless told
+    # otherwise; ask for one JSON object so resp.json() can parse the body.
+    "stream": False,
+}
+
+resp = requests.post(url, json=payload, timeout=120)
+resp.raise_for_status()
+data = resp.json()
+
+# Ollama returns the assistant reply under data["message"]["content"]
+print(data["message"]["content"])
--- a/data.py
+++ b/data.py
@ -0,0 +1,81 @@
+import pandas as pd
+import json
+import os
+# Generate a short random sample of quotations for each product, with every
+# row annotated with the image files that belong to it.
+booking = pd.read_csv("./quotations.csv", on_bad_lines='skip', sep=";")
+#images = os.listdir("/Users/antonwirsing/Nextcloud/share_anton/ExtraSauber-quotations-training-data-2025-05-09")
+images = os.listdir("/var/huggingface/data")
+
+# Group the image file names by the first six characters of the name, which
+# are used below as the lookup key for a quotation id.
+images_by_id = {}
+for img in images:
+    images_by_id.setdefault(img[0:6], []).append(img)
+
+
+def _export_sample(product_name, out_path, n=1000, random_state=43):
+    """Write a random sample of one product's quotations to a CSV file.
+
+    Rows of ``booking`` are kept when ``product_name`` matches,
+    ``requirements_textual`` and ``price`` are present, and
+    ``quotation_info_request`` is missing. The sample gets an ``images``
+    column (JSON list of image file names for the row's id) and an
+    ``n_Images`` column (length of that list).
+
+    Args:
+        product_name: Value of the ``product_name`` column to select.
+        out_path: Destination of the ``;``-separated CSV file.
+        n: Maximum number of rows to sample.
+        random_state: Seed passed to ``DataFrame.sample``.
+    """
+    filtered = booking.loc[
+        (booking['product_name'] == product_name)
+        & booking['requirements_textual'].notna()
+        & booking['price'].notna()
+        & booking['quotation_info_request'].isna()
+    ]
+
+    # Sampling more rows than exist raises a ValueError, so cap the size.
+    sample_size = min(n, len(filtered))
+    df = filtered.sample(n=sample_size, random_state=random_state)
+    df = df.reset_index(drop=True)
+
+    # Look up each row's image list once; rows without images get [].
+    image_lists = df['id'].astype(str).apply(
+        lambda key: images_by_id.get(key, [])
+    )
+    df['images'] = image_lists.apply(json.dumps)
+    df['n_Images'] = image_lists.apply(len)
+    print(df['n_Images'])
+    df.to_csv(out_path, index=False, sep=";")
+
+
+_export_sample("Fensterreinigung", "./windowQuotationsSample.csv")
+_export_sample("Umzugsreinigung", "./umzugQuotationsSample.csv")
+_export_sample("Intensivreinigung", "./intensivQuotationsSample.csv")
--- a/extractResponse.py
+++ b/extractResponse.py