import os

# Connection settings for the PostgreSQL instance.
# Every value can be overridden through an environment variable so that no
# secret has to live in the notebook (and therefore in version control).
# The non-secret defaults reproduce the values this notebook used before.
USER = os.environ.get("DB_USER", "test_user")
# No default on purpose: a password must never be committed.  When DB_PASSWORD
# is unset this is None; psycopg2 then omits the parameter and libpq falls back
# to its own mechanisms (PGPASSWORD / ~/.pgpass).
# NOTE(review): the previously committed password should be rotated.
PASSWORD = os.environ.get("DB_PASSWORD")
HOSTNAME = os.environ.get("DB_HOST", "rc1a-p8bp15mmxsfwpbt0.mdb.yandexcloud.net")
# HOSTNAME = "130.193.48.126"  # fallback used when the domain name failed to resolve
DATABASE = os.environ.get("DB_NAME", "db1")
PORT = os.environ.get("DB_PORT", "6432")
# Connect to the database, pull the three source tables and release the
# connection immediately: nothing below needs it, and an open session would
# otherwise be held for the whole lifetime of the kernel.
conn = psycopg2.connect(
    database=DATABASE,
    user=USER,
    host=HOSTNAME,
    password=PASSWORD,
    port=PORT,
)
try:
    # Queries
    cm = pd.read_sql_query("select * from test.chat_messages", conn)
    mop = pd.read_sql_query("select * from test.managers", conn)
    rop = pd.read_sql_query("select * from test.rops", conn)
finally:
    conn.close()

# Order messages chronologically and index every table by its primary key.
# Reassignment is used instead of `inplace=True` so that re-running the cell
# always starts from the freshly loaded frames.
cm = cm.sort_values(by=['created_at'], ascending=True).set_index('message_id')

# rop_id is cast to str before becoming the index because it is the key of the
# later join with `mop` (presumably mop.rop_id is stored as text -- confirm).
rop = rop.assign(rop_id=rop['rop_id'].astype(str)).set_index('rop_id')

mop = mop.set_index('mop_id')
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
typeentity_idcreated_bycreated_at
message_id
\"01jb2qn9kwq95f3cm3w9pte8m7\"outgoing_chat_message37548741102625131729890002
\"01jb2qnbwyqywemwww3kxh8wxp\"outgoing_chat_message36713959102624851729890004
\"01jb2qney0mkr94bx272g4qhs3\"incoming_chat_message3754876101729890008
\"01jb2qnhnkb0n83mrg790xjn82\"outgoing_chat_message3754808767447921729890010
\"01jb2qnmn7asjn9m5478zy7c82\"outgoing_chat_message37362585104656901729890013
\n", + "
# Preview the first rows of the chat-messages frame loaded from test.chat_messages.
cm.head()
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
name_moprop_id
mop_id
6645315Гюнель и Илина1
6744792Юля и Наташа1
10262505Вика и Марго1
10262513Ира и Варя1
10465254Настя и Даша1
\n", + "
# Preview the first rows of the managers frame loaded from test.managers.
mop.head()
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rop_name
rop_id
1Катя РОП
2Полина РОП
3Эля РОП
\n", + "
def time_diff(x, shift_begin=timedelta(hours=9, minutes=30)):
    """
    Working-time delay between a client message and the manager's reply.

    Only the time of day of both messages is used.  The period from 00:00 to
    `shift_begin` is treated as non-working time and is never counted.

    Parameters
    ----------
    x : mapping or pandas row
        Must provide ``x['dtime']`` (time of day of the client message) and
        ``x['lag_dtime']`` (time of day of the manager reply); both need
        ``hour``, ``minute`` and ``second`` attributes (e.g. ``datetime.time``).
    shift_begin : datetime.timedelta, optional
        Offset from midnight at which the working day starts (default 09:30).

    Returns
    -------
    datetime.timedelta
        Non-negative working time between the two messages:

        1. manager replied at night -> time counted until 24:00, starting from
           the client message (or from `shift_begin` if the client also wrote
           at night); zero if both messages fall in the same night;
        2. client wrote at night, manager replied during the day -> counted
           from `shift_begin`;
        3. client wrote during the day, manager replied at an earlier time of
           day (i.e. on the next day) -> rest of the client's day plus the part
           of the reply day after `shift_begin`;
        4. otherwise -> plain difference.

    Notes
    -----
    Dates are not available here, so a reply is assumed to arrive at the first
    moment consistent with the two times of day; gaps longer than one day
    cannot be detected.
    NOTE(review): the caller derives the times from epoch seconds without a
    time-zone conversion -- confirm that `shift_begin` is expressed in the
    same zone.
    """
    day = timedelta(days=1)
    ct = x['dtime']
    mt = x['lag_dtime']
    client_time = timedelta(hours=ct.hour, minutes=ct.minute, seconds=ct.second)
    manager_time = timedelta(hours=mt.hour, minutes=mt.minute, seconds=mt.second)

    client_at_night = client_time < shift_begin
    manager_at_night = manager_time < shift_begin
    # Working time can only start accruing once the shift has begun.
    start = max(client_time, shift_begin)

    if manager_at_night:
        if client_at_night and client_time <= manager_time:
            # Both messages fall in the same non-working window.
            return timedelta(0)
        # Count up to midnight; the reply itself happened outside working time.
        return day - start
    if client_at_night:
        return manager_time - shift_begin
    if client_time > manager_time:
        # Reply on the following day: remainder of the client's day plus the
        # worked part of the reply day (the night in between is excluded).
        return (day - client_time) + (manager_time - shift_begin)
    return manager_time - client_time
# Messages in chronological order, grouped per deal (entity_id).  The frame is
# sorted once and the grouping reused so that `num` and `lag` are guaranteed to
# be computed over exactly the same ordering.
by_deal = cm.sort_values('created_at', kind='stable').groupby('entity_id')

# Equivalent of
#   ROW_NUMBER() OVER (PARTITION BY entity_id ORDER BY created_at)
# Numbering starts at 1, so the first message of a deal is num == 1.
cm['num'] = by_deal.cumcount() + 1
# Drop deal-opening messages written by a manager (created_by != 0): there is
# no client message they could be a reply to.
condition1 = ~((cm['created_by'] != 0) & (cm['num'] == 1))

# Equivalent of
#   created_by <> LAG(created_by) OVER (PARTITION BY entity_id ORDER BY created_at)
# shift(1) is the true lag (previous author).  Keeping rows whose author
# differs from the previous one retains the FIRST message of every run of
# consecutive messages by the same author.
cm['lag'] = by_deal['created_by'].shift(1)
condition2 = cm['created_by'] != cm['lag']

# Intersection of both conditions; .copy() so the columns added below are
# written to an independent frame rather than to a slice of `cm`.
result = cm[condition1 & condition2].copy()

# For every retained message look at the previous retained message of the
# same deal: its time of day and its author.
blocks_by_deal = result.sort_values('created_at', kind='stable').groupby('entity_id')
result['lag_dtime'] = blocks_by_deal['dtime'].shift(1)
previous_author = blocks_by_deal['created_by'].shift(1).reindex(result.index)

# A reply is a manager message that directly follows a client message
# (created_by == 0).  The response time is stored on the manager's row so that
# a later join on `created_by` attributes it to the manager who answered.
is_reply = (result['created_by'] != 0) & (previous_author == 0)

# time_diff expects the client time under 'dtime' and the manager time under
# 'lag_dtime', hence the swap relative to the columns of `result`.
reply_pairs = pd.DataFrame({
    'dtime': result.loc[is_reply, 'lag_dtime'],
    'lag_dtime': result.loc[is_reply, 'dtime'],
})
result['lag_diff'] = pd.Series(
    [time_diff(pair) for pair in reply_pairs.to_dict('records')],
    index=reply_pairs.index,
    dtype='timedelta64[ns]',
)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
name_moprop_namelag_diff
message_id
\"01jb2qn9kwq95f3cm3w9pte8m7\"Ира и ВаряКатя РОП0 days 00:02:49
\"01jb2qnhnkb0n83mrg790xjn82\"Юля и НаташаКатя РОП0 days 00:00:15
\"01jb2qnmn7asjn9m5478zy7c82\"Настя и МаликаЭля РОП0 days 00:00:09
\"01jb2qnvamsq2866te84re669b\"Соня и КатяПолина РОП0 days 00:00:27
\"01jb2qpc1qfrqgmgx998t9vgg2\"Мария и СоняПолина РОП0 days 00:00:23
\n", + "
# Attach manager and head-of-sales names to every message row, discard rows
# with any missing value, and keep only the three columns used for the report.
result2 = (
    result
    .join(mop, on='created_by')
    .join(rop, on='rop_id')
    .dropna()
    .loc[:, ['name_mop', 'rop_name', 'lag_diff']]
)
result2.head()

# Average `lag_diff` per (head of sales, manager) pair.
final_df = result2.groupby(['rop_name', 'name_mop']).mean()
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Имя руководителяИмя менеджераСреднее время реакции
0Катя РОПАня и Ксюша0 days 03:27:30.020872865
1Катя РОПВика и Катя0 days 03:24:13.302118171
2Катя РОПГюнель и Илина0 days 03:55:31.088691796
3Катя РОПИра и Варя0 days 02:44:58.748677248
4Катя РОПКсюша и Джамиля0 days 01:58:12.669724770
5Катя РОПНастя и Даша0 days 03:28:37.838951310
6Катя РОПЮля и Наташа0 days 03:57:47.909896602
7Полина РОПДаша и Влада0 days 03:48:25.811715481
8Полина РОПДаша и Даша0 days 03:27:55.650793650
9Полина РОПЛиза и Ева0 days 03:19:41.608280254
10Полина РОПМария и Соня0 days 03:16:59.401098901
11Полина РОПНастя и Саша0 days 02:36:18.021337126
12Полина РОПСоня и Катя0 days 01:35:07.795180722
13Эля РОПАлина и Юля0 days 02:28:19.712707182
14Эля РОПАнгелина Милованова0 days 06:06:26.477272727
15Эля РОПВлада и Настя0 days 00:28:08.972222222
16Эля РОПДаша и Карина0 days 02:38:46.333333333
17Эля РОПКами и Мила0 days 02:23:03.636904761
18Эля РОПНастя и Малика0 days 02:57:01.780487804
19Эля РОППолина Мирзоян0 days 01:55:44.690476190
20Эля РОППорхачева Полина0 days 05:06:48.200000
21Эля РОПСофья Боднар0 days 03:26:38.134831460
\n", + "
# Human-readable column captions for the final report.
di = dict(
    name_mop="Имя менеджера",
    rop_name="Имя руководителя",
    lag_diff="Среднее время реакции",
)

# Flatten the group index into ordinary columns and apply the captions.
final_df.reset_index().rename(columns=di)
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}