Files
test-tasks/Aigrind/Analisys.ipynb
Basyrov Rustam 9811487087 some analisys
2024-11-22 15:03:21 +03:00

794 lines
23 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 79,
"id": "c6bb3ab6",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from tqdm import tqdm"
]
},
{
"cell_type": "markdown",
"id": "d7cfc02e",
"metadata": {},
"source": [
"# Данные"
]
},
{
"cell_type": "markdown",
"id": "67c32cdd",
"metadata": {},
"source": [
"## Payments"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "038a62d8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>payment_date</th>\n",
" <th>Payment_types</th>\n",
" <th>real_cost</th>\n",
" <th>account_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2021-01-01 00:00:56</td>\n",
" <td>Payment_system_5</td>\n",
" <td>9.650000</td>\n",
" <td>2907221</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2021-01-01 00:01:48</td>\n",
" <td>Payment_system_5</td>\n",
" <td>0.955872</td>\n",
" <td>3228373</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2021-01-01 00:03:42</td>\n",
" <td>Payment_system_5</td>\n",
" <td>3.026930</td>\n",
" <td>318552</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2021-01-01 00:04:27</td>\n",
" <td>Payment_system_2</td>\n",
" <td>99.990000</td>\n",
" <td>3832817</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2021-01-01 00:06:41</td>\n",
" <td>Payment_system_5</td>\n",
" <td>1.432550</td>\n",
" <td>7229767</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" payment_date Payment_types real_cost account_id\n",
"0 2021-01-01 00:00:56 Payment_system_5 9.650000 2907221\n",
"1 2021-01-01 00:01:48 Payment_system_5 0.955872 3228373\n",
"2 2021-01-01 00:03:42 Payment_system_5 3.026930 318552\n",
"3 2021-01-01 00:04:27 Payment_system_2 99.990000 3832817\n",
"4 2021-01-01 00:06:41 Payment_system_5 1.432550 7229767"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"payments = pd.read_csv(\"data/payments.csv\")\n",
"payments.head()"
]
},
{
"cell_type": "markdown",
"id": "68048c5a",
"metadata": {},
"source": [
"## Persents"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "a4b72e40",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Payment_types</th>\n",
" <th>Share, %</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Payment_system_1</td>\n",
" <td>20.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Payment_system_2</td>\n",
" <td>15.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Payment_system_3</td>\n",
" <td>58.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Payment_system_4</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Payment_system_5</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Payment_types Share, %\n",
"0 Payment_system_1 20.7\n",
"1 Payment_system_2 15.0\n",
"2 Payment_system_3 58.5\n",
"3 Payment_system_4 0.0\n",
"4 Payment_system_5 24.0"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"persents = pd.read_csv(\"data/persents.csv\", index_col=0)\n",
"persents.head()"
]
},
{
"cell_type": "markdown",
"id": "9a2ba6d3",
"metadata": {},
"source": [
"## Registrations"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "a0f238ba",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>account_id</th>\n",
" <th>created_date</th>\n",
" <th>campaign</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3842380.0</td>\n",
" <td>2021-01-01</td>\n",
" <td>BRA_MS1_install</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3842381.0</td>\n",
" <td>2021-01-01</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3842382.0</td>\n",
" <td>2021-01-01</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3842383.0</td>\n",
" <td>2021-01-01</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3842384.0</td>\n",
" <td>2021-01-01</td>\n",
" <td>BRA_MS1_install</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" account_id created_date campaign\n",
"0 3842380.0 2021-01-01 BRA_MS1_install\n",
"1 3842381.0 2021-01-01 NaN\n",
"2 3842382.0 2021-01-01 NaN\n",
"3 3842383.0 2021-01-01 NaN\n",
"4 3842384.0 2021-01-01 BRA_MS1_install"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"registrations = pd.read_csv(\"data/registrations.csv\", index_col=0)#.dropna()\n",
"registrations.head()"
]
},
{
"cell_type": "markdown",
"id": "f52e9466",
"metadata": {},
"source": [
"## Budget"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "bf031a83",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>media_source</th>\n",
" <th>Campaign_type</th>\n",
" <th>Target</th>\n",
" <th>Plan, USD</th>\n",
" <th>Installs</th>\n",
" <th>Spend, USD</th>\n",
" <th>Deviation, USD</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Media_source_1</td>\n",
" <td>install</td>\n",
" <td>Brazil</td>\n",
" <td>100</td>\n",
" <td>10460</td>\n",
" <td>99.12</td>\n",
" <td>0.88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Media_source_1</td>\n",
" <td>purchase</td>\n",
" <td>Brazil</td>\n",
" <td>400</td>\n",
" <td>1081</td>\n",
" <td>398.67</td>\n",
" <td>1.33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Media_source_1</td>\n",
" <td>install</td>\n",
" <td>Russia</td>\n",
" <td>200</td>\n",
" <td>11894</td>\n",
" <td>246.30</td>\n",
" <td>-46.30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Media_source_1</td>\n",
" <td>purchase</td>\n",
" <td>Russia</td>\n",
" <td>500</td>\n",
" <td>1441</td>\n",
" <td>616.77</td>\n",
" <td>-116.77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Media_source_1</td>\n",
" <td>install</td>\n",
" <td>Ukrane</td>\n",
" <td>100</td>\n",
" <td>6424</td>\n",
" <td>99.43</td>\n",
" <td>0.57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Media_source_1</td>\n",
" <td>purchase</td>\n",
" <td>Ukrane</td>\n",
" <td>500</td>\n",
" <td>242</td>\n",
" <td>348.13</td>\n",
" <td>151.87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Media_source_2</td>\n",
" <td>install</td>\n",
" <td>Brazil</td>\n",
" <td>100</td>\n",
" <td>152</td>\n",
" <td>81.15</td>\n",
" <td>18.85</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Media_source_2</td>\n",
" <td>install</td>\n",
" <td>English_speaking</td>\n",
" <td>100</td>\n",
" <td>371</td>\n",
" <td>99.95</td>\n",
" <td>0.05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Total</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2000</td>\n",
" <td>32065</td>\n",
" <td>1989.52</td>\n",
" <td>10.48</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" media_source Campaign_type Target Plan, USD Installs \\\n",
"0 Media_source_1 install Brazil 100 10460 \n",
"1 Media_source_1 purchase Brazil 400 1081 \n",
"2 Media_source_1 install Russia 200 11894 \n",
"3 Media_source_1 purchase Russia 500 1441 \n",
"4 Media_source_1 install Ukrane 100 6424 \n",
"5 Media_source_1 purchase Ukrane 500 242 \n",
"6 Media_source_2 install Brazil 100 152 \n",
"7 Media_source_2 install English_speaking 100 371 \n",
"8 Total NaN NaN 2000 32065 \n",
"\n",
" Spend, USD Deviation, USD \n",
"0 99.12 0.88 \n",
"1 398.67 1.33 \n",
"2 246.30 -46.30 \n",
"3 616.77 -116.77 \n",
"4 99.43 0.57 \n",
"5 348.13 151.87 \n",
"6 81.15 18.85 \n",
"7 99.95 0.05 \n",
"8 1989.52 10.48 "
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"budget = pd.read_excel(\"data/Рекламный бюджет январь 2021.xlsx\")\n",
"budget.loc[0:5, 'media_source'] = 'Media_source_1'\n",
"budget.loc[6:7, 'media_source'] = 'Media_source_2'\n",
"budget"
]
},
{
"cell_type": "markdown",
"id": "561d9fce",
"metadata": {},
"source": [
"# Analisys"
]
},
{
"cell_type": "markdown",
"id": "1e9e2cb6",
"metadata": {},
"source": [
"## Оптимальный срок окупаемости"
]
},
{
"cell_type": "markdown",
"id": "1701b5e5",
"metadata": {},
"source": [
"$$\\Large\n",
"T = \\frac{IC}{FV}\n",
"$$\n",
"\n",
"где:\n",
"\n",
"- $\\large T$ — срок окупаемости\n",
"- $\\large IC$ — инвестиционные расходы\n",
"- $\\large FV$ — прибыль"
]
},
{
"cell_type": "markdown",
"id": "7df2460e",
"metadata": {},
"source": [
"### $\\large IC$ — инвестиционные расходы"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "33a0a78b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['BRA_MS1_install']"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"campaigns = list(registrations['campaign'].unique()[:1])\n",
"campaigns"
]
},
{
"cell_type": "markdown",
"id": "bdaedc89",
"metadata": {},
"source": [
"### $\\large FV$ — прибыль"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "8353352e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>payment_date</th>\n",
" <th>Payment_types</th>\n",
" <th>real_cost</th>\n",
" <th>account_id</th>\n",
" <th>Share, %</th>\n",
" <th>cost</th>\n",
" <th>created_date</th>\n",
" <th>campaign</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>330</th>\n",
" <td>2021-01-01 15:10:26</td>\n",
" <td>Payment_system_5</td>\n",
" <td>2.445710</td>\n",
" <td>3861811</td>\n",
" <td>24.0</td>\n",
" <td>1.858740</td>\n",
" <td>2021-01-07</td>\n",
" <td>RUS_MS1_purchase</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339</th>\n",
" <td>2021-01-01 15:22:00</td>\n",
" <td>Payment_system_5</td>\n",
" <td>2.445710</td>\n",
" <td>3861811</td>\n",
" <td>24.0</td>\n",
" <td>1.858740</td>\n",
" <td>2021-01-07</td>\n",
" <td>RUS_MS1_purchase</td>\n",
" </tr>\n",
" <tr>\n",
" <th>827</th>\n",
" <td>2021-01-01 17:16:09</td>\n",
" <td>Payment_system_5</td>\n",
" <td>9.624570</td>\n",
" <td>3844647</td>\n",
" <td>24.0</td>\n",
" <td>7.314673</td>\n",
" <td>2021-01-01</td>\n",
" <td>BRA_MS1_install</td>\n",
" </tr>\n",
" <tr>\n",
" <th>837</th>\n",
" <td>2021-01-01 17:17:45</td>\n",
" <td>Payment_system_5</td>\n",
" <td>9.624570</td>\n",
" <td>3844647</td>\n",
" <td>24.0</td>\n",
" <td>7.314673</td>\n",
" <td>2021-01-01</td>\n",
" <td>BRA_MS1_install</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1922</th>\n",
" <td>2021-01-02 01:08:32</td>\n",
" <td>Payment_system_5</td>\n",
" <td>0.955872</td>\n",
" <td>3845613</td>\n",
" <td>24.0</td>\n",
" <td>0.726463</td>\n",
" <td>2021-01-02</td>\n",
" <td>RUS_MS1_purchase</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>301124</th>\n",
" <td>2021-12-29 17:14:22</td>\n",
" <td>Payment_system_4</td>\n",
" <td>2.620000</td>\n",
" <td>3924840</td>\n",
" <td>0.0</td>\n",
" <td>2.620000</td>\n",
" <td>2021-01-29</td>\n",
" <td>RUS_MS1_install</td>\n",
" </tr>\n",
" <tr>\n",
" <th>301169</th>\n",
" <td>2021-12-29 18:55:43</td>\n",
" <td>Payment_system_4</td>\n",
" <td>0.100000</td>\n",
" <td>3924840</td>\n",
" <td>0.0</td>\n",
" <td>0.100000</td>\n",
" <td>2021-01-29</td>\n",
" <td>RUS_MS1_install</td>\n",
" </tr>\n",
" <tr>\n",
" <th>301243</th>\n",
" <td>2021-12-29 21:22:10</td>\n",
" <td>Payment_system_4</td>\n",
" <td>0.310000</td>\n",
" <td>3924840</td>\n",
" <td>0.0</td>\n",
" <td>0.310000</td>\n",
" <td>2021-01-29</td>\n",
" <td>RUS_MS1_install</td>\n",
" </tr>\n",
" <tr>\n",
" <th>301471</th>\n",
" <td>2021-12-30 08:15:58</td>\n",
" <td>Payment_system_4</td>\n",
" <td>0.380000</td>\n",
" <td>3924840</td>\n",
" <td>0.0</td>\n",
" <td>0.380000</td>\n",
" <td>2021-01-29</td>\n",
" <td>RUS_MS1_install</td>\n",
" </tr>\n",
" <tr>\n",
" <th>303332</th>\n",
" <td>2021-12-31 20:58:48</td>\n",
" <td>Payment_system_5</td>\n",
" <td>2.309120</td>\n",
" <td>3861811</td>\n",
" <td>24.0</td>\n",
" <td>1.754931</td>\n",
" <td>2021-01-07</td>\n",
" <td>RUS_MS1_purchase</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>711 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" payment_date Payment_types real_cost account_id \\\n",
"330 2021-01-01 15:10:26 Payment_system_5 2.445710 3861811 \n",
"339 2021-01-01 15:22:00 Payment_system_5 2.445710 3861811 \n",
"827 2021-01-01 17:16:09 Payment_system_5 9.624570 3844647 \n",
"837 2021-01-01 17:17:45 Payment_system_5 9.624570 3844647 \n",
"1922 2021-01-02 01:08:32 Payment_system_5 0.955872 3845613 \n",
"... ... ... ... ... \n",
"301124 2021-12-29 17:14:22 Payment_system_4 2.620000 3924840 \n",
"301169 2021-12-29 18:55:43 Payment_system_4 0.100000 3924840 \n",
"301243 2021-12-29 21:22:10 Payment_system_4 0.310000 3924840 \n",
"301471 2021-12-30 08:15:58 Payment_system_4 0.380000 3924840 \n",
"303332 2021-12-31 20:58:48 Payment_system_5 2.309120 3861811 \n",
"\n",
" Share, % cost created_date campaign \n",
"330 24.0 1.858740 2021-01-07 RUS_MS1_purchase \n",
"339 24.0 1.858740 2021-01-07 RUS_MS1_purchase \n",
"827 24.0 7.314673 2021-01-01 BRA_MS1_install \n",
"837 24.0 7.314673 2021-01-01 BRA_MS1_install \n",
"1922 24.0 0.726463 2021-01-02 RUS_MS1_purchase \n",
"... ... ... ... ... \n",
"301124 0.0 2.620000 2021-01-29 RUS_MS1_install \n",
"301169 0.0 0.100000 2021-01-29 RUS_MS1_install \n",
"301243 0.0 0.310000 2021-01-29 RUS_MS1_install \n",
"301471 0.0 0.380000 2021-01-29 RUS_MS1_install \n",
"303332 24.0 1.754931 2021-01-07 RUS_MS1_purchase \n",
"\n",
"[711 rows x 8 columns]"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = payments.join(persents.set_index(\"Payment_types\"), on=\"Payment_types\")\n",
"df['cost'] = df['real_cost'] * (100 - df['Share, %']) / 100\n",
"df.join(registrations.set_index('account_id'), on=\"account_id\").dropna()\n",
"# registrations.set_index('account_id')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.0"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 5
}