YouTip LogoYouTip

Pandas Ecommerce

# This section works through a complete e-commerce data analysis case that
# applies a range of pandas features end to end.
#
# ---------------------------------------------------------------------------
# Case Overview
# ---------------------------------------------------------------------------
# Analyze order data from an e-commerce platform along three dimensions:
# sales trends, product performance, and customer behavior.

# --- Data Preparation (Example) --------------------------------------------
import pandas as pd
import numpy as np

# Simulate e-commerce order data. The fixed seed makes every run reproduce
# the same synthetic orders.
np.random.seed(42)
n_orders = 1000
orders = pd.DataFrame({
    "Order ID": range(1, n_orders + 1),
    "Customer ID": np.random.randint(100, 200, n_orders),
    "Product ID": np.random.randint(1, 20, n_orders),
    "Order Date": pd.date_range("2024-01-01", periods=n_orders, freq="30min"),
    "Quantity": np.random.randint(1, 5, n_orders),
    "Unit Price": np.random.uniform(10, 500, n_orders).round(2),
})

# Calculate order amount: quantity times unit price, rounded to cents.
orders["Order Amount"] = (orders["Quantity"] * orders["Unit Price"]).round(2)

print("Order data overview:")
print(orders.head(10))
print(f"\nData volume: {len(orders)} records")

# --- Data Preprocessing (Example) ------------------------------------------
# Extract date features from the order timestamp. "Hour", "Weekday" and
# "Month" are the grouping keys used by the analyses below.
# NOTE(review): the column name "Date" is a reconstruction; nothing below
# reads it, so confirm the intended name before relying on it.
orders["Date"] = orders["Order Date"].dt.date
orders["Hour"] = orders["Order Date"].dt.hour
orders["Weekday"] = orders["Order Date"].dt.day_name()
orders["Month"] = orders["Order Date"].dt.month

print("After adding time features:")
print(orders.head())
print()

# Check for missing values
print("Missing value check:")
print(orders.isnull().sum())

# ---------------------------------------------------------------------------
# Sales Analysis
# ---------------------------------------------------------------------------

# --- Overall Sales Situation (Example) -------------------------------------
# Overall sales metrics
print("=== Overall Sales Situation ===\n")
print(f"Total Orders: {len(orders):,}")
print(f"Total Sales: ¥{orders['Order Amount'].sum():,.2f}")
print(f"Average Order Value: ¥{orders['Order Amount'].mean():,.2f}")
print(f"Median Order Value: ¥{orders['Order Amount'].median():,.2f}")
print()

# Monthly statistics: order count, revenue, and distinct customers per month.
monthly = orders.groupby("Month").agg({
    "Order ID": "count",
    "Order Amount": "sum",
    "Customer ID": "nunique",
}).rename(columns={
    "Order ID": "Order Count",
    "Order Amount": "Sales",
    "Customer ID": "Customer Count",
})
print("Monthly Sales Trend:")
print(monthly)

# --- Product Analysis (Example) --------------------------------------------
# Product sales ranking, highest revenue first.
product_sales = orders.groupby("Product ID").agg({
    "Order ID": "count",
    "Quantity": "sum",
    "Order Amount": "sum",
}).rename(columns={
    "Order ID": "Order Count",
    "Quantity": "Sales Volume",
    "Order Amount": "Sales",
}).sort_values("Sales", ascending=False)

print("=== Top 10 Product Sales Ranking ===\n")
print(product_sales.head(10))
print()

# Best-selling product: after the descending sort it is the first row.
print(f"Best-selling product: Product {product_sales.index[0]}")
print(f"Sales: ¥{product_sales.iloc[0]['Sales']:,.2f}")

# --- Customer Analysis (Example) -------------------------------------------
# Customer spending analysis
customer_sales = orders.groupby("Customer ID").agg({
    "Order ID": "count",
    "Order Amount": "sum",
}).rename(columns={
    "Order ID": "Order Count",
    "Order Amount": "Total Spending",
})

print("=== Customer Analysis ===\n")
print(f"Active Customers: {len(customer_sales)}")
print(f"Average Orders per Customer: {customer_sales['Order Count'].mean():.1f}")
print(f"Average Customer Spending: ¥{customer_sales['Total Spending'].mean():,.2f}")
print()

# Customer segmentation: bin each customer's total spending into a tier.
# pd.cut uses right-closed intervals by default, i.e. (0, 1000], (1000, 5000], ...
# NOTE(review): the column name "Customer Tier" is a reconstruction; confirm
# the intended name.
customer_sales["Customer Tier"] = pd.cut(
    customer_sales["Total Spending"],
    bins=[0, 1000, 5000, 10000, float("inf")],
    labels=["Regular", "Silver Card", "Gold Card", "VIP"],
)
print("Customer Segmentation Statistics:")
print(customer_sales["Customer Tier"].value_counts())

# --- Time Analysis (Example) -----------------------------------------------
# Hourly analysis: total revenue per hour of day.
hourly = orders.groupby("Hour")["Order Amount"].sum()
print("=== Hourly Sales Analysis ===\n")
print(f"Peak Sales Hour: {hourly.idxmax()} o'clock")
print(f"Sales during peak hour: ¥{hourly.max():,.2f}")
print()

# Weekly analysis: groupby sorts weekday names alphabetically, so reindex to
# put them back in calendar order.
weekday = orders.groupby("Weekday")["Order Amount"].sum().reindex([
    "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
])
print("Weekly Sales:")
for day, amount in weekday.items():
    print(f"{day}: ¥{amount:,.2f}")

# ---------------------------------------------------------------------------
# Analysis Summary (Example)
# ---------------------------------------------------------------------------
print("""
=== E-commerce Data Analysis Summary ===

1. Sales Overview
   - Total Orders: {0}
   - Total Sales: ¥{1:,.2f}
   - Average Order Value: ¥{2:.2f}

2. Product Performance
   - Best-selling Product: Product {3}
   - Sales distribution is uneven among products, with top products contributing most revenue

3. Customer Insights
   - Active Customers: {4} people
   - Suggestion: Focus on maintaining high-value customers

4. Time Patterns
   - Peak Sales Around: {5} o'clock
   - Marketing strategies can be adjusted based on peak hours

5. Optimization Suggestions
   - 1) Increase inventory and promotion for best-selling products
   - 2) Provide personalized services to high-value customers
   - 3) Increase customer service staffing during peak hours
""".format(
    len(orders),
    orders["Order Amount"].sum(),
    orders["Order Amount"].mean(),
    product_sales.index[0],
    len(customer_sales),
    hourly.idxmax(),
))
← Pandas Pd Isna | Pandas Numpy →