Verified Commit 006e2ea2 authored by 施乐存's avatar 施乐存
Browse files

添加UP主视频数量分布分区多样性分布图



Signed-off-by: 施乐存's avatarszdytom <szdytom@qq.com>
parent e9da1fd9
Loading
Loading
Loading
Loading
+92.3 KiB
Loading image diff...
+91 −0
Original line number Diff line number Diff line
(* ::Package:: *)

(* Fetch the per-UP partition-diversity distribution from the SQLite database.
   The inner query keeps only videos whose tidv2 is not NULL, groups them by
   author_id and retains authors with at least 10 such videos. The outer query
   then counts, for each number of distinct tidv2 values (x), how many authors
   have exactly that many (y), ordered by x.
   Normal is applied to the result so that each row can be indexed by column
   name further down (presumably the raw result is a Dataset - confirm). *)
data = With[
   {
    db = DatabaseReference[
      File[FileNameJoin[{NotebookDirectory[], "..", "data.db"}]]],
    sql = "WITH up_stats AS (
    SELECT 
        author_id,
        COUNT(*) as video_count,
        COUNT(DISTINCT tidv2) as distinct_tidv2_count
    FROM videos
    WHERE tidv2 IS NOT NULL
    GROUP BY author_id
    HAVING COUNT(*) >= 10
)
SELECT 
    distinct_tidv2_count as x,
    COUNT(author_id) as y
FROM up_stats
GROUP BY distinct_tidv2_count
ORDER BY distinct_tidv2_count;"
    },
   Normal[ExternalEvaluate[db, sql]]
   ];

(* Turn every result row into an {x, y} coordinate pair, where x is the number
   of distinct partitions and y is the number of UPs with that many. *)
points = Map[Function[row, {row["x"], row["y"]}], data];

(* Build the bar chart: one bar per row of `points`, bar height = number of UPs (y).

   Fix: the bars are drawn from the y values only, so bar i sits at horizontal
   position i regardless of the partition count x it represents. Automatic
   numeric frame ticks therefore show the bar index, which is wrong whenever
   the queried x values are not exactly 1, 2, 3, ... (e.g. a count that no UP
   has is simply absent from the result). The bottom frame ticks are now set
   explicitly so that every bar is labelled with its own x value.

   The tick and label positions assume bar i is centred at x = i, the same
   assumption the per-bar count labels in Epilog have always relied on. *)
plot = Module[{xs, counts, total, peak, barTicks},
   xs = points[[All, 1]];      (* distinct-partition counts, one per bar *)
   counts = points[[All, 2]];  (* number of UPs, i.e. the bar heights *)
   total = Total[counts];
   peak = Max[counts];
   (* One labelled tick per bar: {bar position, partition count shown under it} *)
   barTicks = MapIndexed[{First[#2], #1} &, xs];
   BarChart[counts,
    ChartStyle -> RGBColor[0.368, 0.507, 0.71],
    Frame -> True,
    FrameLabel -> {"\:6d89\:730e\:7684\:5206\:533a\:6570\:91cf", "UP\:4e3b\:6570\:91cf"},
    (* {{left, right}, {bottom, top}}: automatic count ticks on the vertical
       edges, data-driven labels on the bottom edge, nothing on the top edge *)
    FrameTicks -> {{Automatic, Automatic}, {barTicks, None}},
    FrameStyle -> Directive[Black, 14],
    GridLines -> Automatic,
    GridLinesStyle -> Directive[Gray, Dotted],
    ImageSize -> 800,
    PlotLabel -> 
     Style["\:4e0a\:699c\:89c6\:9891\:6570\[GreaterEqual]10\:7684UP\:4e3b\:5206\:533a\:591a\:6837\:6027\:5206\:5e03", 16, Bold],
    LabelStyle -> Directive[Black, 12],
    Epilog -> {
      (* Print each bar's count slightly above its top (1% of the tallest bar) *)
      MapIndexed[
       Text[Style[#1, 10, Black], {First[#2], #1 + peak*0.01}] &, 
       counts],
      (* Summary line: total number of UPs and the mean number of distinct
         partitions per UP, weighted by the number of UPs in each bar *)
      Text[
       Style[
        Row[{"\:603bUP\:4e3b\:6570: ", total, 
          "  \:5e73\:5747\:6d89\:730e\:5206\:533a\:6570: ", 
          NumberForm[N[Total[xs*counts]/total], 3]}], 12, Black], 
       Scaled[{0.5, 0.95}]]
      }
    ]
   ];

(* Display the figure *)
plot

(* Write the figure to a PNG file located in the notebook's directory *)
With[
 {target = FileNameJoin[{NotebookDirectory[], "up_diversity_distribution.png"}]},
 Export[target, plot]
 ]

(* Look up the UPs that cover the most partitions.
   Videos with a non-NULL tidv2 are joined to their author, grouped by author
   and restricted to authors with at least 10 such videos. The ten rows with the
   highest distinct tidv2 count are returned, ties broken by video count
   (both descending). Each row carries up_name, video_count and
   distinct_tidv2_count. *)
topDiverseUPs = With[
   {
    db = DatabaseReference[
      File[FileNameJoin[{NotebookDirectory[], "..", "data.db"}]]],
    sql = "WITH up_stats AS (
    SELECT 
        a.name as up_name,
        COUNT(v.id) as video_count,
        COUNT(DISTINCT v.tidv2) as distinct_tidv2_count
    FROM videos v
    JOIN authors a ON v.author_id = a.id
    WHERE v.tidv2 IS NOT NULL
    GROUP BY v.author_id
    HAVING COUNT(v.id) >= 10
)
SELECT 
    up_name,
    video_count,
    distinct_tidv2_count
FROM up_stats
ORDER BY distinct_tidv2_count DESC, video_count DESC
LIMIT 10;"
    },
   Normal[ExternalEvaluate[db, sql]]
   ];

(* Print a heading, then tabulate the top-10 rows as
   name / video count / distinct partition count. *)
Print["\:6d89\:730e\:5206\:533a\:6700\:591a\:7684\:524d10\:540dUP\:4e3b\:ff1a"];
TableForm[
 Map[
  Function[row, 
   {row["up_name"], row["video_count"], row["distinct_tidv2_count"]}], 
  topDiverseUPs],
 TableHeadings -> {None, {"UP\:4e3b\:540d\:79f0", "\:4e0a\:699c\:89c6\:9891\:6570", "\:6d89\:730e\:5206\:533a\:6570"}}]