broadfield-dev committed on
Commit
5bed3d1
·
verified ·
1 Parent(s): ccd3bcf

Update parser.py

Browse files
Files changed (1) hide show
  1. parser.py +27 -27
parser.py CHANGED
@@ -13,29 +13,29 @@ def get_category_id(category):
13
 
14
  def create_vector(category, level, location, total_lines, parent_path):
15
  """
16
- Creates a 6D normalized vector:
17
- [Category, Depth, RelativeCenter, Density, ParentDepth, AncestryWeight]
18
  """
19
  cat_id = get_category_id(category)
20
  start, end = location
21
  total_lines = max(1, total_lines)
22
 
23
- # metrics
24
  span = (end - start + 1) / total_lines
25
  center = ((start + end) / 2) / total_lines
26
  parent_depth = len(parent_path)
27
 
28
- # Ancestry weight: Simple hash sum of parent IDs to represent unique path
29
  path_str = "".join(parent_path)
30
  parent_weight = (int(hashlib.md5(path_str.encode()).hexdigest(), 16) % 100) / 100.0
31
 
 
32
  return [
33
  cat_id,
34
  level,
35
- float(f"{center:.4f}"),
36
- float(f"{span:.4f}"),
37
  parent_depth,
38
- float(f"{parent_weight:.4f}")
39
  ]
40
 
41
  def parse_source_to_graph(code):
@@ -48,16 +48,13 @@ def parse_source_to_graph(code):
48
  total_lines = len(lines)
49
  nodes = []
50
 
51
- # Recursive visitor
52
  def traverse(node, parent_path=[], level=0, parent_id=None):
53
  category = 'other'
54
  name = getattr(node, 'name', None)
55
- # Unique Node ID based on position to ensure consistency
56
  node_id = f"{type(node).__name__}_{getattr(node, 'lineno', 0)}_{getattr(node, 'col_offset', 0)}"
57
 
58
  # Categorization logic
59
- if isinstance(node, (ast.Import, ast.ImportFrom)): category = 'import'; name = "import"
60
- elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): category = 'function'
61
  elif isinstance(node, ast.ClassDef): category = 'class'
62
  elif isinstance(node, ast.If): category = 'if'; name = "if"
63
  elif isinstance(node, (ast.For, ast.AsyncFor)): category = 'for'; name = "for"
@@ -66,17 +63,13 @@ def parse_source_to_graph(code):
66
  elif isinstance(node, (ast.Assign, ast.AnnAssign)): category = 'assigned_variable'; name = "assignment"
67
  elif isinstance(node, ast.Expr): category = 'expression'; name = "expr"
68
  elif isinstance(node, ast.Try): category = 'try'; name = "try"
69
- elif isinstance(node, ast.ExceptHandler): category = 'except'; name = "except"
70
 
71
  lineno = getattr(node, 'lineno', 0)
72
  end_lineno = getattr(node, 'end_lineno', lineno)
73
 
74
- if lineno == 0: return # Skip nodes without line numbers (e.g. Load context)
75
 
76
- # Create source snippet
77
- source_segment = "".join(lines[lineno-1:end_lineno])
78
-
79
- # Determine Label
80
  label = name if name else category
81
  if category == 'assigned_variable':
82
  targets = getattr(node, 'targets', []) or [getattr(node, 'target', None)]
@@ -85,18 +78,18 @@ def parse_source_to_graph(code):
85
 
86
  vector = create_vector(category, level, (lineno, end_lineno), total_lines, parent_path)
87
 
 
 
88
  node_data = {
89
  "id": node_id,
90
- "label": label,
91
  "type": category,
92
- "source": source_segment.strip(),
93
- "vector": vector,
94
- "level": level,
95
- "lineno": lineno,
96
- "parent_id": parent_id
97
  }
98
 
99
- # Filter: Only visualize structural elements (skip raw expressions unless useful)
100
  if category != 'other':
101
  nodes.append(node_data)
102
  current_path = parent_path + [node_id]
@@ -113,10 +106,17 @@ def parse_source_to_graph(code):
113
  for node in tree.body:
114
  traverse(node)
115
 
116
- # Sort by line number for linear visual flow
117
- nodes.sort(key=lambda x: x['lineno'])
118
 
119
- return {"nodes": nodes, "connections": generate_connections(nodes)}
 
 
 
 
 
 
 
 
120
 
121
  def generate_connections(nodes):
122
  connections = []
 
13
 
14
  def create_vector(category, level, location, total_lines, parent_path):
15
  """
16
+ Creates a 6D normalized vector with rounded values to reduce JSON size.
 
17
  """
18
  cat_id = get_category_id(category)
19
  start, end = location
20
  total_lines = max(1, total_lines)
21
 
22
+ # Calculate metrics
23
  span = (end - start + 1) / total_lines
24
  center = ((start + end) / 2) / total_lines
25
  parent_depth = len(parent_path)
26
 
27
+ # Ancestry weight
28
  path_str = "".join(parent_path)
29
  parent_weight = (int(hashlib.md5(path_str.encode()).hexdigest(), 16) % 100) / 100.0
30
 
31
+ # OPTIMIZATION: Round floats to 4 decimals
32
  return [
33
  cat_id,
34
  level,
35
+ round(center, 4),
36
+ round(span, 4),
37
  parent_depth,
38
+ round(parent_weight, 4)
39
  ]
40
 
41
  def parse_source_to_graph(code):
 
48
  total_lines = len(lines)
49
  nodes = []
50
 
 
51
  def traverse(node, parent_path=[], level=0, parent_id=None):
52
  category = 'other'
53
  name = getattr(node, 'name', None)
 
54
  node_id = f"{type(node).__name__}_{getattr(node, 'lineno', 0)}_{getattr(node, 'col_offset', 0)}"
55
 
56
  # Categorization logic
57
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): category = 'function'
 
58
  elif isinstance(node, ast.ClassDef): category = 'class'
59
  elif isinstance(node, ast.If): category = 'if'; name = "if"
60
  elif isinstance(node, (ast.For, ast.AsyncFor)): category = 'for'; name = "for"
 
63
  elif isinstance(node, (ast.Assign, ast.AnnAssign)): category = 'assigned_variable'; name = "assignment"
64
  elif isinstance(node, ast.Expr): category = 'expression'; name = "expr"
65
  elif isinstance(node, ast.Try): category = 'try'; name = "try"
66
+ elif isinstance(node, (ast.Import, ast.ImportFrom)): category = 'import'; name = "import"
67
 
68
  lineno = getattr(node, 'lineno', 0)
69
  end_lineno = getattr(node, 'end_lineno', lineno)
70
 
71
+ if lineno == 0: return
72
 
 
 
 
 
73
  label = name if name else category
74
  if category == 'assigned_variable':
75
  targets = getattr(node, 'targets', []) or [getattr(node, 'target', None)]
 
78
 
79
  vector = create_vector(category, level, (lineno, end_lineno), total_lines, parent_path)
80
 
81
+ # OPTIMIZATION: Send 'loc' (location) instead of 'source' string.
82
+ # Shorten keys to reduce payload size.
83
  node_data = {
84
  "id": node_id,
85
+ "lbl": label, # label -> lbl
86
  "type": category,
87
+ "loc": [lineno, end_lineno], # Start/End lines only
88
+ "vec": vector, # vector -> vec
89
+ "lvl": level, # level -> lvl
90
+ "pid": parent_id # parent_id -> pid
 
91
  }
92
 
 
93
  if category != 'other':
94
  nodes.append(node_data)
95
  current_path = parent_path + [node_id]
 
106
  for node in tree.body:
107
  traverse(node)
108
 
109
+ nodes.sort(key=lambda x: x['loc'][0])
 
110
 
111
+ # Update connections to use shorter keys
112
+ connections = []
113
+ node_ids = {n['id'] for n in nodes}
114
+ for node in nodes:
115
+ if node['pid'] and node['pid'] in node_ids:
116
+ connections.append({"f": node['pid'], "t": node['id']}) # from/to -> f/t
117
+
118
+ return {"nodes": nodes, "connections": connections}
119
+
120
 
121
  def generate_connections(nodes):
122
  connections = []